Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
cf184841
Commit
cf184841
authored
Feb 24, 2022
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[conduit] implement conduit for Hal, Pubmed
parent
b16a5b54
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
104 additions
and
67 deletions
+104
-67
Contact.hs
src/Gargantext/API/Node/Contact.hs
+2
-1
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+39
-26
DocumentsFromWriteNodes.hs
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
+1
-1
API.hs
src/Gargantext/Core/Text/Corpus/API.hs
+10
-8
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+7
-5
Pubmed.hs
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
+9
-3
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+28
-23
Node.hs
src/Gargantext/Database/Query/Table/Node.hs
+8
-0
No files found.
src/Gargantext/API/Node/Contact.hs
View file @
cf184841
...
@@ -22,6 +22,7 @@ Portability : POSIX
...
@@ -22,6 +22,7 @@ Portability : POSIX
module
Gargantext.API.Node.Contact
module
Gargantext.API.Node.Contact
where
where
import
Conduit
import
Data.Aeson
import
Data.Aeson
import
Data.Either
(
Either
(
Right
))
import
Data.Either
(
Either
(
Right
))
import
Data.Maybe
(
Maybe
(
..
))
import
Data.Maybe
(
Maybe
(
..
))
...
@@ -93,7 +94,7 @@ addContact u nId (AddContactParams fn ln) logStatus = do
...
@@ -93,7 +94,7 @@ addContact u nId (AddContactParams fn ln) logStatus = do
,
_scst_remaining
=
Just
1
,
_scst_remaining
=
Just
1
,
_scst_events
=
Just
[]
,
_scst_events
=
Just
[]
}
}
_
<-
flow
(
Nothing
::
Maybe
HyperdataAnnuaire
)
u
(
Right
[
nId
])
(
Multi
EN
)
Nothing
[[
hyperdataContact
fn
ln
]]
logStatus
_
<-
flow
(
Nothing
::
Maybe
HyperdataAnnuaire
)
u
(
Right
[
nId
])
(
Multi
EN
)
Nothing
(
Just
1
,
yield
$
hyperdataContact
fn
ln
)
logStatus
pure
JobLog
{
_scst_succeeded
=
Just
2
pure
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_failed
=
Just
0
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
cf184841
...
@@ -19,6 +19,7 @@ module Gargantext.API.Node.Corpus.New
...
@@ -19,6 +19,7 @@ module Gargantext.API.Node.Corpus.New
where
where
import
Conduit
import
Control.Lens
hiding
(
elements
,
Empty
)
import
Control.Lens
hiding
(
elements
,
Empty
)
import
Data.Aeson
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Aeson.TH
(
deriveJSON
)
...
@@ -40,7 +41,7 @@ import Gargantext.Prelude
...
@@ -40,7 +41,7 @@ import Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
,
ScraperEvent
(
..
),
scst_events
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
,
ScraperEvent
(
..
),
scst_events
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Job
(
jobLogSuccess
,
jobLogFailTotal
,
jobLogFailTotalWithMessage
)
import
Gargantext.API.Job
(
addEvent
,
jobLogSuccess
,
jobLogFailTotal
,
jobLogFailTotalWithMessage
)
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.Searx
import
Gargantext.API.Node.Corpus.Searx
import
Gargantext.API.Node.Corpus.Types
import
Gargantext.API.Node.Corpus.Types
...
@@ -214,8 +215,11 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
...
@@ -214,8 +215,11 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
-- TODO if cid is folder -> create Corpus
-- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
-- if cid is root -> create corpus in Private
txts
<-
mapM
(
\
db
->
getDataText
db
(
Multi
l
)
q
maybeLimit
)
[
database2origin
dbs
]
eTxts
<-
mapM
(
\
db
->
getDataText
db
(
Multi
l
)
q
maybeLimit
)
[
database2origin
dbs
]
let
lTxts
=
lefts
eTxts
case
lTxts
of
[]
->
do
let
txts
=
rights
eTxts
-- TODO Sum lenghts of each txt elements
-- TODO Sum lenghts of each txt elements
logStatus
JobLog
{
_scst_succeeded
=
Just
2
logStatus
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_failed
=
Just
0
...
@@ -234,6 +238,14 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
...
@@ -234,6 +238,14 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
,
_scst_events
=
Just
[]
,
_scst_events
=
Just
[]
}
}
(
err
:
_
)
->
do
pure
$
addEvent
"ERROR"
(
T
.
pack
$
show
err
)
$
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
1
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
type
AddWithForm
=
Summary
"Add with FormUrlEncoded to corpus endpoint"
type
AddWithForm
=
Summary
"Add with FormUrlEncoded to corpus endpoint"
:>
"corpus"
:>
"corpus"
...
@@ -270,15 +282,16 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
...
@@ -270,15 +282,16 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
_
->
cs
d
_
->
cs
d
eDocs
<-
liftBase
$
parse
data
'
eDocs
<-
liftBase
$
parse
data
'
case
eDocs
of
case
eDocs
of
Right
docs
'
->
do
Right
docs
->
do
-- TODO Add progress (jobStatus) update for docs - this is a
-- TODO Add progress (jobStatus) update for docs - this is a
-- long action
-- long action
limit'
<-
view
$
hasConfig
.
gc_max_docs_parsers
limit'
<-
view
$
hasConfig
.
gc_max_docs_parsers
let
limit
=
fromIntegral
limit'
let
limit
=
fromIntegral
limit'
if
length
docs
'
>
limit
then
do
if
length
docs
>
limit
then
do
printDebug
"[addToCorpusWithForm] number of docs exceeds the limit"
(
show
$
length
docs
'
)
printDebug
"[addToCorpusWithForm] number of docs exceeds the limit"
(
show
$
length
docs
)
let
panicMsg'
=
[
"[addToCorpusWithForm] number of docs ("
let
panicMsg'
=
[
"[addToCorpusWithForm] number of docs ("
,
show
$
length
docs
'
,
show
$
length
docs
,
") exceeds the MAX_DOCS_PARSERS limit ("
,
") exceeds the MAX_DOCS_PARSERS limit ("
,
show
limit
,
show
limit
,
")"
]
,
")"
]
...
@@ -287,7 +300,6 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
...
@@ -287,7 +300,6 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
panic
panicMsg
panic
panicMsg
else
else
pure
()
pure
()
let
docs
=
splitEvery
500
$
take
limit
docs'
printDebug
"Parsing corpus finished : "
cid
printDebug
"Parsing corpus finished : "
cid
logStatus
jobLog2
logStatus
jobLog2
...
@@ -298,7 +310,8 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
...
@@ -298,7 +310,8 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
(
Right
[
cid
])
(
Right
[
cid
])
(
Multi
$
fromMaybe
EN
l
)
(
Multi
$
fromMaybe
EN
l
)
Nothing
Nothing
(
map
(
map
toHyperdataDocument
)
docs
)
(
Just
$
fromIntegral
$
length
docs
,
yieldMany
docs
.|
mapC
toHyperdataDocument
)
--(map (map toHyperdataDocument) docs)
logStatus
logStatus
printDebug
"Extraction finished : "
cid
printDebug
"Extraction finished : "
cid
...
...
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
View file @
cf184841
...
@@ -101,7 +101,7 @@ documentsFromWriteNodes uId nId _p logStatus = do
...
@@ -101,7 +101,7 @@ documentsFromWriteNodes uId nId _p logStatus = do
let
parsedE
=
(
\
(
node
,
contents
)
->
hyperdataDocumentFromFrameWrite
(
node
^.
node_hyperdata
,
contents
))
<$>
frameWritesWithContents
let
parsedE
=
(
\
(
node
,
contents
)
->
hyperdataDocumentFromFrameWrite
(
node
^.
node_hyperdata
,
contents
))
<$>
frameWritesWithContents
let
parsed
=
rights
parsedE
let
parsed
=
rights
parsedE
_
<-
flowDataText
(
RootId
(
NodeId
uId
))
(
DataNew
$
yieldMany
parsed
)
(
Multi
EN
)
cId
Nothing
logStatus
_
<-
flowDataText
(
RootId
(
NodeId
uId
))
(
DataNew
(
Just
$
fromIntegral
$
length
parsed
,
yieldMany
parsed
)
)
(
Multi
EN
)
cId
Nothing
logStatus
pure
$
jobLogSuccess
jobLog
pure
$
jobLogSuccess
jobLog
------------------------------------------------------------------------
------------------------------------------------------------------------
...
...
src/Gargantext/Core/Text/Corpus/API.hs
View file @
cf184841
...
@@ -19,6 +19,7 @@ module Gargantext.Core.Text.Corpus.API
...
@@ -19,6 +19,7 @@ module Gargantext.Core.Text.Corpus.API
where
where
import
Conduit
import
Conduit
import
Data.Either
(
Either
(
..
))
import
Data.Maybe
import
Data.Maybe
import
Gargantext.API.Admin.Orchestrator.Types
(
ExternalAPIs
(
..
),
externalAPIs
)
import
Gargantext.API.Admin.Orchestrator.Types
(
ExternalAPIs
(
..
),
externalAPIs
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
...
@@ -28,6 +29,7 @@ import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
...
@@ -28,6 +29,7 @@ import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
import
qualified
Gargantext.Core.Text.Corpus.API.Isidore
as
ISIDORE
import
qualified
Gargantext.Core.Text.Corpus.API.Isidore
as
ISIDORE
import
qualified
Gargantext.Core.Text.Corpus.API.Istex
as
ISTEX
import
qualified
Gargantext.Core.Text.Corpus.API.Istex
as
ISTEX
import
qualified
Gargantext.Core.Text.Corpus.API.Pubmed
as
PUBMED
import
qualified
Gargantext.Core.Text.Corpus.API.Pubmed
as
PUBMED
import
Servant.Client
(
ClientError
)
-- | TODO put in gargantext.init
-- | TODO put in gargantext.init
default_limit
::
Maybe
Integer
default_limit
::
Maybe
Integer
...
@@ -39,17 +41,17 @@ get :: ExternalAPIs
...
@@ -39,17 +41,17 @@ get :: ExternalAPIs
->
Query
->
Query
->
Maybe
Limit
->
Maybe
Limit
-- -> IO [HyperdataDocument]
-- -> IO [HyperdataDocument]
->
IO
(
ConduitT
()
HyperdataDocument
IO
(
)
)
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
get
PubMed
_la
q
_l
=
do
get
PubMed
_la
q
_l
=
PUBMED
.
get
q
Nothing
re
s
<-
PUBMED
.
get
q
default_limit
-- EN only by default
--doc
s <- PUBMED.get q default_limit -- EN only by default
pure
$
yieldMany
res
--pure (Just $ fromIntegral $ length docs, yieldMany docs)
get
HAL
la
q
_l
=
HAL
.
getC
la
q
Nothing
get
HAL
la
q
_l
=
HAL
.
getC
la
q
Nothing
get
IsTex
la
q
_l
=
do
get
IsTex
la
q
_l
=
do
re
s
<-
ISTEX
.
get
la
q
default_limit
doc
s
<-
ISTEX
.
get
la
q
default_limit
pure
$
yieldMany
res
pure
$
Right
(
Just
$
fromIntegral
$
length
docs
,
yieldMany
docs
)
get
Isidore
la
q
_l
=
do
get
Isidore
la
q
_l
=
do
re
s
<-
ISIDORE
.
get
la
(
fromIntegral
<$>
default_limit
)
(
Just
q
)
Nothing
doc
s
<-
ISIDORE
.
get
la
(
fromIntegral
<$>
default_limit
)
(
Just
q
)
Nothing
pure
$
yieldMany
res
pure
$
Right
(
Just
$
fromIntegral
$
length
docs
,
yieldMany
docs
)
get
_
_
_
_
=
undefined
get
_
_
_
_
=
undefined
-- | Some Sugar for the documentation
-- | Some Sugar for the documentation
...
...
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
cf184841
...
@@ -16,6 +16,7 @@ import Conduit
...
@@ -16,6 +16,7 @@ import Conduit
import
Data.Either
import
Data.Either
import
Data.Maybe
import
Data.Maybe
import
Data.Text
(
Text
,
pack
,
intercalate
)
import
Data.Text
(
Text
,
pack
,
intercalate
)
import
Servant.Client
(
ClientError
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
...
@@ -30,12 +31,13 @@ get la q ml = do
...
@@ -30,12 +31,13 @@ get la q ml = do
eDocs
<-
HAL
.
getMetadataWith
q
(
Just
0
)
ml
eDocs
<-
HAL
.
getMetadataWith
q
(
Just
0
)
ml
either
(
panic
.
pack
.
show
)
(
\
d
->
mapM
(
toDoc'
la
)
$
HAL
.
_docs
d
)
eDocs
either
(
panic
.
pack
.
show
)
(
\
d
->
mapM
(
toDoc'
la
)
$
HAL
.
_docs
d
)
eDocs
getC
::
Lang
->
Text
->
Maybe
Integer
->
IO
(
ConduitT
()
HyperdataDocument
IO
(
)
)
getC
::
Lang
->
Text
->
Maybe
Integer
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
getC
la
q
ml
=
do
getC
la
q
ml
=
do
eDocs
<-
HAL
.
getMetadataRecursively
q
(
Just
0
)
ml
eRes
<-
HAL
.
getMetadataWithC
q
(
Just
0
)
ml
case
eDocs
of
pure
$
(
\
(
len
,
docsC
)
->
(
len
,
docsC
.|
mapMC
(
toDoc'
la
)))
<$>
eRes
Left
err
->
panic
$
pack
$
show
err
-- case eRes of
Right
docsC
->
pure
$
docsC
.|
mapMC
(
toDoc'
la
)
-- Left err -> panic $ pack $ show err
-- Right (len, docsC) -> pure (len, docsC .| mapMC (toDoc' la))
toDoc'
::
Lang
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
::
Lang
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
la
(
HAL
.
Corpus
i
t
ab
d
s
aus
affs
struct_id
)
=
do
toDoc'
la
(
HAL
.
Corpus
i
t
ab
d
s
aus
affs
struct_id
)
=
do
...
...
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
View file @
cf184841
...
@@ -13,9 +13,12 @@ Portability : POSIX
...
@@ -13,9 +13,12 @@ Portability : POSIX
module
Gargantext.Core.Text.Corpus.API.Pubmed
module
Gargantext.Core.Text.Corpus.API.Pubmed
where
where
import
Conduit
import
Data.Either
(
Either
)
import
Data.Maybe
import
Data.Maybe
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
Text
import
qualified
Data.Text
as
Text
import
Servant.Client
(
ClientError
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
...
@@ -31,9 +34,12 @@ type Limit = PubMed.Limit
...
@@ -31,9 +34,12 @@ type Limit = PubMed.Limit
-- | TODO put default pubmed query in gargantext.ini
-- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs
-- by default: 10K docs
get
::
Query
->
Maybe
Limit
->
IO
[
HyperdataDocument
]
get
::
Query
->
Maybe
Limit
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
get
q
l
=
either
(
\
e
->
panic
$
"CRAWL: PubMed"
<>
e
)
(
map
(
toDoc
EN
))
get
q
l
=
do
<$>
PubMed
.
getMetadataWith
q
l
eRes
<-
PubMed
.
getMetadataWithC
q
l
pure
$
(
\
(
len
,
docsC
)
->
(
len
,
docsC
.|
mapC
(
toDoc
EN
)))
<$>
eRes
--either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
-- <$> PubMed.getMetadataWithC q l
toDoc
::
Lang
->
PubMedDoc
.
PubMed
->
HyperdataDocument
toDoc
::
Lang
->
PubMedDoc
.
PubMed
->
HyperdataDocument
toDoc
l
(
PubMedDoc
.
PubMed
(
PubMedDoc
.
PubMedArticle
t
j
as
aus
)
toDoc
l
(
PubMedDoc
.
PubMed
(
PubMedDoc
.
PubMedArticle
t
j
as
aus
)
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
cf184841
...
@@ -62,6 +62,7 @@ import qualified Data.Text as T
...
@@ -62,6 +62,7 @@ import qualified Data.Text as T
import
Data.Traversable
(
traverse
)
import
Data.Traversable
(
traverse
)
import
Data.Tuple.Extra
(
first
,
second
)
import
Data.Tuple.Extra
(
first
,
second
)
import
GHC.Generics
(
Generic
)
import
GHC.Generics
(
Generic
)
import
Servant.Client
(
ClientError
)
import
System.FilePath
(
FilePath
)
import
System.FilePath
(
FilePath
)
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Gargantext.Data.HashMap.Strict.Utils
as
HashMap
import
qualified
Gargantext.Data.HashMap.Strict.Utils
as
HashMap
...
@@ -130,7 +131,7 @@ allDataOrigins = map InternalOrigin API.externalAPIs
...
@@ -130,7 +131,7 @@ allDataOrigins = map InternalOrigin API.externalAPIs
---------------
---------------
data
DataText
=
DataOld
!
[
NodeId
]
data
DataText
=
DataOld
!
[
NodeId
]
|
DataNew
!
(
ConduitT
()
HyperdataDocument
IO
()
)
|
DataNew
!
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
)
-- | DataNew ![[HyperdataDocument]]
-- | DataNew ![[HyperdataDocument]]
-- TODO use the split parameter in config file
-- TODO use the split parameter in config file
...
@@ -139,10 +140,10 @@ getDataText :: FlowCmdM env err m
...
@@ -139,10 +140,10 @@ getDataText :: FlowCmdM env err m
->
TermType
Lang
->
TermType
Lang
->
API
.
Query
->
API
.
Query
->
Maybe
API
.
Limit
->
Maybe
API
.
Limit
->
m
DataText
->
m
(
Either
ClientError
DataText
)
getDataText
(
ExternalOrigin
api
)
la
q
li
=
liftBase
$
do
getDataText
(
ExternalOrigin
api
)
la
q
li
=
liftBase
$
do
docsC
<-
API
.
get
api
(
_tt_lang
la
)
q
li
eRes
<-
API
.
get
api
(
_tt_lang
la
)
q
li
pure
$
DataNew
docsC
pure
$
DataNew
<$>
eRes
getDataText
(
InternalOrigin
_
)
_la
q
_li
=
do
getDataText
(
InternalOrigin
_
)
_la
q
_li
=
do
(
_masterUserId
,
_masterRootId
,
cId
)
<-
getOrMk_RootWithCorpus
(
_masterUserId
,
_masterRootId
,
cId
)
<-
getOrMk_RootWithCorpus
...
@@ -150,7 +151,7 @@ getDataText (InternalOrigin _) _la q _li = do
...
@@ -150,7 +151,7 @@ getDataText (InternalOrigin _) _la q _li = do
(
Left
""
)
(
Left
""
)
(
Nothing
::
Maybe
HyperdataCorpus
)
(
Nothing
::
Maybe
HyperdataCorpus
)
ids
<-
map
fst
<$>
searchDocInDatabase
cId
(
stemIt
q
)
ids
<-
map
fst
<$>
searchDocInDatabase
cId
(
stemIt
q
)
pure
$
DataOld
ids
pure
$
Right
$
DataOld
ids
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
flowDataText
::
forall
env
err
m
.
flowDataText
::
forall
env
err
m
.
...
@@ -166,7 +167,8 @@ flowDataText :: forall env err m.
...
@@ -166,7 +167,8 @@ flowDataText :: forall env err m.
flowDataText
u
(
DataOld
ids
)
tt
cid
mfslw
_
=
flowCorpusUser
(
_tt_lang
tt
)
u
(
Right
[
cid
])
corpusType
ids
mfslw
flowDataText
u
(
DataOld
ids
)
tt
cid
mfslw
_
=
flowCorpusUser
(
_tt_lang
tt
)
u
(
Right
[
cid
])
corpusType
ids
mfslw
where
where
corpusType
=
(
Nothing
::
Maybe
HyperdataCorpus
)
corpusType
=
(
Nothing
::
Maybe
HyperdataCorpus
)
flowDataText
u
(
DataNew
txtC
)
tt
cid
mfslw
logStatus
=
flowCorpus
u
(
Right
[
cid
])
tt
mfslw
(
transPipe
liftBase
txtC
)
logStatus
flowDataText
u
(
DataNew
(
mLen
,
txtC
))
tt
cid
mfslw
logStatus
=
flowCorpus
u
(
Right
[
cid
])
tt
mfslw
(
mLen
,
transPipe
liftBase
txtC
)
logStatus
------------------------------------------------------------------------
------------------------------------------------------------------------
-- TODO use proxy
-- TODO use proxy
...
@@ -180,7 +182,7 @@ flowAnnuaire :: (FlowCmdM env err m)
...
@@ -180,7 +182,7 @@ flowAnnuaire :: (FlowCmdM env err m)
flowAnnuaire
u
n
l
filePath
logStatus
=
do
flowAnnuaire
u
n
l
filePath
logStatus
=
do
-- TODO Conduit for file
-- TODO Conduit for file
docs
<-
liftBase
$
((
readFile_Annuaire
filePath
)
::
IO
[
HyperdataContact
])
docs
<-
liftBase
$
((
readFile_Annuaire
filePath
)
::
IO
[
HyperdataContact
])
flow
(
Nothing
::
Maybe
HyperdataAnnuaire
)
u
n
l
Nothing
(
yieldMany
docs
)
logStatus
flow
(
Nothing
::
Maybe
HyperdataAnnuaire
)
u
n
l
Nothing
(
Just
$
fromIntegral
$
length
docs
,
yieldMany
docs
)
logStatus
------------------------------------------------------------------------
------------------------------------------------------------------------
flowCorpusFile
::
(
FlowCmdM
env
err
m
)
flowCorpusFile
::
(
FlowCmdM
env
err
m
)
...
@@ -195,7 +197,7 @@ flowCorpusFile u n _l la ff fp mfslw logStatus = do
...
@@ -195,7 +197,7 @@ flowCorpusFile u n _l la ff fp mfslw logStatus = do
eParsed
<-
liftBase
$
parseFile
ff
fp
eParsed
<-
liftBase
$
parseFile
ff
fp
case
eParsed
of
case
eParsed
of
Right
parsed
->
do
Right
parsed
->
do
flowCorpus
u
n
la
mfslw
(
yieldMany
parsed
.|
mapC
toHyperdataDocument
)
logStatus
flowCorpus
u
n
la
mfslw
(
Just
$
fromIntegral
$
length
parsed
,
yieldMany
parsed
.|
mapC
toHyperdataDocument
)
logStatus
--let docs = splitEvery 500 $ take l parsed
--let docs = splitEvery 500 $ take l parsed
--flowCorpus u n la mfslw (yieldMany $ map (map toHyperdataDocument) docs) logStatus
--flowCorpus u n la mfslw (yieldMany $ map (map toHyperdataDocument) docs) logStatus
Left
e
->
panic
$
"Error: "
<>
(
T
.
pack
e
)
Left
e
->
panic
$
"Error: "
<>
(
T
.
pack
e
)
...
@@ -208,7 +210,7 @@ flowCorpus :: (FlowCmdM env err m, FlowCorpus a)
...
@@ -208,7 +210,7 @@ flowCorpus :: (FlowCmdM env err m, FlowCorpus a)
->
Either
CorpusName
[
CorpusId
]
->
Either
CorpusName
[
CorpusId
]
->
TermType
Lang
->
TermType
Lang
->
Maybe
FlowSocialListWith
->
Maybe
FlowSocialListWith
->
ConduitT
()
a
m
(
)
->
(
Maybe
Integer
,
ConduitT
()
a
m
()
)
->
(
JobLog
->
m
()
)
->
(
JobLog
->
m
()
)
->
m
CorpusId
->
m
CorpusId
flowCorpus
=
flow
(
Nothing
::
Maybe
HyperdataCorpus
)
flowCorpus
=
flow
(
Nothing
::
Maybe
HyperdataCorpus
)
...
@@ -224,10 +226,10 @@ flow :: forall env err m a c.
...
@@ -224,10 +226,10 @@ flow :: forall env err m a c.
->
Either
CorpusName
[
CorpusId
]
->
Either
CorpusName
[
CorpusId
]
->
TermType
Lang
->
TermType
Lang
->
Maybe
FlowSocialListWith
->
Maybe
FlowSocialListWith
->
ConduitT
()
a
m
(
)
->
(
Maybe
Integer
,
ConduitT
()
a
m
()
)
->
(
JobLog
->
m
()
)
->
(
JobLog
->
m
()
)
->
m
CorpusId
->
m
CorpusId
flow
c
u
cn
la
mfslw
docsC
_
logStatus
=
do
flow
c
u
cn
la
mfslw
(
mLength
,
docsC
)
logStatus
=
do
-- TODO if public insertMasterDocs else insertUserDocs
-- TODO if public insertMasterDocs else insertUserDocs
ids
<-
runConduit
$
ids
<-
runConduit
$
zipSources
(
yieldMany
[
1
..
])
docsC
zipSources
(
yieldMany
[
1
..
])
docsC
...
@@ -245,14 +247,17 @@ flow c u cn la mfslw docsC _logStatus = do
...
@@ -245,14 +247,17 @@ flow c u cn la mfslw docsC _logStatus = do
flowCorpusUser
(
la
^.
tt_lang
)
u
cn
c
ids
mfslw
flowCorpusUser
(
la
^.
tt_lang
)
u
cn
c
ids
mfslw
where
where
insertDoc
::
(
Int
,
a
)
->
m
NodeId
insertDoc
::
(
Int
eger
,
a
)
->
m
NodeId
insertDoc
(
_
idx
,
doc
)
=
do
insertDoc
(
idx
,
doc
)
=
do
id
<-
insertMasterDocs
c
la
[
doc
]
id
<-
insertMasterDocs
c
la
[
doc
]
-- logStatus JobLog { _scst_succeeded = Just $ 1 + idx
case
mLength
of
-- , _scst_failed = Just 0
Nothing
->
pure
()
-- , _scst_remaining = Just $ length docs - idx
Just
len
->
-- , _scst_events = Just []
logStatus
JobLog
{
_scst_succeeded
=
Just
$
fromIntegral
$
1
+
idx
-- }
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
$
fromIntegral
$
len
-
idx
,
_scst_events
=
Just
[]
}
pure
$
Prelude
.
head
id
pure
$
Prelude
.
head
id
...
@@ -272,7 +277,7 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
...
@@ -272,7 +277,7 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
-- User Flow
-- User Flow
(
userId
,
_rootId
,
userCorpusId
)
<-
getOrMk_RootWithCorpus
user
corpusName
ctype
(
userId
,
_rootId
,
userCorpusId
)
<-
getOrMk_RootWithCorpus
user
corpusName
ctype
-- NodeTexts is first
-- NodeTexts is first
_tId
<-
insertDefaultNode
NodeTexts
userCorpusId
userId
_tId
<-
insertDefaultNode
IfNotExists
NodeTexts
userCorpusId
userId
-- printDebug "NodeTexts: " tId
-- printDebug "NodeTexts: " tId
-- NodeList is second
-- NodeList is second
...
@@ -298,8 +303,8 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
...
@@ -298,8 +303,8 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
-- _ <- insertOccsUpdates userCorpusId mastListId
-- _ <- insertOccsUpdates userCorpusId mastListId
-- printDebug "userListId" userListId
-- printDebug "userListId" userListId
-- User Graph Flow
-- User Graph Flow
_
<-
insertDefaultNode
NodeDashboard
userCorpusId
userId
_
<-
insertDefaultNode
IfNotExists
NodeDashboard
userCorpusId
userId
_
<-
insertDefaultNode
NodeGraph
userCorpusId
userId
_
<-
insertDefaultNode
IfNotExists
NodeGraph
userCorpusId
userId
--
_
<-
mkPhylo
userCorpusId
userId
--
_
<-
mkPhylo
userCorpusId
userId
-- Annuaire Flow
-- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId
-- _ <- mkAnnuaire rootUserId userId
...
@@ -344,7 +349,7 @@ saveDocNgramsWith :: ( FlowCmdM env err m)
...
@@ -344,7 +349,7 @@ saveDocNgramsWith :: ( FlowCmdM env err m)
->
m
()
->
m
()
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
printDebug
"terms2id"
terms2id
--
printDebug "terms2id" terms2id
let
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
let
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
...
@@ -353,7 +358,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
...
@@ -353,7 +358,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
$
map
(
first
_ngramsTerms
.
second
Map
.
keys
)
$
map
(
first
_ngramsTerms
.
second
Map
.
keys
)
$
HashMap
.
toList
mapNgramsDocs
$
HashMap
.
toList
mapNgramsDocs
printDebug
"saveDocNgramsWith"
mapCgramsId
--
printDebug "saveDocNgramsWith" mapCgramsId
-- insertDocNgrams
-- insertDocNgrams
_return
<-
insertContextNodeNgrams2
_return
<-
insertContextNodeNgrams2
$
catMaybes
[
ContextNodeNgrams2
<$>
Just
nId
$
catMaybes
[
ContextNodeNgrams2
<$>
Just
nId
...
...
src/Gargantext/Database/Query/Table/Node.hs
View file @
cf184841
...
@@ -255,6 +255,14 @@ insertDefaultNode :: HasDBid NodeType
...
@@ -255,6 +255,14 @@ insertDefaultNode :: HasDBid NodeType
=>
NodeType
->
ParentId
->
UserId
->
Cmd
err
[
NodeId
]
=>
NodeType
->
ParentId
->
UserId
->
Cmd
err
[
NodeId
]
insertDefaultNode
nt
p
u
=
insertNode
nt
Nothing
Nothing
p
u
insertDefaultNode
nt
p
u
=
insertNode
nt
Nothing
Nothing
p
u
insertDefaultNodeIfNotExists
::
HasDBid
NodeType
=>
NodeType
->
ParentId
->
UserId
->
Cmd
err
[
NodeId
]
insertDefaultNodeIfNotExists
nt
p
u
=
do
children
<-
getChildrenByType
p
nt
case
children
of
[]
->
insertDefaultNode
nt
p
u
xs
->
pure
xs
insertNode
::
HasDBid
NodeType
insertNode
::
HasDBid
NodeType
=>
NodeType
->
Maybe
Name
->
Maybe
DefaultHyperdata
->
ParentId
->
UserId
->
Cmd
err
[
NodeId
]
=>
NodeType
->
Maybe
Name
->
Maybe
DefaultHyperdata
->
ParentId
->
UserId
->
Cmd
err
[
NodeId
]
insertNode
nt
n
h
p
u
=
insertNodesR
[
nodeW
nt
n
h
p
u
]
insertNode
nt
n
h
p
u
=
insertNodesR
[
nodeW
nt
n
h
p
u
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment