Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
149
Issues
149
List
Board
Labels
Milestones
Merge Requests
8
Merge Requests
8
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
92cb0a6c
Commit
92cb0a6c
authored
Feb 18, 2022
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[text-api] first rewrite using Conduit
NOTE This doesn't compile yet.
parent
d54d5f06
Pipeline
#2509
failed with stage
in 8 minutes and 43 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
60 additions
and
31 deletions
+60
-31
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+2
-1
DocumentsFromWriteNodes.hs
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
+2
-1
API.hs
src/Gargantext/Core/Text/Corpus/API.hs
+12
-4
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+2
-2
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+42
-23
No files found.
src/Gargantext/API/Node/Corpus/New.hs
View file @
92cb0a6c
...
...
@@ -215,7 +215,8 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
txts
<-
mapM
(
\
db
->
getDataText
db
(
Multi
l
)
q
maybeLimit
)
[
database2origin
dbs
]
-- TODO Sum lenghts of each txt elements
logStatus
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
$
1
+
length
txts
...
...
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
View file @
92cb0a6c
...
...
@@ -16,6 +16,7 @@ Portability : POSIX
module
Gargantext.API.Node.DocumentsFromWriteNodes
where
import
Conduit
import
Control.Lens
((
^.
))
import
Data.Aeson
import
Data.Either
(
Either
(
..
),
rights
)
...
...
@@ -100,7 +101,7 @@ documentsFromWriteNodes uId nId _p logStatus = do
let
parsedE
=
(
\
(
node
,
contents
)
->
hyperdataDocumentFromFrameWrite
(
node
^.
node_hyperdata
,
contents
))
<$>
frameWritesWithContents
let
parsed
=
rights
parsedE
_
<-
flowDataText
(
RootId
(
NodeId
uId
))
(
DataNew
[
parsed
]
)
(
Multi
EN
)
cId
Nothing
logStatus
_
<-
flowDataText
(
RootId
(
NodeId
uId
))
(
DataNew
$
yield
parsed
)
(
Multi
EN
)
cId
Nothing
logStatus
pure
$
jobLogSuccess
jobLog
------------------------------------------------------------------------
...
...
src/Gargantext/Core/Text/Corpus/API.hs
View file @
92cb0a6c
...
...
@@ -18,6 +18,7 @@ module Gargantext.Core.Text.Corpus.API
)
where
import
Conduit
import
Data.Maybe
import
Gargantext.API.Admin.Orchestrator.Types
(
ExternalAPIs
(
..
),
externalAPIs
)
import
Gargantext.Core
(
Lang
(
..
))
...
...
@@ -37,11 +38,18 @@ get :: ExternalAPIs
->
Lang
->
Query
->
Maybe
Limit
->
IO
[
HyperdataDocument
]
get
PubMed
_la
q
_l
=
PUBMED
.
get
q
default_limit
-- EN only by default
-- -> IO [HyperdataDocument]
->
IO
(
ConduitT
()
HyperdataDocument
IO
()
)
get
PubMed
_la
q
_l
=
do
res
<-
PUBMED
.
get
q
default_limit
-- EN only by default
pure
$
yieldMany
res
get
HAL
la
q
_l
=
HAL
.
getC
la
q
Nothing
get
IsTex
la
q
_l
=
ISTEX
.
get
la
q
default_limit
get
Isidore
la
q
_l
=
ISIDORE
.
get
la
(
fromIntegral
<$>
default_limit
)
(
Just
q
)
Nothing
get
IsTex
la
q
_l
=
do
res
<-
ISTEX
.
get
la
q
default_limit
pure
$
yieldMany
res
get
Isidore
la
q
_l
=
do
res
<-
ISIDORE
.
get
la
(
fromIntegral
<$>
default_limit
)
(
Just
q
)
Nothing
pure
$
yieldMany
res
get
_
_
_
_
=
undefined
-- | Some Sugar for the documentation
...
...
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
92cb0a6c
...
...
@@ -30,12 +30,12 @@ get la q ml = do
eDocs
<-
HAL
.
getMetadataWith
q
(
Just
0
)
ml
either
(
panic
.
pack
.
show
)
(
\
d
->
mapM
(
toDoc'
la
)
$
HAL
.
_docs
d
)
eDocs
getC
::
Lang
->
Text
->
Maybe
Integer
->
IO
[
HyperdataDocument
]
getC
::
Lang
->
Text
->
Maybe
Integer
->
IO
(
ConduitT
()
HyperdataDocument
IO
()
)
getC
la
q
ml
=
do
eDocs
<-
HAL
.
getMetadataRecursively
q
(
Just
0
)
ml
case
eDocs
of
Left
err
->
panic
$
pack
$
show
err
Right
docsC
->
runConduit
$
docsC
.|
mapMC
(
toDoc'
la
)
.|
sinkList
Right
docsC
->
pure
$
docsC
.|
mapMC
(
toDoc'
la
)
toDoc'
::
Lang
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
la
(
HAL
.
Corpus
i
t
ab
d
s
aus
affs
struct_id
)
=
do
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
92cb0a6c
...
...
@@ -46,8 +46,10 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
)
where
import
Conduit
import
Control.Lens
((
^.
),
view
,
_Just
,
makeLenses
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Conduit.Internal
(
zipSources
)
import
Data.Either
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Hashable
(
Hashable
)
...
...
@@ -103,6 +105,7 @@ import Gargantext.Prelude
import
Gargantext.Prelude.Crypto.Hash
(
Hash
)
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
qualified
Gargantext.Database.Query.Table.Node.Document.Add
as
Doc
(
add
)
import
qualified
Prelude
as
Prelude
------------------------------------------------------------------------
-- Imports for upgrade function
...
...
@@ -127,7 +130,8 @@ allDataOrigins = map InternalOrigin API.externalAPIs
---------------
data
DataText
=
DataOld
!
[
NodeId
]
|
DataNew
!
[[
HyperdataDocument
]]
|
DataNew
!
(
ConduitT
()
HyperdataDocument
IO
()
)
-- | DataNew ![[HyperdataDocument]]
-- TODO use the split parameter in config file
getDataText
::
FlowCmdM
env
err
m
...
...
@@ -136,9 +140,9 @@ getDataText :: FlowCmdM env err m
->
API
.
Query
->
Maybe
API
.
Limit
->
m
DataText
getDataText
(
ExternalOrigin
api
)
la
q
li
=
liftBase
$
DataNew
<$>
splitEvery
500
<$>
API
.
get
api
(
_tt_lang
la
)
q
li
getDataText
(
ExternalOrigin
api
)
la
q
li
=
liftBase
$
do
docsC
<-
API
.
get
api
(
_tt_lang
la
)
q
li
pure
$
DataNew
docsC
getDataText
(
InternalOrigin
_
)
_la
q
_li
=
do
(
_masterUserId
,
_masterRootId
,
cId
)
<-
getOrMk_RootWithCorpus
...
...
@@ -161,7 +165,7 @@ flowDataText :: ( FlowCmdM env err m
flowDataText
u
(
DataOld
ids
)
tt
cid
mfslw
_
=
flowCorpusUser
(
_tt_lang
tt
)
u
(
Right
[
cid
])
corpusType
ids
mfslw
where
corpusType
=
(
Nothing
::
Maybe
HyperdataCorpus
)
flowDataText
u
(
DataNew
txt
)
tt
cid
mfslw
logStatus
=
flowCorpus
u
(
Right
[
cid
])
tt
mfslw
txt
logStatus
flowDataText
u
(
DataNew
txt
C
)
tt
cid
mfslw
logStatus
=
flowCorpus
u
(
Right
[
cid
])
tt
mfslw
txtC
logStatus
------------------------------------------------------------------------
-- TODO use proxy
...
...
@@ -173,8 +177,9 @@ flowAnnuaire :: (FlowCmdM env err m)
->
(
JobLog
->
m
()
)
->
m
AnnuaireId
flowAnnuaire
u
n
l
filePath
logStatus
=
do
docs
<-
liftBase
$
((
splitEvery
500
<$>
readFile_Annuaire
filePath
)
::
IO
[[
HyperdataContact
]])
flow
(
Nothing
::
Maybe
HyperdataAnnuaire
)
u
n
l
Nothing
docs
logStatus
-- TODO Conduit for file
docs
<-
liftBase
$
((
readFile_Annuaire
filePath
)
::
IO
[
HyperdataContact
])
flow
(
Nothing
::
Maybe
HyperdataAnnuaire
)
u
n
l
Nothing
(
yieldMany
docs
)
logStatus
------------------------------------------------------------------------
flowCorpusFile
::
(
FlowCmdM
env
err
m
)
...
...
@@ -189,8 +194,9 @@ flowCorpusFile u n l la ff fp mfslw logStatus = do
eParsed
<-
liftBase
$
parseFile
ff
fp
case
eParsed
of
Right
parsed
->
do
let
docs
=
splitEvery
500
$
take
l
parsed
flowCorpus
u
n
la
mfslw
(
map
(
map
toHyperdataDocument
)
docs
)
logStatus
flowCorpus
u
n
la
mfslw
(
yieldMany
parsed
.|
mapC
toHyperdataDocument
)
logStatus
--let docs = splitEvery 500 $ take l parsed
--flowCorpus u n la mfslw (yieldMany $ map (map toHyperdataDocument) docs) logStatus
Left
e
->
panic
$
"Error: "
<>
(
T
.
pack
e
)
------------------------------------------------------------------------
...
...
@@ -201,7 +207,7 @@ flowCorpus :: (FlowCmdM env err m, FlowCorpus a)
->
Either
CorpusName
[
CorpusId
]
->
TermType
Lang
->
Maybe
FlowSocialListWith
->
[[
a
]]
->
ConduitT
()
a
IO
()
->
(
JobLog
->
m
()
)
->
m
CorpusId
flowCorpus
=
flow
(
Nothing
::
Maybe
HyperdataCorpus
)
...
...
@@ -216,23 +222,36 @@ flow :: ( FlowCmdM env err m
->
Either
CorpusName
[
CorpusId
]
->
TermType
Lang
->
Maybe
FlowSocialListWith
->
[[
a
]]
->
ConduitT
()
a
IO
()
->
(
JobLog
->
m
()
)
->
m
CorpusId
flow
c
u
cn
la
mfslw
docs
logStatus
=
do
flow
c
u
cn
la
mfslw
docs
C
logStatus
=
do
-- TODO if public insertMasterDocs else insertUserDocs
ids
<-
traverse
(
\
(
idx
,
doc
)
->
do
id
<-
insertMasterDocs
c
la
doc
logStatus
JobLog
{
_scst_succeeded
=
Just
$
1
+
idx
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
$
length
docs
-
idx
,
_scst_events
=
Just
[]
}
pure
id
)
(
zip
[
1
..
]
docs
)
flowCorpusUser
(
la
^.
tt_lang
)
u
cn
c
(
concat
ids
)
mfslw
ids
<-
liftBase
$
runConduit
$
zipSources
(
yieldMany
[
1
..
])
docsC
.|
mapMC
insertDoc
.|
sinkList
-- ids <- traverse (\(idx, doc) -> do
-- id <- insertMasterDocs c la doc
-- logStatus JobLog { _scst_succeeded = Just $ 1 + idx
-- , _scst_failed = Just 0
-- , _scst_remaining = Just $ length docs - idx
-- , _scst_events = Just []
-- }
-- pure id
-- ) (zip [1..] docs)
flowCorpusUser
(
la
^.
tt_lang
)
u
cn
c
ids
mfslw
where
insertDoc
(
idx
,
doc
)
=
do
id
<-
insertMasterDocs
c
la
[
doc
]
-- logStatus JobLog { _scst_succeeded = Just $ 1 + idx
-- , _scst_failed = Just 0
-- , _scst_remaining = Just $ length docs - idx
-- , _scst_events = Just []
-- }
pure
$
Prelude
.
head
id
------------------------------------------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment