Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
10
Merge Requests
10
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
797e19df
Commit
797e19df
authored
Nov 10, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[flow] add better progress report to flow corpus
parent
8306c484
Pipeline
#2064
failed with stage
in 10 minutes and 21 seconds
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
32 additions
and
17 deletions
+32
-17
Main.hs
bin/gargantext-import/Main.hs
+3
-3
Contact.hs
src/Gargantext/API/Node/Contact.hs
+1
-1
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+4
-3
DocumentUpload.hs
src/Gargantext/API/Node/DocumentUpload.hs
+1
-1
DocumentsFromWriteNodes.hs
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
+1
-1
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+22
-8
No files found.
bin/gargantext-import/Main.hs
View file @
797e19df
...
...
@@ -47,13 +47,13 @@ main = do
tt
=
(
Multi
EN
)
format
=
CsvGargV3
-- CsvHal --WOS
corpus
::
forall
m
.
FlowCmdM
DevEnv
GargError
m
=>
m
CorpusId
corpus
=
flowCorpusFile
(
UserName
$
cs
user
)
(
Left
(
cs
name
::
Text
))
(
read
limit
::
Int
)
tt
format
corpusPath
Nothing
corpus
=
flowCorpusFile
(
UserName
$
cs
user
)
(
Left
(
cs
name
::
Text
))
(
read
limit
::
Int
)
tt
format
corpusPath
Nothing
(
\
_
->
pure
()
)
corpusCsvHal
::
forall
m
.
FlowCmdM
DevEnv
GargError
m
=>
m
CorpusId
corpusCsvHal
=
flowCorpusFile
(
UserName
$
cs
user
)
(
Left
(
cs
name
::
Text
))
(
read
limit
::
Int
)
tt
CsvHal
corpusPath
Nothing
corpusCsvHal
=
flowCorpusFile
(
UserName
$
cs
user
)
(
Left
(
cs
name
::
Text
))
(
read
limit
::
Int
)
tt
CsvHal
corpusPath
Nothing
(
\
_
->
pure
()
)
annuaire
::
forall
m
.
FlowCmdM
DevEnv
GargError
m
=>
m
CorpusId
annuaire
=
flowAnnuaire
(
UserName
$
cs
user
)
(
Left
"Annuaire"
)
(
Multi
EN
)
corpusPath
annuaire
=
flowAnnuaire
(
UserName
$
cs
user
)
(
Left
"Annuaire"
)
(
Multi
EN
)
corpusPath
(
\
_
->
pure
()
)
{-
let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
...
...
src/Gargantext/API/Node/Contact.hs
View file @
797e19df
...
...
@@ -93,7 +93,7 @@ addContact u nId (AddContactParams fn ln) logStatus = do
,
_scst_remaining
=
Just
1
,
_scst_events
=
Just
[]
}
_
<-
flow
(
Nothing
::
Maybe
HyperdataAnnuaire
)
u
(
Right
[
nId
])
(
Multi
EN
)
Nothing
[[
hyperdataContact
fn
ln
]]
_
<-
flow
(
Nothing
::
Maybe
HyperdataAnnuaire
)
u
(
Right
[
nId
])
(
Multi
EN
)
Nothing
[[
hyperdataContact
fn
ln
]]
logStatus
pure
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
797e19df
...
...
@@ -213,15 +213,15 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
-- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
txts
<-
mapM
(
\
db
->
getDataText
db
(
Multi
l
)
q
maybeLimit
)
[
database2origin
dbs
]
txts
<-
mapM
(
\
db
->
getDataText
db
(
Multi
l
)
q
maybeLimit
)
[
database2origin
dbs
]
logStatus
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_remaining
=
Just
$
1
+
length
txts
,
_scst_events
=
Just
[]
}
cids
<-
mapM
(
\
txt
->
flowDataText
user
txt
(
Multi
l
)
cid
Nothing
)
txts
cids
<-
mapM
(
\
txt
->
flowDataText
user
txt
(
Multi
l
)
cid
Nothing
logStatus
)
txts
printDebug
"corpus id"
cids
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
sendMail
user
...
...
@@ -297,6 +297,7 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
(
Multi
$
fromMaybe
EN
l
)
Nothing
(
map
(
map
toHyperdataDocument
)
docs
)
logStatus
printDebug
"Extraction finished : "
cid
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
...
...
src/Gargantext/API/Node/DocumentUpload.hs
View file @
797e19df
...
...
@@ -107,6 +107,6 @@ documentUpload uId nId doc logStatus = do
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
T
.
pack
$
show
EN
}
_
<-
flowDataText
(
RootId
(
NodeId
uId
))
(
DataNew
[[
hd
]])
(
Multi
EN
)
cId
Nothing
_
<-
flowDataText
(
RootId
(
NodeId
uId
))
(
DataNew
[[
hd
]])
(
Multi
EN
)
cId
Nothing
logStatus
pure
$
jobLogSuccess
jl
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
View file @
797e19df
...
...
@@ -100,7 +100,7 @@ documentsFromWriteNodes uId nId _p logStatus = do
let
parsedE
=
(
\
(
node
,
contents
)
->
hyperdataDocumentFromFrameWrite
(
node
^.
node_hyperdata
,
contents
))
<$>
frameWritesWithContents
let
parsed
=
rights
parsedE
_
<-
flowDataText
(
RootId
(
NodeId
uId
))
(
DataNew
[
parsed
])
(
Multi
EN
)
cId
Nothing
_
<-
flowDataText
(
RootId
(
NodeId
uId
))
(
DataNew
[
parsed
])
(
Multi
EN
)
cId
Nothing
logStatus
pure
$
jobLogSuccess
jobLog
------------------------------------------------------------------------
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
797e19df
...
...
@@ -65,6 +65,7 @@ import qualified Data.HashMap.Strict as HashMap
import
qualified
Gargantext.Data.HashMap.Strict.Utils
as
HashMap
import
qualified
Data.Map
as
Map
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
))
import
Gargantext.Core
(
Lang
(
..
),
PosTagAlgo
(
..
))
import
Gargantext.Core.Ext.IMT
(
toSchoolName
)
import
Gargantext.Core.Ext.IMTUser
(
readFile_Annuaire
)
...
...
@@ -154,11 +155,12 @@ flowDataText :: ( FlowCmdM env err m
->
TermType
Lang
->
CorpusId
->
Maybe
FlowSocialListWith
->
(
JobLog
->
m
()
)
->
m
CorpusId
flowDataText
u
(
DataOld
ids
)
tt
cid
mfslw
=
flowCorpusUser
(
_tt_lang
tt
)
u
(
Right
[
cid
])
corpusType
ids
mfslw
flowDataText
u
(
DataOld
ids
)
tt
cid
mfslw
_
=
flowCorpusUser
(
_tt_lang
tt
)
u
(
Right
[
cid
])
corpusType
ids
mfslw
where
corpusType
=
(
Nothing
::
Maybe
HyperdataCorpus
)
flowDataText
u
(
DataNew
txt
)
tt
cid
mfslw
=
flowCorpus
u
(
Right
[
cid
])
tt
mfslw
txt
flowDataText
u
(
DataNew
txt
)
tt
cid
mfslw
logStatus
=
flowCorpus
u
(
Right
[
cid
])
tt
mfslw
txt
logStatus
------------------------------------------------------------------------
-- TODO use proxy
...
...
@@ -167,10 +169,11 @@ flowAnnuaire :: (FlowCmdM env err m)
->
Either
CorpusName
[
CorpusId
]
->
(
TermType
Lang
)
->
FilePath
->
(
JobLog
->
m
()
)
->
m
AnnuaireId
flowAnnuaire
u
n
l
filePath
=
do
flowAnnuaire
u
n
l
filePath
logStatus
=
do
docs
<-
liftBase
$
((
splitEvery
500
<$>
readFile_Annuaire
filePath
)
::
IO
[[
HyperdataContact
]])
flow
(
Nothing
::
Maybe
HyperdataAnnuaire
)
u
n
l
Nothing
docs
flow
(
Nothing
::
Maybe
HyperdataAnnuaire
)
u
n
l
Nothing
docs
logStatus
------------------------------------------------------------------------
flowCorpusFile
::
(
FlowCmdM
env
err
m
)
...
...
@@ -179,13 +182,14 @@ flowCorpusFile :: (FlowCmdM env err m)
->
Limit
-- Limit the number of docs (for dev purpose)
->
TermType
Lang
->
FileFormat
->
FilePath
->
Maybe
FlowSocialListWith
->
(
JobLog
->
m
()
)
->
m
CorpusId
flowCorpusFile
u
n
l
la
ff
fp
mfslw
=
do
flowCorpusFile
u
n
l
la
ff
fp
mfslw
logStatus
=
do
eParsed
<-
liftBase
$
parseFile
ff
fp
case
eParsed
of
Right
parsed
->
do
let
docs
=
splitEvery
500
$
take
l
parsed
flowCorpus
u
n
la
mfslw
(
map
(
map
toHyperdataDocument
)
docs
)
flowCorpus
u
n
la
mfslw
(
map
(
map
toHyperdataDocument
)
docs
)
logStatus
Left
e
->
panic
$
"Error: "
<>
(
T
.
pack
e
)
------------------------------------------------------------------------
...
...
@@ -197,6 +201,7 @@ flowCorpus :: (FlowCmdM env err m, FlowCorpus a)
->
TermType
Lang
->
Maybe
FlowSocialListWith
->
[[
a
]]
->
(
JobLog
->
m
()
)
->
m
CorpusId
flowCorpus
=
flow
(
Nothing
::
Maybe
HyperdataCorpus
)
...
...
@@ -211,10 +216,19 @@ flow :: ( FlowCmdM env err m
->
TermType
Lang
->
Maybe
FlowSocialListWith
->
[[
a
]]
->
(
JobLog
->
m
()
)
->
m
CorpusId
flow
c
u
cn
la
mfslw
docs
=
do
flow
c
u
cn
la
mfslw
docs
logStatus
=
do
-- TODO if public insertMasterDocs else insertUserDocs
ids
<-
traverse
(
insertMasterDocs
c
la
)
docs
ids
<-
traverse
(
\
(
idx
,
doc
)
->
do
id
<-
insertMasterDocs
c
la
doc
logStatus
JobLog
{
_scst_succeeded
=
Just
$
1
+
idx
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
$
length
docs
-
idx
,
_scst_events
=
Just
[]
}
pure
id
)
(
zip
[
1
..
]
docs
)
flowCorpusUser
(
la
^.
tt_lang
)
u
cn
c
(
concat
ids
)
mfslw
------------------------------------------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment