Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
d63df339
Commit
d63df339
authored
Aug 05, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[upload] fix csv upload job response
Now the job doesn't return 'succeeded' fields when it actually fails.
parent
8667dfeb
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
265 additions
and
220 deletions
+265
-220
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+58
-49
CleanCsvCorpus.hs
bin/gargantext-cli/CleanCsvCorpus.hs
+14
-11
Main.hs
bin/gargantext-cli/Main.hs
+26
-29
Main.hs
bin/gargantext-phylo/Main.hs
+10
-9
Types.hs
src/Gargantext/API/Admin/Orchestrator/Types.hs
+0
-12
Job.hs
src/Gargantext/API/Job.hs
+21
-16
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+36
-20
FrameCalcUpload.hs
src/Gargantext/API/Node/FrameCalcUpload.hs
+4
-3
Routes.hs
src/Gargantext/API/Routes.hs
+3
-3
IMT.hs
src/Gargantext/Core/Ext/IMT.hs
+9
-4
Convert.hs
src/Gargantext/Core/Text/Convert.hs
+8
-2
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+33
-21
CSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+34
-33
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+9
-8
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
d63df339
...
@@ -16,11 +16,24 @@ Adaptative Phylo binaries
...
@@ -16,11 +16,24 @@ Adaptative Phylo binaries
module
Main
where
module
Main
where
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Aeson
import
Data.Aeson
import
Data.Either
(
Either
(
..
))
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.String
(
String
)
import
Data.String
(
String
)
import
GHC.IO
(
FilePath
)
import
qualified
Prelude
as
Prelude
import
System.Environment
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
Data.Text
(
Text
,
unwords
,
unpack
,
replace
,
pack
)
import
Data.Text
(
Text
,
unwords
,
unpack
,
replace
,
pack
)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
qualified
Data.Text
as
T
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
...
@@ -36,20 +49,6 @@ import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setCon
...
@@ -36,20 +49,6 @@ import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setCon
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
import
GHC.IO
(
FilePath
)
import
Prelude
(
Either
(
Left
,
Right
),
toInteger
)
import
System.Environment
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
qualified
Data.Text
as
T
-- import Debug.Trace (trace)
-- import Debug.Trace (trace)
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
...
@@ -84,13 +83,13 @@ toDays y m d = fromIntegral
...
@@ -84,13 +83,13 @@ toDays y m d = fromIntegral
toPhyloDate
::
Int
->
Int
->
Int
->
TimeUnit
->
Date
toPhyloDate
::
Int
->
Int
->
Int
->
TimeUnit
->
Date
toPhyloDate
y
m
d
tu
=
case
tu
of
toPhyloDate
y
m
d
tu
=
case
tu
of
Year
_
_
_
->
y
Year
_
_
_
->
y
Month
_
_
_
->
toMonths
(
toInteger
y
)
m
d
Month
_
_
_
->
toMonths
(
Prelude
.
toInteger
y
)
m
d
Week
_
_
_
->
div
(
toDays
(
toInteger
y
)
m
d
)
7
Week
_
_
_
->
div
(
toDays
(
Prelude
.
toInteger
y
)
m
d
)
7
Day
_
_
_
->
toDays
(
toInteger
y
)
m
d
Day
_
_
_
->
toDays
(
Prelude
.
toInteger
y
)
m
d
toPhyloDate'
::
Int
->
Int
->
Int
->
Text
toPhyloDate'
::
Int
->
Int
->
Int
->
Text
toPhyloDate'
y
m
d
=
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
toPhyloDate'
y
m
d
=
pack
$
showGregorian
$
fromGregorian
(
Prelude
.
toInteger
y
)
m
d
--------------
--------------
...
@@ -113,43 +112,53 @@ termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList p
...
@@ -113,43 +112,53 @@ termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList p
-- | To transform a Wos file (or [file]) into a list of Docs
-- | To transform a Wos file (or [file]) into a list of Docs
wosToDocs
::
Int
->
Patterns
->
TimeUnit
->
FilePath
->
IO
([
Document
])
wosToDocs
::
Int
->
Patterns
->
TimeUnit
->
FilePath
->
IO
[
Document
]
wosToDocs
limit
patterns
time
path
=
do
wosToDocs
limit
patterns
time
path
=
do
files
<-
getFilesFromPath
path
files
<-
getFilesFromPath
path
take
limit
let
parseFile'
file
=
do
<$>
map
(
\
d
->
let
title
=
fromJust
$
_hd_title
d
eParsed
<-
parseFile
WOS
(
path
<>
file
)
abstr
=
if
(
isJust
$
_hd_abstract
d
)
case
eParsed
of
then
fromJust
$
_hd_abstract
d
Right
ps
->
pure
ps
else
""
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
in
Document
(
toPhyloDate
take
limit
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
<$>
map
(
\
d
->
let
title
=
fromJust
$
_hd_title
d
(
fromJust
$
_hd_publication_month
d
)
abstr
=
if
(
isJust
$
_hd_abstract
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
then
fromJust
$
_hd_abstract
d
(
toPhyloDate'
else
""
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
in
Document
(
toPhyloDate
(
fromJust
$
_hd_publication_month
d
)
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_day
d
))
(
fromJust
$
_hd_publication_month
d
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
(
fromJust
$
_hd_publication_day
d
)
time
)
<$>
concat
(
toPhyloDate'
<$>
mapConcurrently
(
\
file
->
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
&&
(
isJust
$
_hd_title
d
))
(
fromJust
$
_hd_publication_day
d
))
<$>
parseFile
WOS
(
path
<>
file
)
)
files
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
&&
(
isJust
$
_hd_title
d
))
<$>
parseFile'
file
)
files
-- To transform a Csv file into a list of Document
-- To transform a Csv file into a list of Document
csvToDocs
::
CorpusParser
->
Patterns
->
TimeUnit
->
FilePath
->
IO
([
Document
])
csvToDocs
::
CorpusParser
->
Patterns
->
TimeUnit
->
FilePath
->
IO
[
Document
]
csvToDocs
parser
patterns
time
path
=
csvToDocs
parser
patterns
time
path
=
case
parser
of
case
parser
of
Wos
_
->
undefined
Wos
_
->
undefined
Csv
limit
->
Vector
.
toList
Csv
limit
->
do
<$>
Vector
.
take
limit
eR
<-
Csv
.
readFile
path
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
case
eR
of
(
toPhyloDate'
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
Right
r
->
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
pure
$
Vector
.
toList
Nothing
$
Vector
.
take
limit
[]
$
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
)
<$>
snd
<$>
Csv
.
readFile
path
(
toPhyloDate'
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
)
$
snd
r
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
Csv'
limit
->
Vector
.
toList
Csv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
...
...
bin/gargantext-cli/CleanCsvCorpus.hs
View file @
d63df339
...
@@ -15,6 +15,7 @@ compress the contexts around the main terms of the query.
...
@@ -15,6 +15,7 @@ compress the contexts around the main terms of the query.
module
CleanCsvCorpus
where
module
CleanCsvCorpus
where
--import GHC.IO (FilePath)
--import GHC.IO (FilePath)
import
Data.Either
(
Either
(
..
))
import
Data.SearchEngine
as
S
import
Data.SearchEngine
as
S
import
qualified
Data.Set
as
S
import
qualified
Data.Set
as
S
import
Data.Text
(
pack
)
import
Data.Text
(
pack
)
...
@@ -39,17 +40,19 @@ main = do
...
@@ -39,17 +40,19 @@ main = do
--let q = ["water", "scarcity", "morocco", "shortage","flood"]
--let q = ["water", "scarcity", "morocco", "shortage","flood"]
let
q
=
[
"gratuit"
,
"gratuité"
,
"culture"
,
"culturel"
]
let
q
=
[
"gratuit"
,
"gratuité"
,
"culture"
,
"culturel"
]
(
h
,
csvDocs
)
<-
CSV
.
readFile
rPath
eDocs
<-
CSV
.
readFile
rPath
case
eDocs
of
Right
(
h
,
csvDocs
)
->
do
putStrLn
$
"Number of documents before:"
<>
show
(
V
.
length
csvDocs
)
putStrLn
$
"Mean size of docs:"
<>
show
(
CSV
.
docsSize
csvDocs
)
putStrLn
$
"Number of documents before:"
<>
show
(
V
.
length
csvDocs
)
let
docs
=
CSV
.
toDocs
csvDocs
putStrLn
$
"Mean size of docs:"
<>
show
(
CSV
.
docsSize
csvDocs
)
let
engine
=
insertDocs
docs
initialDocSearchEngine
let
docIds
=
S
.
query
engine
(
map
pack
q
)
let
docs'
=
CSV
.
fromDocs
$
filterDocs
docIds
(
V
.
fromList
docs
)
let
docs
=
CSV
.
toDocs
csvDocs
putStrLn
$
"Number of documents after:"
<>
show
(
V
.
length
docs'
)
let
engine
=
insertDocs
docs
initialDocSearchEngine
putStrLn
$
"Mean size of docs:"
<>
show
(
CSV
.
docsSize
docs'
)
let
docIds
=
S
.
query
engine
(
map
pack
q
)
let
docs'
=
CSV
.
fromDocs
$
filterDocs
docIds
(
V
.
fromList
docs
)
putStrLn
$
"Number of documents after:"
<>
show
(
V
.
length
docs'
)
CSV
.
writeFile
wPath
(
h
,
docs'
)
putStrLn
$
"Mean size of docs:"
<>
show
(
CSV
.
docsSize
docs'
)
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
CSV
.
writeFile
wPath
(
h
,
docs'
)
bin/gargantext-cli/Main.hs
View file @
d63df339
...
@@ -17,30 +17,24 @@ Main specifications to index a corpus with a term list
...
@@ -17,30 +17,24 @@ Main specifications to index a corpus with a term list
module
Main
where
module
Main
where
import
Data.ByteString.Lazy
(
writeFile
)
import
Control.Concurrent.Async
as
CCA
(
mapConcurrently
)
import
Control.Concurrent
(
getNumCapabilities
,
myThreadId
,
threadCapability
)
import
Data.Text
(
pack
)
import
qualified
Data.Text
as
DT
import
Data.Tuple.Extra
(
both
)
import
qualified
Data.Vector
as
DV
import
Control.Monad
(
zipWithM
)
import
Control.Monad
(
zipWithM
)
import
Control.Monad.IO.Class
import
Control.Monad.IO.Class
import
Data.Map
(
Map
)
import
qualified
Data.Map
as
DM
import
GHC.Generics
import
Data.Aeson
import
Data.Aeson
import
Data.ByteString.Lazy
(
writeFile
)
import
Data.
Text
(
Text
)
import
Data.
Either
(
Either
(
..
)
)
import
Data.List
(
cycle
,
concat
,
unwords
)
import
Data.List
(
cycle
,
concat
,
unwords
)
import
Data.List.Split
(
chunksOf
)
import
Data.List.Split
(
chunksOf
)
import
Data.Map
(
Map
)
import
qualified
Data.Map
as
DM
import
Data.Text
(
pack
,
Text
)
import
qualified
Data.Text
as
DT
import
Data.Tuple.Extra
(
both
)
import
qualified
Data.Vector
as
DV
import
GHC.Generics
import
System.IO
(
hPutStr
,
hFlush
,
stderr
)
import
System.IO
(
hPutStr
,
hFlush
,
stderr
)
import
System.Environment
import
System.Environment
import
Control.Concurrent.Async
as
CCA
(
mapConcurrently
)
import
Control.Concurrent
(
getNumCapabilities
,
myThreadId
,
threadCapability
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core
import
Gargantext.Core
...
@@ -92,22 +86,25 @@ main = do
...
@@ -92,22 +86,25 @@ main = do
[
corpusFile
,
termListFile
,
outputFile
]
<-
getArgs
[
corpusFile
,
termListFile
,
outputFile
]
<-
getArgs
--corpus :: IO (DM.IntMap [[Text]])
--corpus :: IO (DM.IntMap [[Text]])
corpus
<-
DM
.
fromListWith
(
<>
)
eCorpusFile
<-
readFile
corpusFile
.
DV
.
toList
case
eCorpusFile
of
.
DV
.
map
(
\
n
->
(
csv_publication_year
n
,
[(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)]))
Right
cf
->
do
.
snd
let
corpus
=
DM
.
fromListWith
(
<>
)
<$>
readFile
corpusFile
.
DV
.
toList
.
DV
.
map
(
\
n
->
(
csv_publication_year
n
,
[(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)]))
.
snd
$
cf
-- termListMap :: [Text]
-- termListMap :: [Text]
termList
<-
csvMapTermList
termListFile
termList
<-
csvMapTermList
termListFile
putStrLn
$
show
$
length
termList
putStrLn
$
show
$
length
termList
let
patterns
=
buildPatterns
termList
let
patterns
=
buildPatterns
termList
-- r <- mapConcurrentlyChunked (filterTermsAndCooc patterns) (DM.toList corpus)
-- r <- mapConcurrentlyChunked (filterTermsAndCooc patterns) (DM.toList corpus)
r
<-
mapConcurrently
(
filterTermsAndCooc
patterns
)
(
DM
.
toList
corpus
)
r
<-
mapConcurrently
(
filterTermsAndCooc
patterns
)
(
DM
.
toList
corpus
)
writeFile
outputFile
$
encode
(
CoocByYears
r
)
writeFile
outputFile
$
encode
(
CoocByYears
r
)
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
...
...
bin/gargantext-phylo/Main.hs
View file @
d63df339
...
@@ -24,6 +24,16 @@ import Data.Maybe
...
@@ -24,6 +24,16 @@ import Data.Maybe
import
Data.Text
(
Text
,
unwords
)
import
Data.Text
(
Text
,
unwords
)
import
GHC.Generics
import
GHC.Generics
import
GHC.IO
(
FilePath
)
import
GHC.IO
(
FilePath
)
import
System.Directory
(
doesFileExist
)
import
System.Environment
import
qualified
Data.ByteString.Lazy
as
L
import
qualified
Data.List
as
DL
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
DT
import
qualified
Data.Vector
as
DV
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
CSV
import
qualified
Prelude
as
P
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Context
(
TermList
)
...
@@ -36,15 +46,6 @@ import Gargantext.Core.Viz.Phylo.LevelMaker
...
@@ -36,15 +46,6 @@ import Gargantext.Core.Viz.Phylo.LevelMaker
import
Gargantext.Core.Viz.Phylo.Tools
import
Gargantext.Core.Viz.Phylo.Tools
import
Gargantext.Core.Viz.Phylo.View.Export
import
Gargantext.Core.Viz.Phylo.View.Export
import
Gargantext.Core.Viz.Phylo.View.ViewMaker
import
Gargantext.Core.Viz.Phylo.View.ViewMaker
import
System.Directory
(
doesFileExist
)
import
System.Environment
import
qualified
Data.ByteString.Lazy
as
L
import
qualified
Data.List
as
DL
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
DT
import
qualified
Data.Vector
as
DV
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
CSV
import
qualified
Prelude
as
P
--------------
--------------
...
...
src/Gargantext/API/Admin/Orchestrator/Types.hs
View file @
d63df339
...
@@ -131,18 +131,6 @@ type ScrapersEnv = JobEnv JobLog JobLog
...
@@ -131,18 +131,6 @@ type ScrapersEnv = JobEnv JobLog JobLog
type
ScraperAPI
=
AsyncJobsAPI
JobLog
ScraperInput
JobLog
type
ScraperAPI
=
AsyncJobsAPI
JobLog
ScraperInput
JobLog
jobLogInit
::
Int
->
JobLog
jobLogInit
n
=
JobLog
{
_scst_succeeded
=
Just
n
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
jobLogSucc
::
JobLog
->
JobLog
jobLogSucc
jl
=
over
(
scst_succeeded
.
_Just
)
(
+
1
)
$
over
(
scst_remaining
.
_Just
)
(
\
c
->
c
-
1
)
jl
jobLogErr
::
JobLog
->
JobLog
jobLogErr
jl
=
over
(
scst_failed
.
_Just
)
(
+
1
)
$
over
(
scst_remaining
.
_Just
)
(
\
c
->
c
-
1
)
jl
------------------------------------------------------------------------
------------------------------------------------------------------------
type
AsyncJobs
event
ctI
input
output
=
type
AsyncJobs
event
ctI
input
output
=
AsyncJobsAPI'
'U
n
safe
'S
a
fe
ctI
'[
J
SON
]
Maybe
event
input
output
AsyncJobsAPI'
'U
n
safe
'S
a
fe
ctI
'[
J
SON
]
Maybe
event
input
output
src/Gargantext/API/Job.hs
View file @
d63df339
module
Gargantext.API.Job
where
module
Gargantext.API.Job
where
import
Control.Lens
(
over
,
_Just
)
import
Data.IORef
import
Data.IORef
import
Data.Maybe
import
Data.Maybe
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
))
import
Gargantext.API.Admin.Orchestrator.Types
jobLogInit
::
Int
->
JobLog
jobLogInit
::
Int
->
JobLog
...
@@ -16,25 +17,29 @@ jobLogInit rem =
...
@@ -16,25 +17,29 @@ jobLogInit rem =
,
_scst_events
=
Just
[]
}
,
_scst_events
=
Just
[]
}
jobLogSuccess
::
JobLog
->
JobLog
jobLogSuccess
::
JobLog
->
JobLog
jobLogSuccess
(
JobLog
{
_scst_succeeded
=
mSucc
jobLogSuccess
jl
=
over
(
scst_succeeded
.
_Just
)
(
+
1
)
$
,
_scst_remaining
=
mRem
over
(
scst_remaining
.
_Just
)
(
\
x
->
x
-
1
)
jl
,
_scst_failed
=
mFail
,
_scst_events
=
evt
})
=
JobLog
{
_scst_succeeded
=
(
+
1
)
<$>
mSucc
,
_scst_remaining
=
(
\
x
->
x
-
1
)
<$>
mRem
,
_scst_failed
=
mFail
,
_scst_events
=
evt
}
jobLogFail
::
JobLog
->
JobLog
jobLogFail
::
JobLog
->
JobLog
jobLogFail
(
JobLog
{
_scst_succeeded
=
mSucc
jobLogFail
jl
=
over
(
scst_failed
.
_Just
)
(
+
1
)
$
,
_scst_remaining
=
mRem
over
(
scst_remaining
.
_Just
)
(
\
x
->
x
-
1
)
jl
,
_scst_failed
=
mFail
,
_scst_events
=
evt
})
=
jobLogFailTotal
::
JobLog
->
JobLog
jobLogFailTotal
(
JobLog
{
_scst_succeeded
=
mSucc
,
_scst_remaining
=
mRem
,
_scst_failed
=
mFail
,
_scst_events
=
evt
})
=
JobLog
{
_scst_succeeded
=
mSucc
JobLog
{
_scst_succeeded
=
mSucc
,
_scst_remaining
=
(
\
x
->
x
-
1
)
<$>
m
Rem
,
_scst_remaining
=
new
Rem
,
_scst_failed
=
(
+
1
)
<$>
m
Fail
,
_scst_failed
=
new
Fail
,
_scst_events
=
evt
}
,
_scst_events
=
evt
}
where
(
newRem
,
newFail
)
=
case
mRem
of
Nothing
->
(
Nothing
,
mFail
)
Just
rem
->
(
Just
0
,
(
+
rem
)
<$>
mFail
)
jobLogEvt
::
JobLog
->
ScraperEvent
->
JobLog
jobLogEvt
jl
evt
=
over
(
scst_events
.
_Just
)
(
\
evts
->
(
evt
:
evts
))
jl
runJobLog
::
MonadBase
IO
m
=>
Int
->
(
JobLog
->
m
()
)
->
m
(
m
()
,
m
()
,
m
JobLog
)
runJobLog
::
MonadBase
IO
m
=>
Int
->
(
JobLog
->
m
()
)
->
m
(
m
()
,
m
()
,
m
JobLog
)
runJobLog
num
logStatus
=
do
runJobLog
num
logStatus
=
do
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
d63df339
...
@@ -27,6 +27,8 @@ import Data.Swagger
...
@@ -27,6 +27,8 @@ import Data.Swagger
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
T
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
GHC.Generics
(
Generic
)
import
qualified
Prelude
as
Prelude
import
Protolude
(
readFile
)
import
Servant
import
Servant
import
Servant.Job.Utils
(
jsonOptions
)
import
Servant.Job.Utils
(
jsonOptions
)
-- import Servant.Multipart
-- import Servant.Multipart
...
@@ -35,8 +37,9 @@ import Test.QuickCheck.Arbitrary
...
@@ -35,8 +37,9 @@ import Test.QuickCheck.Arbitrary
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
,
jobLogSucc
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Job
(
jobLogSuccess
,
jobLogFailTotal
)
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.Searx
import
Gargantext.API.Node.Corpus.Searx
import
Gargantext.API.Node.Corpus.Types
import
Gargantext.API.Node.Corpus.Types
...
@@ -248,28 +251,41 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
...
@@ -248,28 +251,41 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
PresseRIS
->
Parser
.
parseFormat
Parser
.
RisPresse
PresseRIS
->
Parser
.
parseFormat
Parser
.
RisPresse
-- TODO granularity of the logStatus
-- TODO granularity of the logStatus
docs
<-
liftBase
$
splitEvery
500
eDocs
<-
liftBase
$
parse
$
cs
d
<$>
take
1000000
case
eDocs
of
<$>
parse
(
cs
d
)
Right
docs'
->
do
let
docs
=
splitEvery
500
$
take
1000000
docs'
printDebug
"Parsing corpus finished : "
cid
logStatus
jobLog2
printDebug
"Parsing corpus finished : "
cid
logStatus
jobLog2
printDebug
"Starting extraction : "
cid
-- TODO granularity of the logStatus
printDebug
"Starting extraction : "
cid
_cid'
<-
flowCorpus
user
-- TODO granularity of the logStatus
(
Right
[
cid
])
_cid'
<-
flowCorpus
user
(
Multi
$
fromMaybe
EN
l
)
(
Right
[
cid
])
(
map
(
map
toHyperdataDocument
)
docs
)
(
Multi
$
fromMaybe
EN
l
)
(
map
(
map
toHyperdataDocument
)
docs
)
printDebug
"Extraction finished : "
cid
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
sendMail
user
printDebug
"Extraction finished : "
cid
logStatus
jobLog3
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
pure
$
jobLog3
sendMail
user
Left
e
->
do
printDebug
"Error"
e
pure
jobLog3
logStatus
jobLogE
pure
jobLogE
where
where
jobLog2
=
jobLogSucc
jobLog
jobLog2
=
jobLogSuccess
jobLog
jobLog3
=
jobLogSucc
jobLog2
jobLog3
=
jobLogSuccess
jobLog2
jobLogE
=
jobLogFailTotal
jobLog
parseCsvGargV3Path
::
[
Char
]
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseCsvGargV3Path
fp
=
do
contents
<-
readFile
fp
Parser
.
parseFormat
Parser
.
CsvGargV3
$
cs
contents
{-
{-
addToCorpusWithFile :: FlowCmdM env err m
addToCorpusWithFile :: FlowCmdM env err m
...
...
src/Gargantext/API/Node/FrameCalcUpload.hs
View file @
d63df339
...
@@ -17,7 +17,8 @@ import Servant
...
@@ -17,7 +17,8 @@ import Servant
import
Servant.Job.Async
import
Servant.Job.Async
import
Web.FormUrlEncoded
(
FromForm
)
import
Web.FormUrlEncoded
(
FromForm
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
,
jobLogInit
,
jobLogSucc
,
jobLogErr
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Job
(
jobLogInit
,
jobLogSuccess
,
jobLogFail
)
import
Gargantext.API.Node.Corpus.New
(
addToCorpusWithForm
)
import
Gargantext.API.Node.Corpus.New
(
addToCorpusWithForm
)
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Node.Types
(
NewWithForm
(
..
))
import
Gargantext.API.Node.Types
(
NewWithForm
(
..
))
...
@@ -82,8 +83,8 @@ frameCalcUploadAsync uId nId _f logStatus jobLog = do
...
@@ -82,8 +83,8 @@ frameCalcUploadAsync uId nId _f logStatus jobLog = do
-- printDebug "[frameCalcUploadAsync] mCId" mCId
-- printDebug "[frameCalcUploadAsync] mCId" mCId
jobLog2
<-
case
mCId
of
jobLog2
<-
case
mCId
of
Nothing
->
pure
$
jobLog
Err
jobLog
Nothing
->
pure
$
jobLog
Fail
jobLog
Just
cId
->
Just
cId
->
addToCorpusWithForm
(
RootId
(
NodeId
uId
))
cId
(
NewWithForm
CSV
body
Nothing
"calc-upload.csv"
)
logStatus
jobLog
addToCorpusWithForm
(
RootId
(
NodeId
uId
))
cId
(
NewWithForm
CSV
body
Nothing
"calc-upload.csv"
)
logStatus
jobLog
pure
$
jobLogSucc
jobLog2
pure
$
jobLogSucc
ess
jobLog2
src/Gargantext/API/Routes.hs
View file @
d63df339
...
@@ -43,8 +43,8 @@ import qualified Gargantext.API.Public as Public
...
@@ -43,8 +43,8 @@ import qualified Gargantext.API.Public as Public
import
Gargantext.API.Admin.Auth.Types
(
AuthRequest
,
AuthResponse
,
AuthenticatedUser
(
..
),
PathId
(
..
))
import
Gargantext.API.Admin.Auth.Types
(
AuthRequest
,
AuthResponse
,
AuthenticatedUser
(
..
),
PathId
(
..
))
import
Gargantext.API.Admin.Auth
(
withAccess
)
import
Gargantext.API.Admin.Auth
(
withAccess
)
import
Gargantext.API.Admin.FrontEnd
(
FrontEndAPI
)
import
Gargantext.API.Admin.FrontEnd
(
FrontEndAPI
)
import
Gargantext.API.Admin.Orchestrator.Types
(
jobLogInit
)
import
Gargantext.API.Count
(
CountAPI
,
count
,
Query
)
import
Gargantext.API.Count
(
CountAPI
,
count
,
Query
)
import
Gargantext.API.Job
(
jobLogInit
)
import
Gargantext.API.Ngrams
(
TableNgramsApi
,
apiNgramsTableDoc
)
import
Gargantext.API.Ngrams
(
TableNgramsApi
,
apiNgramsTableDoc
)
import
Gargantext.API.Node
import
Gargantext.API.Node
import
Gargantext.API.Prelude
import
Gargantext.API.Prelude
...
@@ -285,7 +285,7 @@ addCorpusWithForm user cid =
...
@@ -285,7 +285,7 @@ addCorpusWithForm user cid =
JobFunction
(
\
i
log'
->
JobFunction
(
\
i
log'
->
let
let
log''
x
=
do
log''
x
=
do
printDebug
"
addToCorpusWithForm
"
x
printDebug
"
[addToCorpusWithForm]
"
x
liftBase
$
log'
x
liftBase
$
log'
x
in
New
.
addToCorpusWithForm
user
cid
i
log''
(
jobLogInit
3
))
in
New
.
addToCorpusWithForm
user
cid
i
log''
(
jobLogInit
3
))
...
@@ -295,7 +295,7 @@ addCorpusWithFile user cid =
...
@@ -295,7 +295,7 @@ addCorpusWithFile user cid =
JobFunction
(
\
i
log'
->
JobFunction
(
\
i
log'
->
let
let
log''
x
=
do
log''
x
=
do
printDebug
"
addToCorpusWithFile
"
x
printDebug
"
[addToCorpusWithFile]
"
x
liftBase
$
log'
x
liftBase
$
log'
x
in
New
.
addToCorpusWithFile
user
cid
i
log''
)
in
New
.
addToCorpusWithFile
user
cid
i
log''
)
...
...
src/Gargantext/Core/Ext/IMT.hs
View file @
d63df339
...
@@ -11,14 +11,17 @@ Portability : POSIX
...
@@ -11,14 +11,17 @@ Portability : POSIX
module
Gargantext.Core.Ext.IMT
where
module
Gargantext.Core.Ext.IMT
where
import
Gargantext.Prelude
import
Data.Either
(
Either
(
..
))
import
Data.Text
(
Text
,
splitOn
)
import
Data.Map
(
Map
)
import
Data.Map
(
Map
)
import
Data.Text
(
Text
,
splitOn
)
import
qualified
Data.Set
as
S
import
qualified
Data.Set
as
S
import
qualified
Data.List
as
DL
import
qualified
Data.List
as
DL
import
qualified
Data.Vector
as
DV
import
qualified
Data.Vector
as
DV
import
qualified
Data.Map
as
M
import
qualified
Data.Map
as
M
import
qualified
Prelude
as
Prelude
import
Gargantext.Prelude
import
Gargantext.Core.Text.Metrics.Utils
as
Utils
import
Gargantext.Core.Text.Metrics.Utils
as
Utils
import
Gargantext.Core.Text.Corpus.Parsers.CSV
as
CSV
import
Gargantext.Core.Text.Corpus.Parsers.CSV
as
CSV
...
@@ -98,8 +101,10 @@ schools = [ School
...
@@ -98,8 +101,10 @@ schools = [ School
mapIdSchool
::
Map
Text
Text
mapIdSchool
::
Map
Text
Text
mapIdSchool
=
M
.
fromList
$
Gargantext
.
Prelude
.
map
(
\
(
School
n
_
i
)
->
(
i
,
n
))
schools
mapIdSchool
=
M
.
fromList
$
Gargantext
.
Prelude
.
map
(
\
(
School
n
_
i
)
->
(
i
,
n
))
schools
hal_data
::
IO
(
DV
.
Vector
CsvHal
)
hal_data
::
IO
(
Either
Prelude
.
String
(
DV
.
Vector
CsvHal
))
hal_data
=
snd
<$>
CSV
.
readCsvHal
"doc/corpus_imt/Gargantext_Corpus.csv"
hal_data
=
do
r
<-
CSV
.
readCsvHal
"doc/corpus_imt/Gargantext_Corpus.csv"
pure
$
snd
<$>
r
names
::
S
.
Set
Text
names
::
S
.
Set
Text
names
=
S
.
fromList
$
Gargantext
.
Prelude
.
map
(
\
s
->
school_id
s
)
schools
names
=
S
.
fromList
$
Gargantext
.
Prelude
.
map
(
\
s
->
school_id
s
)
schools
...
...
src/Gargantext/Core/Text/Convert.hs
View file @
d63df339
...
@@ -16,15 +16,21 @@ Format Converter.
...
@@ -16,15 +16,21 @@ Format Converter.
module
Gargantext.Core.Text.Convert
(
risPress2csvWrite
)
module
Gargantext.Core.Text.Convert
(
risPress2csvWrite
)
where
where
import
Data.Either
(
Either
(
..
))
import
qualified
Data.Text
as
T
import
System.FilePath
(
FilePath
())
-- , takeExtension)
import
System.FilePath
(
FilePath
())
-- , takeExtension)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
writeDocs2Csv
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
writeDocs2Csv
)
import
Gargantext.Core.Text.Corpus.Parsers
(
parseFile
,
FileFormat
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers
(
parseFile
,
FileFormat
(
..
))
risPress2csvWrite
::
FilePath
->
IO
()
risPress2csvWrite
::
FilePath
->
IO
()
risPress2csvWrite
f
=
parseFile
RisPresse
(
f
<>
".ris"
)
risPress2csvWrite
f
=
do
>>=
\
hs
->
writeDocs2Csv
(
f
<>
".csv"
)
hs
eContents
<-
parseFile
RisPresse
(
f
<>
".ris"
)
case
eContents
of
Right
contents
->
writeDocs2Csv
(
f
<>
".csv"
)
contents
Left
e
->
panic
$
"Error: "
<>
(
T
.
pack
e
)
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
d63df339
...
@@ -36,18 +36,20 @@ import Data.String()
...
@@ -36,18 +36,20 @@ import Data.String()
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
Data.Text.Encoding
(
decodeUtf8
)
import
Data.Text.Encoding
(
decodeUtf8
)
import
Data.Tuple.Extra
(
both
,
first
,
second
)
import
Data.Tuple.Extra
(
both
,
first
,
second
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
parseHal
,
parseHal'
,
parseCsv
,
parseCsv'
)
import
Gargantext.Core.Text.Corpus.Parsers.RIS.Presse
(
presseEnrich
)
-- import Gargantext.Core.Text.Learn (detectLangDefault)
import
System.FilePath
(
FilePath
(),
takeExtension
)
import
System.FilePath
(
FilePath
(),
takeExtension
)
import
qualified
Data.ByteString
as
DB
import
qualified
Data.ByteString
as
DB
import
qualified
Data.ByteString.Char8
as
DBC
import
qualified
Data.ByteString.Char8
as
DBC
import
qualified
Data.ByteString.Lazy
as
DBL
import
qualified
Data.ByteString.Lazy
as
DBL
import
qualified
Data.Map
as
DM
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
DT
import
qualified
Data.Text
as
DT
import
qualified
Prelude
as
Prelude
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
parseHal
,
parseHal'
,
parseCsv
,
parseCsv'
)
import
Gargantext.Core.Text.Corpus.Parsers.RIS.Presse
(
presseEnrich
)
-- import Gargantext.Core.Text.Learn (detectLangDefault)
import
qualified
Gargantext.Core.Text.Corpus.Parsers.Date
as
Date
import
qualified
Gargantext.Core.Text.Corpus.Parsers.Date
as
Date
import
qualified
Gargantext.Core.Text.Corpus.Parsers.RIS
as
RIS
import
qualified
Gargantext.Core.Text.Corpus.Parsers.RIS
as
RIS
import
qualified
Gargantext.Core.Text.Corpus.Parsers.WOS
as
WOS
import
qualified
Gargantext.Core.Text.Corpus.Parsers.WOS
as
WOS
...
@@ -75,30 +77,40 @@ data FileFormat = WOS | RIS | RisPresse
...
@@ -75,30 +77,40 @@ data FileFormat = WOS | RIS | RisPresse
-- | XML -- Not Implemented / see :
-- | XML -- Not Implemented / see :
parseFormat
::
FileFormat
->
DB
.
ByteString
->
IO
[
HyperdataDocument
]
parseFormat
::
FileFormat
->
DB
.
ByteString
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseFormat
CsvGargV3
bs
=
pure
$
parseCsv'
$
DBL
.
fromStrict
bs
parseFormat
CsvGargV3
bs
=
pure
$
parseCsv'
$
DBL
.
fromStrict
bs
parseFormat
CsvHal
bs
=
pure
$
parseHal'
$
DBL
.
fromStrict
bs
parseFormat
CsvHal
bs
=
pure
$
parseHal'
$
DBL
.
fromStrict
bs
parseFormat
RisPresse
bs
=
mapM
(
toDoc
RIS
)
parseFormat
RisPresse
bs
=
do
<$>
snd
docs
<-
mapM
(
toDoc
RIS
)
<$>
enrichWith
RisPresse
<$>
snd
$
partitionEithers
<$>
enrichWith
RisPresse
$
[
runParser'
RisPresse
bs
]
$
partitionEithers
parseFormat
WOS
bs
=
mapM
(
toDoc
WOS
)
$
[
runParser'
RisPresse
bs
]
<$>
snd
pure
$
Right
docs
<$>
enrichWith
WOS
parseFormat
WOS
bs
=
do
$
partitionEithers
docs
<-
mapM
(
toDoc
WOS
)
$
[
runParser'
WOS
bs
]
<$>
snd
<$>
enrichWith
WOS
$
partitionEithers
$
[
runParser'
WOS
bs
]
pure
$
Right
docs
parseFormat
_
_
=
undefined
parseFormat
_
_
=
undefined
-- | Parse file into documents
-- | Parse file into documents
-- TODO manage errors here
-- TODO manage errors here
-- TODO: to debug maybe add the filepath in error message
-- TODO: to debug maybe add the filepath in error message
parseFile
::
FileFormat
->
FilePath
->
IO
[
HyperdataDocument
]
parseFile
::
FileFormat
->
FilePath
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseFile
CsvHal
p
=
parseHal
p
parseFile
CsvHal
p
=
parseHal
p
parseFile
CsvGargV3
p
=
parseCsv
p
parseFile
CsvGargV3
p
=
parseCsv
p
parseFile
RisPresse
p
=
join
$
mapM
(
toDoc
RIS
)
<$>
snd
<$>
enrichWith
RisPresse
<$>
readFileWith
RIS
p
parseFile
RisPresse
p
=
do
parseFile
WOS
p
=
join
$
mapM
(
toDoc
WOS
)
<$>
snd
<$>
enrichWith
WOS
<$>
readFileWith
WOS
p
docs
<-
join
$
mapM
(
toDoc
RIS
)
<$>
snd
<$>
enrichWith
RisPresse
<$>
readFileWith
RIS
p
parseFile
ff
p
=
join
$
mapM
(
toDoc
ff
)
<$>
snd
<$>
enrichWith
ff
<$>
readFileWith
ff
p
pure
$
Right
docs
parseFile
WOS
p
=
do
docs
<-
join
$
mapM
(
toDoc
WOS
)
<$>
snd
<$>
enrichWith
WOS
<$>
readFileWith
WOS
p
pure
$
Right
docs
parseFile
ff
p
=
do
docs
<-
join
$
mapM
(
toDoc
ff
)
<$>
snd
<$>
enrichWith
ff
<$>
readFileWith
ff
p
pure
$
Right
docs
toDoc
::
FileFormat
->
[(
Text
,
Text
)]
->
IO
HyperdataDocument
toDoc
::
FileFormat
->
[(
Text
,
Text
)]
->
IO
HyperdataDocument
-- TODO use language for RIS
-- TODO use language for RIS
...
...
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
View file @
d63df339
...
@@ -19,7 +19,7 @@ import qualified Data.ByteString as BS
...
@@ -19,7 +19,7 @@ import qualified Data.ByteString as BS
import
qualified
Data.ByteString.Lazy
as
BL
import
qualified
Data.ByteString.Lazy
as
BL
import
Data.Char
(
ord
)
import
Data.Char
(
ord
)
import
Data.Csv
import
Data.Csv
import
Data.Either
(
Either
(
Left
,
Right
))
import
Data.Either
(
Either
(
..
))
import
Data.Text
(
Text
,
pack
,
length
,
intercalate
)
import
Data.Text
(
Text
,
pack
,
length
,
intercalate
)
import
Data.Time.Segment
(
jour
)
import
Data.Time.Segment
(
jour
)
import
qualified
Data.Vector
as
V
import
qualified
Data.Vector
as
V
...
@@ -27,6 +27,8 @@ import Data.Vector (Vector)
...
@@ -27,6 +27,8 @@ import Data.Vector (Vector)
import
GHC.IO
(
FilePath
)
import
GHC.IO
(
FilePath
)
import
GHC.Word
(
Word8
)
import
GHC.Word
(
Word8
)
import
qualified
Prelude
as
Prelude
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
hiding
(
length
)
import
Gargantext.Prelude
hiding
(
length
)
import
Gargantext.Core.Text
import
Gargantext.Core.Text
...
@@ -192,52 +194,47 @@ delimiter :: Word8
...
@@ -192,52 +194,47 @@ delimiter :: Word8
delimiter
=
fromIntegral
$
ord
'
\t
'
delimiter
=
fromIntegral
$
ord
'
\t
'
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
readCsvOn'
::
[
CsvDoc
->
Text
]
->
FilePath
->
IO
[
Text
]
readCsvOn'
::
[
CsvDoc
->
Text
]
->
FilePath
->
IO
(
Either
Prelude
.
String
[
Text
])
readCsvOn'
fields
fp
=
V
.
toList
readCsvOn'
fields
fp
=
do
<$>
V
.
map
(
\
l
->
intercalate
(
pack
" "
)
$
map
(
\
field
->
field
l
)
fields
)
r
<-
readFile
fp
<$>
snd
pure
$
(
V
.
toList
<$>
readFile
fp
.
V
.
map
(
\
l
->
intercalate
(
pack
" "
)
$
map
(
\
field
->
field
l
)
fields
)
.
snd
)
<$>
r
------------------------------------------------------------------------
------------------------------------------------------------------------
readFileLazy
::
(
FromNamedRecord
a
)
=>
proxy
a
->
FilePath
->
IO
(
Header
,
Vector
a
)
readFileLazy
::
(
FromNamedRecord
a
)
=>
proxy
a
->
FilePath
->
IO
(
Either
Prelude
.
String
(
Header
,
Vector
a
)
)
readFileLazy
f
=
fmap
(
readByteStringLazy
f
)
.
BL
.
readFile
readFileLazy
f
=
fmap
(
readByteStringLazy
f
)
.
BL
.
readFile
readFileStrict
::
(
FromNamedRecord
a
)
=>
proxy
a
->
FilePath
->
IO
(
Header
,
Vector
a
)
readFileStrict
::
(
FromNamedRecord
a
)
=>
proxy
a
->
FilePath
->
IO
(
Either
Prelude
.
String
(
Header
,
Vector
a
)
)
readFileStrict
f
=
fmap
(
readByteStringStrict
f
)
.
BS
.
readFile
readFileStrict
f
=
fmap
(
readByteStringStrict
f
)
.
BS
.
readFile
readByteStringLazy
::
(
FromNamedRecord
a
)
=>
proxy
a
->
BL
.
ByteString
->
(
Header
,
Vector
a
)
readByteStringLazy
::
(
FromNamedRecord
a
)
=>
proxy
a
->
BL
.
ByteString
->
Either
Prelude
.
String
(
Header
,
Vector
a
)
readByteStringLazy
_f
bs
=
case
decodeByNameWith
csvDecodeOptions
bs
of
readByteStringLazy
_f
bs
=
decodeByNameWith
csvDecodeOptions
bs
Left
e
->
panic
(
pack
e
)
Right
csvDocs
->
csvDocs
readByteStringStrict
::
(
FromNamedRecord
a
)
=>
proxy
a
->
BS
.
ByteString
->
(
Header
,
Vector
a
)
readByteStringStrict
::
(
FromNamedRecord
a
)
=>
proxy
a
->
BS
.
ByteString
->
Either
Prelude
.
String
(
Header
,
Vector
a
)
readByteStringStrict
ff
=
(
readByteStringLazy
ff
)
.
BL
.
fromStrict
readByteStringStrict
ff
=
(
readByteStringLazy
ff
)
.
BL
.
fromStrict
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO use readFileLazy
-- | TODO use readFileLazy
readFile
::
FilePath
->
IO
(
Header
,
Vector
CsvDoc
)
readFile
::
FilePath
->
IO
(
Either
Prelude
.
String
(
Header
,
Vector
CsvDoc
)
)
readFile
=
fmap
readCsvLazyBS
.
BL
.
readFile
readFile
=
fmap
readCsvLazyBS
.
BL
.
readFile
-- | TODO use readByteStringLazy
-- | TODO use readByteStringLazy
readCsvLazyBS
::
BL
.
ByteString
->
(
Header
,
Vector
CsvDoc
)
readCsvLazyBS
::
BL
.
ByteString
->
Either
Prelude
.
String
(
Header
,
Vector
CsvDoc
)
readCsvLazyBS
bs
=
case
decodeByNameWith
csvDecodeOptions
bs
of
readCsvLazyBS
bs
=
decodeByNameWith
csvDecodeOptions
bs
Left
e
->
panic
(
pack
e
)
Right
csvDocs
->
csvDocs
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO use readFileLazy
-- | TODO use readFileLazy
readCsvHal
::
FilePath
->
IO
(
Header
,
Vector
CsvHal
)
readCsvHal
::
FilePath
->
IO
(
Either
Prelude
.
String
(
Header
,
Vector
CsvHal
)
)
readCsvHal
=
fmap
readCsvHalLazyBS
.
BL
.
readFile
readCsvHal
=
fmap
readCsvHalLazyBS
.
BL
.
readFile
-- | TODO use readByteStringLazy
-- | TODO use readByteStringLazy
readCsvHalLazyBS
::
BL
.
ByteString
->
(
Header
,
Vector
CsvHal
)
readCsvHalLazyBS
::
BL
.
ByteString
->
Either
Prelude
.
String
(
Header
,
Vector
CsvHal
)
readCsvHalLazyBS
bs
=
case
decodeByNameWith
csvDecodeOptions
bs
of
readCsvHalLazyBS
bs
=
decodeByNameWith
csvDecodeOptions
bs
Left
e
->
panic
(
pack
e
)
Right
csvDocs
->
csvDocs
readCsvHalBSStrict
::
BS
.
ByteString
->
(
Header
,
Vector
CsvHal
)
readCsvHalBSStrict
::
BS
.
ByteString
->
Either
Prelude
.
String
(
Header
,
Vector
CsvHal
)
readCsvHalBSStrict
=
readCsvHalLazyBS
.
BL
.
fromStrict
readCsvHalBSStrict
=
readCsvHalLazyBS
.
BL
.
fromStrict
------------------------------------------------------------------------
------------------------------------------------------------------------
...
@@ -382,18 +379,22 @@ csv2doc (CsvDoc title source
...
@@ -382,18 +379,22 @@ csv2doc (CsvDoc title source
Nothing
Nothing
------------------------------------------------------------------------
------------------------------------------------------------------------
parseHal
::
FilePath
->
IO
[
HyperdataDocument
]
parseHal
::
FilePath
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseHal
fp
=
V
.
toList
<$>
V
.
map
csvHal2doc
<$>
snd
<$>
readCsvHal
fp
parseHal
fp
=
do
r
<-
readCsvHal
fp
pure
$
(
V
.
toList
.
V
.
map
csvHal2doc
.
snd
)
<$>
r
parseHal'
::
BL
.
ByteString
->
[
HyperdataDocument
]
parseHal'
::
BL
.
ByteString
->
Either
Prelude
.
String
[
HyperdataDocument
]
parseHal'
=
V
.
toList
.
V
.
map
csvHal2doc
.
snd
.
readCsvHalLazyBS
parseHal'
bs
=
(
V
.
toList
.
V
.
map
csvHal2doc
.
snd
)
<$>
readCsvHalLazyBS
bs
------------------------------------------------------------------------
------------------------------------------------------------------------
parseCsv
::
FilePath
->
IO
[
HyperdataDocument
]
parseCsv
::
FilePath
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseCsv
fp
=
V
.
toList
<$>
V
.
map
csv2doc
<$>
snd
<$>
readFile
fp
parseCsv
fp
=
do
r
<-
readFile
fp
pure
$
(
V
.
toList
.
V
.
map
csv2doc
.
snd
)
<$>
r
parseCsv'
::
BL
.
ByteString
->
[
HyperdataDocument
]
parseCsv'
::
BL
.
ByteString
->
Either
Prelude
.
String
[
HyperdataDocument
]
parseCsv'
bs
=
V
.
toList
$
V
.
map
csv2doc
$
snd
$
readCsvLazyBS
bs
parseCsv'
bs
=
(
V
.
toList
.
V
.
map
csv2doc
.
snd
)
<$>
readCsvLazyBS
bs
------------------------------------------------------------------------
------------------------------------------------------------------------
-- Csv v3 weighted for phylo
-- Csv v3 weighted for phylo
...
@@ -425,4 +426,4 @@ readWeightedCsv fp =
...
@@ -425,4 +426,4 @@ readWeightedCsv fp =
case
decodeByNameWith
csvDecodeOptions
bs
of
case
decodeByNameWith
csvDecodeOptions
bs
of
Left
e
->
panic
(
pack
e
)
Left
e
->
panic
(
pack
e
)
Right
corpus
->
corpus
Right
corpus
->
corpus
)
$
BL
.
readFile
fp
)
$
BL
.
readFile
fp
\ No newline at end of file
src/Gargantext/Database/Action/Flow.hs
View file @
d63df339
...
@@ -56,7 +56,7 @@ import Data.Map (Map, lookup)
...
@@ -56,7 +56,7 @@ import Data.Map (Map, lookup)
import
Data.Maybe
(
catMaybes
)
import
Data.Maybe
(
catMaybes
)
import
Data.Monoid
import
Data.Monoid
import
Data.Swagger
import
Data.Swagger
import
Data.Text
(
splitOn
)
import
qualified
Data.Text
as
T
import
Data.Traversable
(
traverse
)
import
Data.Traversable
(
traverse
)
import
Data.Tuple.Extra
(
first
,
second
)
import
Data.Tuple.Extra
(
first
,
second
)
import
GHC.Generics
(
Generic
)
import
GHC.Generics
(
Generic
)
...
@@ -178,11 +178,12 @@ flowCorpusFile :: (FlowCmdM env err m)
...
@@ -178,11 +178,12 @@ flowCorpusFile :: (FlowCmdM env err m)
->
TermType
Lang
->
FileFormat
->
FilePath
->
TermType
Lang
->
FileFormat
->
FilePath
->
m
CorpusId
->
m
CorpusId
flowCorpusFile
u
n
l
la
ff
fp
=
do
flowCorpusFile
u
n
l
la
ff
fp
=
do
docs
<-
liftBase
(
splitEvery
500
eParsed
<-
liftBase
$
parseFile
ff
fp
<$>
take
l
case
eParsed
of
<$>
parseFile
ff
fp
Right
parsed
->
do
)
let
docs
=
splitEvery
500
$
take
l
parsed
flowCorpus
u
n
la
(
map
(
map
toHyperdataDocument
)
docs
)
flowCorpus
u
n
la
(
map
(
map
toHyperdataDocument
)
docs
)
Left
e
->
panic
$
"Error: "
<>
(
T
.
pack
e
)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO improve the needed type to create/update a corpus
-- | TODO improve the needed type to create/update a corpus
...
@@ -425,11 +426,11 @@ instance ExtractNgramsT HyperdataDocument
...
@@ -425,11 +426,11 @@ instance ExtractNgramsT HyperdataDocument
$
_hd_source
doc
$
_hd_source
doc
institutes
=
map
text2ngrams
institutes
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
map
toSchoolName
.
(
splitOn
", "
))
$
maybe
[
"Nothing"
]
(
map
toSchoolName
.
(
T
.
splitOn
", "
))
$
_hd_institutes
doc
$
_hd_institutes
doc
authors
=
map
text2ngrams
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
splitOn
", "
)
$
maybe
[
"Nothing"
]
(
T
.
splitOn
", "
)
$
_hd_authors
doc
$
_hd_authors
doc
terms'
<-
map
(
enrichedTerms
(
lang'
^.
tt_lang
)
CoreNLP
NP
)
terms'
<-
map
(
enrichedTerms
(
lang'
^.
tt_lang
)
CoreNLP
NP
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment