Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
50497ca2
Commit
50497ca2
authored
Aug 23, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
merge
parents
f81fa897
09e9fa50
Changes
30
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
30 changed files
with
940 additions
and
321 deletions
+940
-321
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+58
-49
CleanCsvCorpus.hs
bin/gargantext-cli/CleanCsvCorpus.hs
+14
-11
Main.hs
bin/gargantext-cli/Main.hs
+27
-30
Main.hs
bin/gargantext-phylo/Main.hs
+10
-9
docker-compose.yaml
devops/docker/docker-compose.yaml
+1
-1
search-api.org
docs/search-api.org
+288
-0
pkgs.nix
nix/pkgs.nix
+2
-0
package.yaml
package.yaml
+2
-0
Types.hs
src/Gargantext/API/Admin/Orchestrator/Types.hs
+2
-0
Settings.hs
src/Gargantext/API/Admin/Settings.hs
+1
-1
Job.hs
src/Gargantext/API/Job.hs
+21
-16
Node.hs
src/Gargantext/API/Node.hs
+3
-0
Export.hs
src/Gargantext/API/Node/Corpus/Export.hs
+5
-4
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+89
-84
Searx.hs
src/Gargantext/API/Node/Corpus/Searx.hs
+104
-0
Types.hs
src/Gargantext/API/Node/Corpus/Types.hs
+67
-0
File.hs
src/Gargantext/API/Node/File.hs
+8
-7
FrameCalcUpload.hs
src/Gargantext/API/Node/FrameCalcUpload.hs
+90
-0
New.hs
src/Gargantext/API/Node/New.hs
+6
-5
Routes.hs
src/Gargantext/API/Routes.hs
+18
-15
IMT.hs
src/Gargantext/Core/Ext/IMT.hs
+9
-4
Convert.hs
src/Gargantext/Core/Text/Convert.hs
+8
-2
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+33
-21
CSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+57
-45
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+1
-1
Utils.hs
src/Gargantext/Data/HashMap/Strict/Utils.hs
+0
-3
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+9
-8
Prelude.hs
src/Gargantext/Database/Prelude.hs
+2
-1
Node.hs
src/Gargantext/Database/Query/Table/Node.hs
+2
-2
stack.yaml
stack.yaml
+3
-2
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
50497ca2
...
...
@@ -16,17 +16,30 @@ Adaptative Phylo binaries
module
Main
where
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Aeson
import
Data.Either
(
Either
(
..
))
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.String
(
String
)
import
GHC.IO
(
FilePath
)
import
qualified
Prelude
as
Prelude
import
System.Environment
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
Data.Text
(
Text
,
unwords
,
unpack
,
replace
,
pack
)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Text
as
T
import
Gargantext.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
...
...
@@ -36,20 +49,6 @@ import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setCon
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
import
GHC.IO
(
FilePath
)
import
Prelude
(
Either
(
Left
,
Right
),
toInteger
)
import
System.Environment
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
qualified
Data.Text
as
T
-- import Debug.Trace (trace)
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
...
...
@@ -84,13 +83,13 @@ toDays y m d = fromIntegral
toPhyloDate
::
Int
->
Int
->
Int
->
TimeUnit
->
Date
toPhyloDate
y
m
d
tu
=
case
tu
of
Year
_
_
_
->
y
Month
_
_
_
->
toMonths
(
toInteger
y
)
m
d
Week
_
_
_
->
div
(
toDays
(
toInteger
y
)
m
d
)
7
Day
_
_
_
->
toDays
(
toInteger
y
)
m
d
Month
_
_
_
->
toMonths
(
Prelude
.
toInteger
y
)
m
d
Week
_
_
_
->
div
(
toDays
(
Prelude
.
toInteger
y
)
m
d
)
7
Day
_
_
_
->
toDays
(
Prelude
.
toInteger
y
)
m
d
toPhyloDate'
::
Int
->
Int
->
Int
->
Text
toPhyloDate'
y
m
d
=
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
toPhyloDate'
y
m
d
=
pack
$
showGregorian
$
fromGregorian
(
Prelude
.
toInteger
y
)
m
d
--------------
...
...
@@ -113,43 +112,53 @@ termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList p
-- | To transform a Wos file (or [file]) into a list of Docs
wosToDocs
::
Int
->
Patterns
->
TimeUnit
->
FilePath
->
IO
([
Document
])
wosToDocs
::
Int
->
Patterns
->
TimeUnit
->
FilePath
->
IO
[
Document
]
wosToDocs
limit
patterns
time
path
=
do
files
<-
getFilesFromPath
path
take
limit
<$>
map
(
\
d
->
let
title
=
fromJust
$
_hd_title
d
abstr
=
if
(
isJust
$
_hd_abstract
d
)
then
fromJust
$
_hd_abstract
d
else
""
in
Document
(
toPhyloDate
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
toPhyloDate'
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
))
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
&&
(
isJust
$
_hd_title
d
))
<$>
parseFile
WOS
(
path
<>
file
)
)
files
files
<-
getFilesFromPath
path
let
parseFile'
file
=
do
eParsed
<-
parseFile
WOS
(
path
<>
file
)
case
eParsed
of
Right
ps
->
pure
ps
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
take
limit
<$>
map
(
\
d
->
let
title
=
fromJust
$
_hd_title
d
abstr
=
if
(
isJust
$
_hd_abstract
d
)
then
fromJust
$
_hd_abstract
d
else
""
in
Document
(
toPhyloDate
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
toPhyloDate'
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
))
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
&&
(
isJust
$
_hd_title
d
))
<$>
parseFile'
file
)
files
-- To transform a Csv file into a list of Document
csvToDocs
::
CorpusParser
->
Patterns
->
TimeUnit
->
FilePath
->
IO
([
Document
])
csvToDocs
::
CorpusParser
->
Patterns
->
TimeUnit
->
FilePath
->
IO
[
Document
]
csvToDocs
parser
patterns
time
path
=
case
parser
of
Wos
_
->
undefined
Csv
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
)
<$>
snd
<$>
Csv
.
readFile
path
Csv
limit
->
do
eR
<-
Csv
.
readFile
path
case
eR
of
Right
r
->
pure
$
Vector
.
toList
$
Vector
.
take
limit
$
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
unIntOrDec
$
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
Csv
.
unIntOrDec
$
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
)
$
snd
r
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
Csv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
...
...
bin/gargantext-cli/CleanCsvCorpus.hs
View file @
50497ca2
...
...
@@ -15,6 +15,7 @@ compress the contexts around the main terms of the query.
module
CleanCsvCorpus
where
--import GHC.IO (FilePath)
import
Data.Either
(
Either
(
..
))
import
Data.SearchEngine
as
S
import
qualified
Data.Set
as
S
import
Data.Text
(
pack
)
...
...
@@ -39,17 +40,19 @@ main = do
--let q = ["water", "scarcity", "morocco", "shortage","flood"]
let
q
=
[
"gratuit"
,
"gratuité"
,
"culture"
,
"culturel"
]
(
h
,
csvDocs
)
<-
CSV
.
readFile
rPath
eDocs
<-
CSV
.
readFile
rPath
case
eDocs
of
Right
(
h
,
csvDocs
)
->
do
putStrLn
$
"Number of documents before:"
<>
show
(
V
.
length
csvDocs
)
putStrLn
$
"Mean size of docs:"
<>
show
(
CSV
.
docsSize
csvDocs
)
putStrLn
$
"Number of documents before:"
<>
show
(
V
.
length
csvDocs
)
putStrLn
$
"Mean size of docs:"
<>
show
(
CSV
.
docsSize
csvDocs
)
let
docs
=
CSV
.
toDocs
csvDocs
let
engine
=
insertDocs
docs
initialDocSearchEngine
let
docIds
=
S
.
query
engine
(
map
pack
q
)
let
docs'
=
CSV
.
fromDocs
$
filterDocs
docIds
(
V
.
fromList
docs
)
let
docs
=
CSV
.
toDocs
csvDocs
let
engine
=
insertDocs
docs
initialDocSearchEngine
let
docIds
=
S
.
query
engine
(
map
pack
q
)
let
docs'
=
CSV
.
fromDocs
$
filterDocs
docIds
(
V
.
fromList
docs
)
putStrLn
$
"Number of documents after:"
<>
show
(
V
.
length
docs'
)
putStrLn
$
"Mean size of docs:"
<>
show
(
CSV
.
docsSize
docs'
)
putStrLn
$
"Number of documents after:"
<>
show
(
V
.
length
docs'
)
putStrLn
$
"Mean size of docs:"
<>
show
(
CSV
.
docsSize
docs'
)
CSV
.
writeFile
wPath
(
h
,
docs'
)
CSV
.
writeFile
wPath
(
h
,
docs'
)
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
bin/gargantext-cli/Main.hs
View file @
50497ca2
...
...
@@ -17,30 +17,24 @@ Main specifications to index a corpus with a term list
module
Main
where
import
Data.ByteString.Lazy
(
writeFile
)
import
Data.Text
(
pack
)
import
qualified
Data.Text
as
DT
import
Data.Tuple.Extra
(
both
)
import
qualified
Data.Vector
as
DV
import
Control.Concurrent.Async
as
CCA
(
mapConcurrently
)
import
Control.Concurrent
(
getNumCapabilities
,
myThreadId
,
threadCapability
)
import
Control.Monad
(
zipWithM
)
import
Control.Monad.IO.Class
import
Data.Map
(
Map
)
import
qualified
Data.Map
as
DM
import
GHC.Generics
import
Data.Aeson
import
Data.
Text
(
Text
)
import
Data.ByteString.Lazy
(
writeFile
)
import
Data.
Either
(
Either
(
..
)
)
import
Data.List
(
cycle
,
concat
,
unwords
)
import
Data.List.Split
(
chunksOf
)
import
Data.Map
(
Map
)
import
qualified
Data.Map
as
DM
import
Data.Text
(
pack
,
Text
)
import
qualified
Data.Text
as
DT
import
Data.Tuple.Extra
(
both
)
import
qualified
Data.Vector
as
DV
import
GHC.Generics
import
System.IO
(
hPutStr
,
hFlush
,
stderr
)
import
System.Environment
import
Control.Concurrent.Async
as
CCA
(
mapConcurrently
)
import
Control.Concurrent
(
getNumCapabilities
,
myThreadId
,
threadCapability
)
import
Gargantext.Prelude
import
Gargantext.Core
...
...
@@ -48,7 +42,7 @@ import Gargantext.Core.Types
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Terms.WithList
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
readFile
,
csv_title
,
csv_abstract
,
csv_publication_year
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
readFile
,
csv_title
,
csv_abstract
,
csv_publication_year
,
unIntOrDec
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms
(
terms
)
import
Gargantext.Core.Text.Metrics.Count
(
coocOnContexts
,
Coocs
)
...
...
@@ -92,22 +86,25 @@ main = do
[
corpusFile
,
termListFile
,
outputFile
]
<-
getArgs
--corpus :: IO (DM.IntMap [[Text]])
corpus
<-
DM
.
fromListWith
(
<>
)
.
DV
.
toList
.
DV
.
map
(
\
n
->
(
csv_publication_year
n
,
[(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)]))
.
snd
<$>
readFile
corpusFile
eCorpusFile
<-
readFile
corpusFile
case
eCorpusFile
of
Right
cf
->
do
let
corpus
=
DM
.
fromListWith
(
<>
)
.
DV
.
toList
.
DV
.
map
(
\
n
->
(
unIntOrDec
$
csv_publication_year
n
,
[(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)]))
.
snd
$
cf
-- termListMap :: [Text]
termList
<-
csvMapTermList
termListFile
-- termListMap :: [Text]
termList
<-
csvMapTermList
termListFile
putStrLn
$
show
$
length
termList
putStrLn
$
show
$
length
termList
let
patterns
=
buildPatterns
termList
let
patterns
=
buildPatterns
termList
-- r <- mapConcurrentlyChunked (filterTermsAndCooc patterns) (DM.toList corpus)
r
<-
mapConcurrently
(
filterTermsAndCooc
patterns
)
(
DM
.
toList
corpus
)
writeFile
outputFile
$
encode
(
CoocByYears
r
)
-- r <- mapConcurrentlyChunked (filterTermsAndCooc patterns) (DM.toList corpus)
r
<-
mapConcurrently
(
filterTermsAndCooc
patterns
)
(
DM
.
toList
corpus
)
writeFile
outputFile
$
encode
(
CoocByYears
r
)
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
...
...
bin/gargantext-phylo/Main.hs
View file @
50497ca2
...
...
@@ -24,6 +24,16 @@ import Data.Maybe
import
Data.Text
(
Text
,
unwords
)
import
GHC.Generics
import
GHC.IO
(
FilePath
)
import
System.Directory
(
doesFileExist
)
import
System.Environment
import
qualified
Data.ByteString.Lazy
as
L
import
qualified
Data.List
as
DL
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
DT
import
qualified
Data.Vector
as
DV
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
CSV
import
qualified
Prelude
as
P
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Prelude
import
Gargantext.Core.Text.Context
(
TermList
)
...
...
@@ -36,15 +46,6 @@ import Gargantext.Core.Viz.Phylo.LevelMaker
import
Gargantext.Core.Viz.Phylo.Tools
import
Gargantext.Core.Viz.Phylo.View.Export
import
Gargantext.Core.Viz.Phylo.View.ViewMaker
import
System.Directory
(
doesFileExist
)
import
System.Environment
import
qualified
Data.ByteString.Lazy
as
L
import
qualified
Data.List
as
DL
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
DT
import
qualified
Data.Vector
as
DV
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
CSV
import
qualified
Prelude
as
P
--------------
...
...
devops/docker/docker-compose.yaml
View file @
50497ca2
...
...
@@ -21,7 +21,7 @@ services:
ports
:
-
8081:80
environment
:
PGADMIN_DEFAULT_EMAIL
:
admin@localhost
PGADMIN_DEFAULT_EMAIL
:
admin@localhost
.lan
PGADMIN_DEFAULT_PASSWORD
:
admin
depends_on
:
...
...
docs/search-api.org
0 → 100644
View file @
50497ca2
This diff is collapsed.
Click to expand it.
nix/pkgs.nix
View file @
50497ca2
...
...
@@ -12,6 +12,8 @@ rec {
git
gmp
gsl
haskell-language-server
hlint
igraph
liblapack
lzma
...
...
package.yaml
View file @
50497ca2
...
...
@@ -188,6 +188,7 @@ library:
-
random
-
rdf4h
-
regex-compat
-
regex-tdfa
-
resource-pool
-
resourcet
-
safe
...
...
@@ -223,6 +224,7 @@ library:
-
transformers
-
transformers-base
-
unordered-containers
-
utf8-string
-
uuid
-
validity
-
vector
...
...
src/Gargantext/API/Admin/Orchestrator/Types.hs
View file @
50497ca2
...
...
@@ -101,6 +101,8 @@ data JobLog = JobLog
}
deriving
(
Show
,
Generic
)
makeLenses
''
J
obLog
instance
Arbitrary
JobLog
where
arbitrary
=
JobLog
<$>
arbitrary
...
...
src/Gargantext/API/Admin/Settings.hs
View file @
50497ca2
...
...
@@ -166,7 +166,7 @@ newEnv port file = do
when
(
port
/=
settings'
^.
appPort
)
$
panic
"TODO: conflicting settings of port"
config'
<-
readConfig
file
config'
<-
readConfig
file
self_url
<-
parseBaseUrl
$
"http://0.0.0.0:"
<>
show
port
dbParam
<-
databaseParameters
file
pool
<-
newPool
dbParam
...
...
src/Gargantext/API/Job.hs
View file @
50497ca2
module
Gargantext.API.Job
where
import
Control.Lens
(
over
,
_Just
)
import
Data.IORef
import
Data.Maybe
import
Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
))
import
Gargantext.API.Admin.Orchestrator.Types
jobLogInit
::
Int
->
JobLog
...
...
@@ -16,25 +17,29 @@ jobLogInit rem =
,
_scst_events
=
Just
[]
}
jobLogSuccess
::
JobLog
->
JobLog
jobLogSuccess
(
JobLog
{
_scst_succeeded
=
mSucc
,
_scst_remaining
=
mRem
,
_scst_failed
=
mFail
,
_scst_events
=
evt
})
=
JobLog
{
_scst_succeeded
=
(
+
1
)
<$>
mSucc
,
_scst_remaining
=
(
\
x
->
x
-
1
)
<$>
mRem
,
_scst_failed
=
mFail
,
_scst_events
=
evt
}
jobLogSuccess
jl
=
over
(
scst_succeeded
.
_Just
)
(
+
1
)
$
over
(
scst_remaining
.
_Just
)
(
\
x
->
x
-
1
)
jl
jobLogFail
::
JobLog
->
JobLog
jobLogFail
(
JobLog
{
_scst_succeeded
=
mSucc
,
_scst_remaining
=
mRem
,
_scst_failed
=
mFail
,
_scst_events
=
evt
})
=
jobLogFail
jl
=
over
(
scst_failed
.
_Just
)
(
+
1
)
$
over
(
scst_remaining
.
_Just
)
(
\
x
->
x
-
1
)
jl
jobLogFailTotal
::
JobLog
->
JobLog
jobLogFailTotal
(
JobLog
{
_scst_succeeded
=
mSucc
,
_scst_remaining
=
mRem
,
_scst_failed
=
mFail
,
_scst_events
=
evt
})
=
JobLog
{
_scst_succeeded
=
mSucc
,
_scst_remaining
=
(
\
x
->
x
-
1
)
<$>
m
Rem
,
_scst_failed
=
(
+
1
)
<$>
m
Fail
,
_scst_remaining
=
new
Rem
,
_scst_failed
=
new
Fail
,
_scst_events
=
evt
}
where
(
newRem
,
newFail
)
=
case
mRem
of
Nothing
->
(
Nothing
,
mFail
)
Just
rem
->
(
Just
0
,
(
+
rem
)
<$>
mFail
)
jobLogEvt
::
JobLog
->
ScraperEvent
->
JobLog
jobLogEvt
jl
evt
=
over
(
scst_events
.
_Just
)
(
\
evts
->
(
evt
:
evts
))
jl
runJobLog
::
MonadBase
IO
m
=>
Int
->
(
JobLog
->
m
()
)
->
m
(
m
()
,
m
()
,
m
JobLog
)
runJobLog
num
logStatus
=
do
...
...
src/Gargantext/API/Node.hs
View file @
50497ca2
...
...
@@ -46,6 +46,7 @@ import Gargantext.API.Metrics
import
Gargantext.API.Ngrams
(
TableNgramsApi
,
apiNgramsTableCorpus
)
import
Gargantext.API.Ngrams.Types
(
TabType
(
..
))
import
Gargantext.API.Node.File
import
Gargantext.API.Node.FrameCalcUpload
(
FrameCalcUploadAPI
,
frameCalcUploadAPI
)
import
Gargantext.API.Node.New
import
Gargantext.API.Prelude
import
Gargantext.API.Table
...
...
@@ -123,6 +124,7 @@ type NodeAPI a = Get '[JSON] (Node a)
:<|>
"rename"
:>
RenameApi
:<|>
PostNodeApi
-- TODO move to children POST
:<|>
PostNodeAsync
:<|>
FrameCalcUploadAPI
:<|>
ReqBody
'[
J
SON
]
a
:>
Put
'[
J
SON
]
Int
:<|>
"update"
:>
Update
.
API
:<|>
Delete
'[
J
SON
]
Int
...
...
@@ -205,6 +207,7 @@ nodeAPI p uId id' = withAccess (Proxy :: Proxy (NodeAPI a)) Proxy uId (PathNode
:<|>
rename
id'
:<|>
postNode
uId
id'
:<|>
postNodeAsyncAPI
uId
id'
:<|>
frameCalcUploadAPI
uId
id'
:<|>
putNode
id'
:<|>
Update
.
api
uId
id'
:<|>
Action
.
deleteNode
(
RootId
$
NodeId
uId
)
id'
...
...
src/Gargantext/API/Node/Corpus/Export.hs
View file @
50497ca2
...
...
@@ -19,6 +19,11 @@ module Gargantext.API.Node.Corpus.Export
import
Data.Map
(
Map
)
import
Data.Maybe
(
fromMaybe
)
import
Data.Set
(
Set
)
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.HashMap.Strict
as
HashMap
import
Gargantext.API.Node.Corpus.Export.Types
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
mapTermListRoot
,
getRepo
)
...
...
@@ -36,10 +41,6 @@ import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Node
(
_node_id
,
_node_hyperdata
)
import
Gargantext.Prelude
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.HashMap.Strict
as
HashMap
--------------------------------------------------
-- | Hashes are ordered by Set
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
50497ca2
...
...
@@ -27,6 +27,8 @@ import Data.Swagger
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
qualified
Prelude
as
Prelude
import
Protolude
(
readFile
)
import
Servant
import
Servant.Job.Utils
(
jsonOptions
)
-- import Servant.Multipart
...
...
@@ -36,25 +38,27 @@ import Test.QuickCheck.Arbitrary
import
Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
qualified
Gargantext.API.Admin.Orchestrator.Types
as
T
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Job
(
jobLogSuccess
,
jobLogFailTotal
)
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.Searx
import
Gargantext.API.Node.Corpus.Types
import
Gargantext.API.Node.Types
import
Gargantext.Core
(
Lang
(
..
)
{-, allLangs-}
)
import
Gargantext.Database.Action.Mail
(
sendMail
)
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
qualified
Gargantext.Core.Text.Corpus.Parsers
as
Parser
(
FileFormat
(
..
),
parseFormat
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow
(
FlowCmdM
,
flowCorpus
,
getDataText
,
flowDataText
,
TermType
(
..
)
,
DataOrigin
(
..
)
{-, allDataOrigins-}
)
import
Gargantext.Database.Action.
User
(
getUserId
)
import
Gargantext.Database.Action.Flow
(
FlowCmdM
,
flowCorpus
,
getDataText
,
flowDataText
,
TermType
(
..
)
{-, allDataOrigins-}
)
import
Gargantext.Database.Action.
Mail
(
sendMail
)
import
Gargantext.Database.Action.Node
(
mkNodeWithParent
)
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
NodeType
(
..
),
UserId
)
import
Gargantext.Database.Query.Table.Node
(
getNodeWith
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
qualified
Gargantext.Database.GargDB
as
GargDB
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
qualified
Gargantext.Core.Text.Corpus.Parsers
as
Parser
(
FileFormat
(
..
),
parseFormat
)
------------------------------------------------------------------------
{-
...
...
@@ -125,28 +129,11 @@ info :: FlowCmdM env err m => UserId -> m ApiInfo
info
_u
=
pure
$
ApiInfo
API
.
externalAPIs
------------------------------------------------------------------------
data
Database
=
Empty
|
PubMed
|
HAL
|
IsTex
|
Isidore
deriving
(
Eq
,
Show
,
Generic
)
deriveJSON
(
unPrefix
""
)
''
D
atabase
instance
ToSchema
Database
database2origin
::
Database
->
DataOrigin
database2origin
Empty
=
InternalOrigin
T
.
IsTex
database2origin
PubMed
=
ExternalOrigin
T
.
PubMed
database2origin
HAL
=
ExternalOrigin
T
.
HAL
database2origin
IsTex
=
ExternalOrigin
T
.
IsTex
database2origin
Isidore
=
ExternalOrigin
T
.
Isidore
------------------------------------------------------------------------
data
WithQuery
=
WithQuery
{
_wq_query
::
!
Text
,
_wq_databases
::
!
Database
,
_wq_datafield
::
!
Datafield
,
_wq_lang
::
!
Lang
,
_wq_node_id
::
!
Int
}
...
...
@@ -190,36 +177,51 @@ addToCorpusWithQuery :: FlowCmdM env err m
->
Maybe
Integer
->
(
JobLog
->
m
()
)
->
m
JobLog
addToCorpusWithQuery
user
cid
(
WithQuery
q
dbs
l
_nid
)
maybeLimit
logStatus
=
do
addToCorpusWithQuery
user
cid
(
WithQuery
q
dbs
datafield
l
_nid
)
maybeLimit
logStatus
=
do
-- TODO ...
logStatus
JobLog
{
_scst_succeeded
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
5
,
_scst_events
=
Just
[]
}
printDebug
"addToCorpusWithQuery"
(
cid
,
dbs
)
-- TODO add cid
-- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
txts
<-
mapM
(
\
db
->
getDataText
db
(
Multi
l
)
q
maybeLimit
)
[
database2origin
dbs
]
logStatus
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_events
=
Just
[]
}
cids
<-
mapM
(
\
txt
->
flowDataText
user
txt
(
Multi
l
)
cid
)
txts
printDebug
"corpus id"
cids
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
sendMail
user
-- TODO ...
pure
JobLog
{
_scst_succeeded
=
Just
3
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_remaining
=
Just
3
,
_scst_events
=
Just
[]
}
printDebug
"[addToCorpusWithQuery] (cid, dbs)"
(
cid
,
dbs
)
printDebug
"[addToCorpusWithQuery] datafield"
datafield
case
datafield
of
Web
->
do
printDebug
"[addToCorpusWithQuery] processing web request"
datafield
_
<-
triggerSearxSearch
cid
q
l
pure
JobLog
{
_scst_succeeded
=
Just
3
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
_
->
do
-- TODO add cid
-- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
txts
<-
mapM
(
\
db
->
getDataText
db
(
Multi
l
)
q
maybeLimit
)
[
database2origin
dbs
]
logStatus
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_events
=
Just
[]
}
cids
<-
mapM
(
\
txt
->
flowDataText
user
txt
(
Multi
l
)
cid
)
txts
printDebug
"corpus id"
cids
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
sendMail
user
-- TODO ...
pure
JobLog
{
_scst_succeeded
=
Just
3
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
type
AddWithForm
=
Summary
"Add with FormUrlEncoded to corpus endpoint"
...
...
@@ -235,16 +237,12 @@ addToCorpusWithForm :: FlowCmdM env err m
->
CorpusId
->
NewWithForm
->
(
JobLog
->
m
()
)
->
JobLog
->
m
JobLog
addToCorpusWithForm
user
cid
(
NewWithForm
ft
d
l
_n
)
logStatus
=
do
addToCorpusWithForm
user
cid
(
NewWithForm
ft
d
l
_n
)
logStatus
jobLog
=
do
printDebug
"[addToCorpusWithForm] Parsing corpus: "
cid
printDebug
"[addToCorpusWithForm] fileType"
ft
logStatus
JobLog
{
_scst_succeeded
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
2
,
_scst_events
=
Just
[]
}
logStatus
jobLog
let
parse
=
case
ft
of
CSV_HAL
->
Parser
.
parseFormat
Parser
.
CsvHal
...
...
@@ -253,34 +251,41 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus = do
PresseRIS
->
Parser
.
parseFormat
Parser
.
RisPresse
-- TODO granularity of the logStatus
docs
<-
liftBase
$
splitEvery
500
<$>
take
1000000
<$>
parse
(
cs
d
)
printDebug
"Parsing corpus finished : "
cid
logStatus
JobLog
{
_scst_succeeded
=
Just
1
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_events
=
Just
[]
}
printDebug
"Starting extraction : "
cid
-- TODO granularity of the logStatus
_cid'
<-
flowCorpus
user
(
Right
[
cid
])
(
Multi
$
fromMaybe
EN
l
)
(
map
(
map
toHyperdataDocument
)
docs
)
printDebug
"Extraction finished : "
cid
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
sendMail
user
pure
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
eDocs
<-
liftBase
$
parse
$
cs
d
case
eDocs
of
Right
docs'
->
do
let
docs
=
splitEvery
500
$
take
1000000
docs'
printDebug
"Parsing corpus finished : "
cid
logStatus
jobLog2
printDebug
"Starting extraction : "
cid
-- TODO granularity of the logStatus
_cid'
<-
flowCorpus
user
(
Right
[
cid
])
(
Multi
$
fromMaybe
EN
l
)
(
map
(
map
toHyperdataDocument
)
docs
)
printDebug
"Extraction finished : "
cid
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
sendMail
user
logStatus
jobLog3
pure
$
jobLog3
Left
e
->
do
printDebug
"Error"
e
logStatus
jobLogE
pure
jobLogE
where
jobLog2
=
jobLogSuccess
jobLog
jobLog3
=
jobLogSuccess
jobLog2
jobLogE
=
jobLogFailTotal
jobLog
parseCsvGargV3Path
::
[
Char
]
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseCsvGargV3Path
fp
=
do
contents
<-
readFile
fp
Parser
.
parseFormat
Parser
.
CsvGargV3
$
cs
contents
{-
addToCorpusWithFile :: FlowCmdM env err m
...
...
src/Gargantext/API/Node/Corpus/Searx.hs
0 → 100644
View file @
50497ca2
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.API.Node.Corpus.Searx
where
import
Control.Lens
(
view
)
import
qualified
Data.Aeson
as
Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
Network.HTTP.Client
import
Network.HTTP.Client.TLS
import
qualified
Prelude
as
Prelude
import
Protolude
(
encodeUtf8
,
Text
,
Either
)
import
Gargantext.Prelude
import
Gargantext.Prelude.Config
import
Gargantext.Core
(
Lang
(
..
))
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Action.Flow
(
FlowCmdM
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
)
import
Gargantext.Database.Prelude
(
hasConfig
)
data
SearxResult
=
SearxResult
{
_sr_url
::
Text
,
_sr_title
::
Text
,
_sr_content
::
Maybe
Text
,
_sr_engine
::
Text
,
_sr_score
::
Double
,
_sr_category
::
Text
,
_sr_pretty_url
::
Text
}
deriving
(
Show
,
Eq
,
Generic
)
-- , _sr_parsed_url
-- , _sr_engines
-- , _sr_positions
$
(
deriveJSON
(
unPrefix
"_sr_"
)
''
S
earxResult
)
data
SearxResponse
=
SearxResponse
{
_srs_query
::
Text
,
_srs_number_of_results
::
Int
,
_srs_results
::
[
SearxResult
]
}
deriving
(
Show
,
Eq
,
Generic
)
-- , _srs_answers
-- , _srs_corrections
-- , _srs_infoboxes
-- , _srs_suggestions :: [Text]
-- , _srs_unresponsive_engines :: [Text] }
$
(
deriveJSON
(
unPrefix
"_srs_"
)
''
S
earxResponse
)
data
FetchSearxParams
=
FetchSearxParams
{
_fsp_manager
::
Manager
,
_fsp_pageno
::
Int
,
_fsp_query
::
Text
,
_fsp_url
::
Text
}
fetchSearxPage
::
FetchSearxParams
->
IO
(
Either
Prelude
.
String
SearxResponse
)
fetchSearxPage
(
FetchSearxParams
{
_fsp_manager
,
_fsp_pageno
,
_fsp_query
,
_fsp_url
})
=
do
-- searx search API:
-- https://searx.github.io/searx/dev/search_api.html?highlight=json
req
<-
parseRequest
$
T
.
unpack
_fsp_url
let
request
=
urlEncodedBody
[
--("category_general", "1")
(
"q"
,
encodeUtf8
_fsp_query
)
,
(
"pageno"
,
encodeUtf8
$
T
.
pack
$
show
_fsp_pageno
)
--, ("time_range", "None")
--, ("language", "en-US") -- TODO
,
(
"format"
,
"json"
)
]
req
res
<-
httpLbs
request
_fsp_manager
let
dec
=
Aeson
.
eitherDecode
$
responseBody
res
::
(
Either
Prelude
.
String
SearxResponse
)
pure
dec
triggerSearxSearch
::
(
MonadBase
IO
m
,
FlowCmdM
env
err
m
)
=>
CorpusId
->
API
.
Query
->
Lang
->
m
()
triggerSearxSearch
cid
q
l
=
do
printDebug
"[triggerSearxSearch] cid"
cid
printDebug
"[triggerSearxSearch] q"
q
printDebug
"[triggerSearxSearch] l"
l
cfg
<-
view
hasConfig
let
surl
=
_gc_frame_searx_url
cfg
printDebug
"[triggerSearxSearch] surl"
surl
manager
<-
liftBase
$
newManager
tlsManagerSettings
res
<-
liftBase
$
fetchSearxPage
$
FetchSearxParams
{
_fsp_manager
=
manager
,
_fsp_pageno
=
1
,
_fsp_query
=
q
,
_fsp_url
=
surl
}
printDebug
"[triggerSearxSearch] res"
res
pure
()
src/Gargantext/API/Node/Corpus/Types.hs
0 → 100644
View file @
50497ca2
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.API.Node.Corpus.Types
where
import
Control.Lens
hiding
(
elements
,
Empty
)
import
Control.Monad.Fail
(
fail
)
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Monoid
(
mempty
)
import
Data.Swagger
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
Text.Regex.TDFA
((
=~
))
import
Protolude
((
++
))
import
Gargantext.Prelude
import
qualified
Gargantext.API.Admin.Orchestrator.Types
as
T
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Action.Flow
(
DataOrigin
(
..
))
data
Database
=
Empty
|
PubMed
|
HAL
|
IsTex
|
Isidore
deriving
(
Eq
,
Show
,
Generic
)
deriveJSON
(
unPrefix
""
)
''
D
atabase
instance
ToSchema
Database
database2origin
::
Database
->
DataOrigin
database2origin
Empty
=
InternalOrigin
T
.
IsTex
database2origin
PubMed
=
ExternalOrigin
T
.
PubMed
database2origin
HAL
=
ExternalOrigin
T
.
HAL
database2origin
IsTex
=
ExternalOrigin
T
.
IsTex
database2origin
Isidore
=
ExternalOrigin
T
.
Isidore
------------------------------------------------------------------------
data
Datafield
=
Gargantext
|
External
(
Maybe
Database
)
|
Web
|
Files
deriving
(
Eq
,
Show
,
Generic
)
instance
FromJSON
Datafield
where
parseJSON
=
withText
"Datafield"
$
\
text
->
case
text
of
"Gargantext"
->
pure
Gargantext
"Web"
->
pure
Web
"Files"
->
pure
Files
v
->
let
(
preExternal
,
_
,
postExternal
)
=
v
=~
(
"External "
::
Text
)
::
(
Text
,
Text
,
Text
)
in
if
preExternal
==
""
then
do
db
<-
parseJSON
$
String
postExternal
pure
$
External
db
else
fail
$
"Cannot match patterh 'External <db>' for string "
++
(
T
.
unpack
v
)
instance
ToJSON
Datafield
where
toJSON
(
External
db
)
=
toJSON
$
"External "
++
(
show
db
)
toJSON
s
=
toJSON
$
show
s
instance
ToSchema
Datafield
where
declareNamedSchema
_
=
do
return
$
NamedSchema
(
Just
"Datafield"
)
$
mempty
&
type_
?~
SwaggerObject
src/Gargantext/API/Node/File.hs
View file @
50497ca2
...
...
@@ -9,6 +9,14 @@ import Control.Lens ((^.))
import
Data.Swagger
import
Data.Text
import
GHC.Generics
(
Generic
)
import
Servant
import
Servant.Job.Async
(
JobFunction
(
..
),
serveJobsAPI
)
import
qualified
Data.ByteString
as
BS
import
qualified
Data.ByteString.Lazy
as
BSL
import
qualified
Data.MIME.Types
as
DMT
import
qualified
Gargantext.Database.GargDB
as
GargDB
import
qualified
Network.HTTP.Media
as
M
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Node.Types
...
...
@@ -22,13 +30,6 @@ import Gargantext.Database.Query.Table.Node (getNodeWith)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
import
Servant
import
Servant.Job.Async
(
JobFunction
(
..
),
serveJobsAPI
)
import
qualified
Data.ByteString
as
BS
import
qualified
Data.ByteString.Lazy
as
BSL
import
qualified
Data.MIME.Types
as
DMT
import
qualified
Gargantext.Database.GargDB
as
GargDB
import
qualified
Network.HTTP.Media
as
M
data
RESPONSE
deriving
Typeable
...
...
src/Gargantext/API/Node/FrameCalcUpload.hs
0 → 100644
View file @
50497ca2
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TypeOperators #-}
module
Gargantext.API.Node.FrameCalcUpload
where
import
Control.Lens
((
^.
))
import
Data.Aeson
import
qualified
Data.ByteString.Lazy
as
BSL
import
qualified
Data.ByteString.UTF8
as
BSU8
import
Data.Swagger
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
Network.HTTP.Client
(
newManager
,
httpLbs
,
parseRequest
,
responseBody
)
import
Network.HTTP.Client.TLS
(
tlsManagerSettings
)
import
Servant
import
Servant.Job.Async
import
Web.FormUrlEncoded
(
FromForm
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Job
(
jobLogInit
,
jobLogSuccess
,
jobLogFail
)
import
Gargantext.API.Node.Corpus.New
(
addToCorpusWithForm
)
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Node.Types
(
NewWithForm
(
..
))
import
Gargantext.API.Prelude
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Database.Action.Flow.Types
import
Gargantext.Database.Admin.Types.Hyperdata.Frame
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Node
(
getClosestParentIdByType
,
getNodeWith
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
-- | Payload of the FrameCalc upload endpoint.  The request carries no
-- parameters (all the information comes from the node), hence the unit
-- field.
data FrameCalcUpload = FrameCalcUpload ()
  deriving (Generic)

instance FromForm FrameCalcUpload
instance FromJSON FrameCalcUpload
instance ToJSON FrameCalcUpload
instance ToSchema FrameCalcUpload
-- | Async-job endpoint: POST .../add/framecalc/async starts a job that
-- imports the node's FrameCalc sheet (as CSV) into the parent corpus.
type FrameCalcUploadAPI = Summary " FrameCalc upload"
                        :> "add"
                        :> "framecalc"
                        :> "async"
                        :> AsyncJobs JobLog '[JSON] FrameCalcUpload JobLog
-- | Serve the FrameCalc upload job for the given user and frame node.
-- The job log is initialised with 5 planned steps ('jobLogInit 5').
frameCalcUploadAPI :: UserId -> NodeId -> GargServer FrameCalcUploadAPI
frameCalcUploadAPI uId nId =
  serveJobsAPI $
    JobFunction (\p logs ->
      frameCalcUploadAsync uId nId p (liftBase . logs) (jobLogInit 5))
-- | Job body: download the FrameCalc sheet of node @nId@ as CSV and feed
-- it into the closest ancestor corpus via 'addToCorpusWithForm'.
-- Fails the job log when no parent corpus can be found.
frameCalcUploadAsync :: FlowCmdM env err m
                     => UserId
                     -> NodeId
                     -> FrameCalcUpload
                     -> (JobLog -> m ())   -- ^ progress-report callback
                     -> JobLog             -- ^ initial job log
                     -> m JobLog
frameCalcUploadAsync uId nId _f logStatus jobLog = do
  logStatus jobLog
  -- printDebug "[frameCalcUploadAsync] uId" uId
  -- printDebug "[frameCalcUploadAsync] nId" nId
  -- Fetch the frame node's hyperdata to learn the FrameCalc service base
  -- URL and the sheet identifier.
  node <- getNodeWith nId (Proxy :: Proxy HyperdataFrame)
  let (HyperdataFrame { _hf_base = base
                      , _hf_frame_id = frame_id }) = node ^. node_hyperdata
  -- The service exposes the sheet as CSV at <base>/<frame_id>.csv.
  let csvUrl = base <> "/" <> frame_id <> ".csv"
  -- printDebug "[frameCalcUploadAsync] csvUrl" csvUrl
  -- Download the CSV over TLS; the whole response body is held in memory.
  res <- liftBase $ do
    manager <- newManager tlsManagerSettings
    req <- parseRequest $ T.unpack csvUrl
    httpLbs req manager
  -- Decode the body assuming UTF-8 — TODO confirm the service's encoding.
  let body = T.pack $ BSU8.toString $ BSL.toStrict $ responseBody res
  -- The import target is the closest ancestor corpus of this frame node.
  mCId <- getClosestParentIdByType nId NodeCorpus
  -- printDebug "[frameCalcUploadAsync] mCId" mCId
  jobLog2 <- case mCId of
    Nothing  -> pure $ jobLogFail jobLog
    Just cId -> addToCorpusWithForm (RootId (NodeId uId)) cId
                                    (NewWithForm CSV body Nothing "calc-upload.csv")
                                    logStatus jobLog
  pure $ jobLogSuccess jobLog2
src/Gargantext/API/Node/New.hs
View file @
50497ca2
...
...
@@ -24,6 +24,12 @@ import Data.Aeson
import
Data.Swagger
import
Data.Text
(
Text
)
import
GHC.Generics
(
Generic
)
import
Servant
import
Servant.Job.Async
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck.Arbitrary
import
Web.FormUrlEncoded
(
FromForm
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Prelude
import
Gargantext.Database.Action.Flow.Types
...
...
@@ -34,11 +40,6 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import
Gargantext.Database.Query.Table.Node.User
import
Gargantext.Database.Schema.Node
import
Gargantext.Prelude
import
Servant
import
Servant.Job.Async
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck.Arbitrary
import
Web.FormUrlEncoded
(
FromForm
)
------------------------------------------------------------------------
data
PostNode
=
PostNode
{
pn_name
::
Text
...
...
src/Gargantext/API/Routes.hs
View file @
50497ca2
...
...
@@ -27,10 +27,24 @@ import Control.Concurrent (threadDelay)
import
Control.Lens
(
view
)
import
Data.Text
(
Text
)
import
Data.Validity
import
Servant
import
Servant.Auth
as
SA
import
Servant.Auth.Swagger
()
import
Servant.Job.Async
import
Servant.Swagger.UI
import
qualified
Gargantext.API.Ngrams.List
as
List
import
qualified
Gargantext.API.Node.Contact
as
Contact
import
qualified
Gargantext.API.Node.Corpus.Annuaire
as
Annuaire
import
qualified
Gargantext.API.Node.Corpus.Export
as
Export
import
qualified
Gargantext.API.Node.Corpus.Export.Types
as
Export
import
qualified
Gargantext.API.Node.Corpus.New
as
New
import
qualified
Gargantext.API.Public
as
Public
import
Gargantext.API.Admin.Auth.Types
(
AuthRequest
,
AuthResponse
,
AuthenticatedUser
(
..
),
PathId
(
..
))
import
Gargantext.API.Admin.Auth
(
withAccess
)
import
Gargantext.API.Admin.FrontEnd
(
FrontEndAPI
)
import
Gargantext.API.Count
(
CountAPI
,
count
,
Query
)
import
Gargantext.API.Job
(
jobLogInit
)
import
Gargantext.API.Ngrams
(
TableNgramsApi
,
apiNgramsTableDoc
)
import
Gargantext.API.Node
import
Gargantext.API.Prelude
...
...
@@ -41,18 +55,7 @@ import Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Prelude
import
Gargantext.Prelude.Config
(
gc_max_docs_scrapers
)
import
Servant
import
Servant.Auth
as
SA
import
Servant.Auth.Swagger
()
import
Servant.Job.Async
import
Servant.Swagger.UI
import
qualified
Gargantext.API.Ngrams.List
as
List
import
qualified
Gargantext.API.Node.Contact
as
Contact
import
qualified
Gargantext.API.Node.Corpus.Annuaire
as
Annuaire
import
qualified
Gargantext.API.Node.Corpus.Export
as
Export
import
qualified
Gargantext.API.Node.Corpus.Export.Types
as
Export
import
qualified
Gargantext.API.Node.Corpus.New
as
New
import
qualified
Gargantext.API.Public
as
Public
type
GargAPI
=
"api"
:>
Summary
"API "
:>
GargAPIVersion
-- | TODO :<|> Summary "Latest API" :> GargAPI'
...
...
@@ -282,9 +285,9 @@ addCorpusWithForm user cid =
JobFunction
(
\
i
log'
->
let
log''
x
=
do
printDebug
"
addToCorpusWithForm
"
x
printDebug
"
[addToCorpusWithForm]
"
x
liftBase
$
log'
x
in
New
.
addToCorpusWithForm
user
cid
i
log''
)
in
New
.
addToCorpusWithForm
user
cid
i
log''
(
jobLogInit
3
)
)
addCorpusWithFile
::
User
->
GargServer
New
.
AddWithFile
addCorpusWithFile
user
cid
=
...
...
@@ -292,7 +295,7 @@ addCorpusWithFile user cid =
JobFunction
(
\
i
log'
->
let
log''
x
=
do
printDebug
"
addToCorpusWithFile
"
x
printDebug
"
[addToCorpusWithFile]
"
x
liftBase
$
log'
x
in
New
.
addToCorpusWithFile
user
cid
i
log''
)
...
...
src/Gargantext/Core/Ext/IMT.hs
View file @
50497ca2
...
...
@@ -11,14 +11,17 @@ Portability : POSIX
module
Gargantext.Core.Ext.IMT
where
import
Gargantext.Prelude
import
Data.Text
(
Text
,
splitOn
)
import
Data.Either
(
Either
(
..
))
import
Data.Map
(
Map
)
import
Data.Text
(
Text
,
splitOn
)
import
qualified
Data.Set
as
S
import
qualified
Data.List
as
DL
import
qualified
Data.Vector
as
DV
import
qualified
Data.Map
as
M
import
qualified
Prelude
as
Prelude
import
Gargantext.Prelude
import
Gargantext.Core.Text.Metrics.Utils
as
Utils
import
Gargantext.Core.Text.Corpus.Parsers.CSV
as
CSV
...
...
@@ -98,8 +101,10 @@ schools = [ School
mapIdSchool
::
Map
Text
Text
mapIdSchool
=
M
.
fromList
$
Gargantext
.
Prelude
.
map
(
\
(
School
n
_
i
)
->
(
i
,
n
))
schools
-- | Read the IMT HAL corpus dump from disk, returning either the parsed
-- rows or a CSV parse-error message (the header is discarded).
hal_data :: IO (Either Prelude.String (DV.Vector CsvHal))
hal_data = fmap (fmap snd) $ CSV.readCsvHal "doc/corpus_imt/Gargantext_Corpus.csv"
names
::
S
.
Set
Text
names
=
S
.
fromList
$
Gargantext
.
Prelude
.
map
(
\
s
->
school_id
s
)
schools
...
...
src/Gargantext/Core/Text/Convert.hs
View file @
50497ca2
...
...
@@ -16,15 +16,21 @@ Format Converter.
module
Gargantext.Core.Text.Convert
(
risPress2csvWrite
)
where
import
Data.Either
(
Either
(
..
))
import
qualified
Data.Text
as
T
import
System.FilePath
(
FilePath
())
-- , takeExtension)
import
Gargantext.Prelude
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
writeDocs2Csv
)
import
Gargantext.Core.Text.Corpus.Parsers
(
parseFile
,
FileFormat
(
..
))
-- | Convert a RIS press export (@\<f\>.ris@) into CSV (@\<f\>.csv@).
-- Aborts with 'panic' when the RIS file cannot be parsed.
risPress2csvWrite :: FilePath -> IO ()
risPress2csvWrite f = do
  parsed <- parseFile RisPresse (f <> ".ris")
  case parsed of
    Right docs -> writeDocs2Csv (f <> ".csv") docs
    Left err   -> panic $ "Error: " <> (T.pack err)
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
50497ca2
...
...
@@ -36,18 +36,20 @@ import Data.String()
import
Data.Text
(
Text
)
import
Data.Text.Encoding
(
decodeUtf8
)
import
Data.Tuple.Extra
(
both
,
first
,
second
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
parseHal
,
parseHal'
,
parseCsv
,
parseCsv'
)
import
Gargantext.Core.Text.Corpus.Parsers.RIS.Presse
(
presseEnrich
)
-- import Gargantext.Core.Text.Learn (detectLangDefault)
import
System.FilePath
(
FilePath
(),
takeExtension
)
import
qualified
Data.ByteString
as
DB
import
qualified
Data.ByteString.Char8
as
DBC
import
qualified
Data.ByteString.Lazy
as
DBL
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
DT
import
qualified
Prelude
as
Prelude
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
parseHal
,
parseHal'
,
parseCsv
,
parseCsv'
)
import
Gargantext.Core.Text.Corpus.Parsers.RIS.Presse
(
presseEnrich
)
-- import Gargantext.Core.Text.Learn (detectLangDefault)
import
qualified
Gargantext.Core.Text.Corpus.Parsers.Date
as
Date
import
qualified
Gargantext.Core.Text.Corpus.Parsers.RIS
as
RIS
import
qualified
Gargantext.Core.Text.Corpus.Parsers.WOS
as
WOS
...
...
@@ -75,30 +77,40 @@ data FileFormat = WOS | RIS | RisPresse
-- | XML -- Not Implemented / see :
parseFormat
::
FileFormat
->
DB
.
ByteString
->
IO
[
HyperdataDocument
]
parseFormat
::
FileFormat
->
DB
.
ByteString
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseFormat
CsvGargV3
bs
=
pure
$
parseCsv'
$
DBL
.
fromStrict
bs
parseFormat
CsvHal
bs
=
pure
$
parseHal'
$
DBL
.
fromStrict
bs
parseFormat
RisPresse
bs
=
mapM
(
toDoc
RIS
)
<$>
snd
<$>
enrichWith
RisPresse
$
partitionEithers
$
[
runParser'
RisPresse
bs
]
parseFormat
WOS
bs
=
mapM
(
toDoc
WOS
)
<$>
snd
<$>
enrichWith
WOS
$
partitionEithers
$
[
runParser'
WOS
bs
]
parseFormat
RisPresse
bs
=
do
docs
<-
mapM
(
toDoc
RIS
)
<$>
snd
<$>
enrichWith
RisPresse
$
partitionEithers
$
[
runParser'
RisPresse
bs
]
pure
$
Right
docs
parseFormat
WOS
bs
=
do
docs
<-
mapM
(
toDoc
WOS
)
<$>
snd
<$>
enrichWith
WOS
$
partitionEithers
$
[
runParser'
WOS
bs
]
pure
$
Right
docs
parseFormat
_
_
=
undefined
-- | Parse file into documents
-- TODO manage errors here
-- TODO: to debug maybe add the filepath in error message
parseFile
::
FileFormat
->
FilePath
->
IO
[
HyperdataDocument
]
parseFile
::
FileFormat
->
FilePath
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseFile
CsvHal
p
=
parseHal
p
parseFile
CsvGargV3
p
=
parseCsv
p
parseFile
RisPresse
p
=
join
$
mapM
(
toDoc
RIS
)
<$>
snd
<$>
enrichWith
RisPresse
<$>
readFileWith
RIS
p
parseFile
WOS
p
=
join
$
mapM
(
toDoc
WOS
)
<$>
snd
<$>
enrichWith
WOS
<$>
readFileWith
WOS
p
parseFile
ff
p
=
join
$
mapM
(
toDoc
ff
)
<$>
snd
<$>
enrichWith
ff
<$>
readFileWith
ff
p
parseFile
RisPresse
p
=
do
docs
<-
join
$
mapM
(
toDoc
RIS
)
<$>
snd
<$>
enrichWith
RisPresse
<$>
readFileWith
RIS
p
pure
$
Right
docs
parseFile
WOS
p
=
do
docs
<-
join
$
mapM
(
toDoc
WOS
)
<$>
snd
<$>
enrichWith
WOS
<$>
readFileWith
WOS
p
pure
$
Right
docs
parseFile
ff
p
=
do
docs
<-
join
$
mapM
(
toDoc
ff
)
<$>
snd
<$>
enrichWith
ff
<$>
readFileWith
ff
p
pure
$
Right
docs
toDoc
::
FileFormat
->
[(
Text
,
Text
)]
->
IO
HyperdataDocument
-- TODO use language for RIS
...
...
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
View file @
50497ca2
...
...
@@ -19,7 +19,7 @@ import qualified Data.ByteString as BS
import
qualified
Data.ByteString.Lazy
as
BL
import
Data.Char
(
ord
)
import
Data.Csv
import
Data.Either
(
Either
(
Left
,
Right
))
import
Data.Either
(
Either
(
..
))
import
Data.Text
(
Text
,
pack
,
length
,
intercalate
)
import
Data.Time.Segment
(
jour
)
import
qualified
Data.Vector
as
V
...
...
@@ -27,6 +27,8 @@ import Data.Vector (Vector)
import
GHC.IO
(
FilePath
)
import
GHC.Word
(
Word8
)
import
qualified
Prelude
as
Prelude
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
hiding
(
length
)
import
Gargantext.Core.Text
...
...
@@ -83,7 +85,7 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
-- | Types Conversions
toDocs
::
Vector
CsvDoc
->
[
CsvGargV3
]
toDocs
v
=
V
.
toList
$
V
.
zipWith
(
\
nId
(
CsvDoc
t
s
py
pm
pd
abst
auth
)
$
V
.
zipWith
(
\
nId
(
CsvDoc
t
s
(
IntOrDec
py
)
pm
pd
abst
auth
)
->
CsvGargV3
nId
t
s
py
pm
pd
abst
auth
)
(
V
.
enumFromN
1
(
V
.
length
v''
))
v''
where
...
...
@@ -94,7 +96,7 @@ toDocs v = V.toList
fromDocs
::
Vector
CsvGargV3
->
Vector
CsvDoc
fromDocs
docs
=
V
.
map
fromDocs'
docs
where
fromDocs'
(
CsvGargV3
_
t
s
py
pm
pd
abst
auth
)
=
(
CsvDoc
t
s
py
pm
pd
abst
auth
)
fromDocs'
(
CsvGargV3
_
t
s
py
pm
pd
abst
auth
)
=
(
CsvDoc
t
s
(
IntOrDec
py
)
pm
pd
abst
auth
)
---------------------------------------------------------------
-- | Split a document in its context
...
...
@@ -137,10 +139,21 @@ docsSize csvDoc = mean ls
---------------------------------------------------------------
---------------------------------------------------------------
-- | An 'Int' that tolerates decimal notation in CSV input: fields such
-- as "2019.0" (as exported by some spreadsheet tools) are floored to an
-- integer instead of failing to parse.
newtype IntOrDec = IntOrDec Int
  deriving (Show, Eq, Read)

-- | Unwrap the underlying 'Int'.
unIntOrDec :: IntOrDec -> Int
unIntOrDec (IntOrDec i) = i

instance FromField IntOrDec where
  -- Try the field as a plain Int first; on failure reparse it as a
  -- Double and floor it.
  parseField s = case runParser (parseField s :: Parser Int) of
    Left _err -> IntOrDec <$> Prelude.floor <$> (parseField s :: Parser Double)
    Right n   -> pure $ IntOrDec n

instance ToField IntOrDec where
  -- Serialise back as a plain integer.
  toField (IntOrDec i) = toField i
data
CsvDoc
=
CsvDoc
{
csv_title
::
!
Text
,
csv_source
::
!
Text
,
csv_publication_year
::
!
Int
,
csv_publication_year
::
!
Int
OrDec
,
csv_publication_month
::
!
Int
,
csv_publication_day
::
!
Int
,
csv_abstract
::
!
Text
...
...
@@ -149,13 +162,13 @@ data CsvDoc = CsvDoc
deriving
(
Show
)
instance
FromNamedRecord
CsvDoc
where
parseNamedRecord
r
=
CsvDoc
<$>
r
.:
"title"
<*>
r
.:
"source"
<*>
r
.:
"publication_year"
<*>
r
.:
"publication_month"
<*>
r
.:
"publication_day"
<*>
r
.:
"abstract"
<*>
r
.:
"authors"
parseNamedRecord
r
=
CsvDoc
<$>
(
r
.:
"title"
<|>
r
.:
"Title"
)
<*>
(
r
.:
"source"
<|>
r
.:
"Source"
)
<*>
(
r
.:
"publication_year"
<|>
r
.:
"Publication Year"
)
<*>
(
r
.:
"publication_month"
<|>
r
.:
"Publication Month"
)
<*>
(
r
.:
"publication_day"
<|>
r
.:
"Publication Day"
)
<*>
(
r
.:
"abstract"
<|>
r
.:
"Abstract"
)
<*>
(
r
.:
"authors"
<|>
r
.:
"Authors"
)
instance
ToNamedRecord
CsvDoc
where
toNamedRecord
(
CsvDoc
t
s
py
pm
pd
abst
aut
)
=
...
...
@@ -171,7 +184,7 @@ instance ToNamedRecord CsvDoc where
hyperdataDocument2csvDoc
::
HyperdataDocument
->
CsvDoc
hyperdataDocument2csvDoc
h
=
CsvDoc
(
m
$
_hd_title
h
)
(
m
$
_hd_source
h
)
(
mI
$
_hd_publication_year
h
)
(
IntOrDec
$
mI
$
_hd_publication_year
h
)
(
mI
$
_hd_publication_month
h
)
(
mI
$
_hd_publication_day
h
)
(
m
$
_hd_abstract
h
)
...
...
@@ -192,52 +205,47 @@ delimiter :: Word8
delimiter
=
fromIntegral
$
ord
'
\t
'
------------------------------------------------------------------------
------------------------------------------------------------------------
readCsvOn'
::
[
CsvDoc
->
Text
]
->
FilePath
->
IO
[
Text
]
readCsvOn'
fields
fp
=
V
.
toList
<$>
V
.
map
(
\
l
->
intercalate
(
pack
" "
)
$
map
(
\
field
->
field
l
)
fields
)
<$>
snd
<$>
readFile
fp
readCsvOn'
::
[
CsvDoc
->
Text
]
->
FilePath
->
IO
(
Either
Prelude
.
String
[
Text
])
readCsvOn'
fields
fp
=
do
r
<-
readFile
fp
pure
$
(
V
.
toList
.
V
.
map
(
\
l
->
intercalate
(
pack
" "
)
$
map
(
\
field
->
field
l
)
fields
)
.
snd
)
<$>
r
------------------------------------------------------------------------
readFileLazy
::
(
FromNamedRecord
a
)
=>
proxy
a
->
FilePath
->
IO
(
Header
,
Vector
a
)
readFileLazy
::
(
FromNamedRecord
a
)
=>
proxy
a
->
FilePath
->
IO
(
Either
Prelude
.
String
(
Header
,
Vector
a
)
)
readFileLazy
f
=
fmap
(
readByteStringLazy
f
)
.
BL
.
readFile
readFileStrict
::
(
FromNamedRecord
a
)
=>
proxy
a
->
FilePath
->
IO
(
Header
,
Vector
a
)
readFileStrict
::
(
FromNamedRecord
a
)
=>
proxy
a
->
FilePath
->
IO
(
Either
Prelude
.
String
(
Header
,
Vector
a
)
)
readFileStrict
f
=
fmap
(
readByteStringStrict
f
)
.
BS
.
readFile
readByteStringLazy
::
(
FromNamedRecord
a
)
=>
proxy
a
->
BL
.
ByteString
->
(
Header
,
Vector
a
)
readByteStringLazy
_f
bs
=
case
decodeByNameWith
csvDecodeOptions
bs
of
Left
e
->
panic
(
pack
e
)
Right
csvDocs
->
csvDocs
readByteStringLazy
::
(
FromNamedRecord
a
)
=>
proxy
a
->
BL
.
ByteString
->
Either
Prelude
.
String
(
Header
,
Vector
a
)
readByteStringLazy
_f
bs
=
decodeByNameWith
csvDecodeOptions
bs
readByteStringStrict
::
(
FromNamedRecord
a
)
=>
proxy
a
->
BS
.
ByteString
->
(
Header
,
Vector
a
)
readByteStringStrict
::
(
FromNamedRecord
a
)
=>
proxy
a
->
BS
.
ByteString
->
Either
Prelude
.
String
(
Header
,
Vector
a
)
readByteStringStrict
ff
=
(
readByteStringLazy
ff
)
.
BL
.
fromStrict
------------------------------------------------------------------------
-- | TODO use readFileLazy
readFile
::
FilePath
->
IO
(
Header
,
Vector
CsvDoc
)
readFile
::
FilePath
->
IO
(
Either
Prelude
.
String
(
Header
,
Vector
CsvDoc
)
)
readFile
=
fmap
readCsvLazyBS
.
BL
.
readFile
-- | TODO use readByteStringLazy
readCsvLazyBS
::
BL
.
ByteString
->
(
Header
,
Vector
CsvDoc
)
readCsvLazyBS
bs
=
case
decodeByNameWith
csvDecodeOptions
bs
of
Left
e
->
panic
(
pack
e
)
Right
csvDocs
->
csvDocs
readCsvLazyBS
::
BL
.
ByteString
->
Either
Prelude
.
String
(
Header
,
Vector
CsvDoc
)
readCsvLazyBS
bs
=
decodeByNameWith
csvDecodeOptions
bs
------------------------------------------------------------------------
-- | TODO use readFileLazy
readCsvHal
::
FilePath
->
IO
(
Header
,
Vector
CsvHal
)
readCsvHal
::
FilePath
->
IO
(
Either
Prelude
.
String
(
Header
,
Vector
CsvHal
)
)
readCsvHal
=
fmap
readCsvHalLazyBS
.
BL
.
readFile
-- | TODO use readByteStringLazy
readCsvHalLazyBS
::
BL
.
ByteString
->
(
Header
,
Vector
CsvHal
)
readCsvHalLazyBS
bs
=
case
decodeByNameWith
csvDecodeOptions
bs
of
Left
e
->
panic
(
pack
e
)
Right
csvDocs
->
csvDocs
readCsvHalLazyBS
::
BL
.
ByteString
->
Either
Prelude
.
String
(
Header
,
Vector
CsvHal
)
readCsvHalLazyBS
bs
=
decodeByNameWith
csvDecodeOptions
bs
readCsvHalBSStrict
::
BS
.
ByteString
->
(
Header
,
Vector
CsvHal
)
readCsvHalBSStrict
::
BS
.
ByteString
->
Either
Prelude
.
String
(
Header
,
Vector
CsvHal
)
readCsvHalBSStrict
=
readCsvHalLazyBS
.
BL
.
fromStrict
------------------------------------------------------------------------
...
...
@@ -360,7 +368,7 @@ csvHal2doc (CsvHal title source
csv2doc
::
CsvDoc
->
HyperdataDocument
csv2doc
(
CsvDoc
title
source
pub_year
pub_month
pub_day
(
IntOrDec
pub_year
)
pub_month
pub_day
abstract
authors
)
=
HyperdataDocument
(
Just
"CsvHal"
)
Nothing
Nothing
...
...
@@ -382,18 +390,22 @@ csv2doc (CsvDoc title source
Nothing
------------------------------------------------------------------------
-- | Parse a HAL CSV export from disk into hyperdata documents, or return
-- the CSV parse-error message.
parseHal :: FilePath -> IO (Either Prelude.String [HyperdataDocument])
parseHal fp = do
  parsed <- readCsvHal fp
  pure $ (V.toList . V.map csvHal2doc . snd) <$> parsed
-- | Like 'parseHal' but reads from an in-memory lazy 'BL.ByteString'.
parseHal' :: BL.ByteString -> Either Prelude.String [HyperdataDocument]
parseHal' = fmap (V.toList . V.map csvHal2doc . snd) . readCsvHalLazyBS
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Parse a Gargantext CSV corpus file from disk into hyperdata
-- documents, or return the CSV parse-error message.
parseCsv :: FilePath -> IO (Either Prelude.String [HyperdataDocument])
parseCsv fp = do
  parsed <- readFile fp
  pure $ (V.toList . V.map csv2doc . snd) <$> parsed
-- | Like 'parseCsv' but reads from an in-memory lazy 'BL.ByteString'.
parseCsv' :: BL.ByteString -> Either Prelude.String [HyperdataDocument]
parseCsv' = fmap (V.toList . V.map csv2doc . snd) . readCsvLazyBS
------------------------------------------------------------------------
-- Csv v3 weighted for phylo
...
...
@@ -425,4 +437,4 @@ readWeightedCsv fp =
case
decodeByNameWith
csvDecodeOptions
bs
of
Left
e
->
panic
(
pack
e
)
Right
corpus
->
corpus
)
$
BL
.
readFile
fp
\ No newline at end of file
)
$
BL
.
readFile
fp
src/Gargantext/Core/Text/Terms.hs
View file @
50497ca2
...
...
@@ -69,7 +69,7 @@ data TermType lang
,
_tt_ngramsSize
::
!
Int
,
_tt_model
::
!
(
Maybe
(
Tries
Token
()
))
}
deriving
Generic
deriving
(
Generic
)
makeLenses
''
T
ermType
--group :: [Text] -> [Text]
...
...
src/Gargantext/Data/HashMap/Strict/Utils.hs
View file @
50497ca2
...
...
@@ -23,9 +23,6 @@ partitionWithKey :: (Ord a, Hashable k) => (k -> a -> Bool) -> HashMap k a -> (H
partitionWithKey
p
m
=
(
HashMap
.
filterWithKey
p
m
,
HashMap
.
filterWithKey
(
\
k
->
not
.
p
k
)
m
)
mapKeys
::
(
Ord
k2
,
Hashable
k2
)
=>
(
k1
->
k2
)
->
HashMap
k1
a
->
HashMap
k2
a
mapKeys
f
=
HashMap
.
fromList
.
HashMap
.
foldrWithKey
(
\
k
x
xs
->
(
f
k
,
x
)
:
xs
)
[]
------------------------------------------------------------------------
-- getKeyWithMaxValue :: Hashable k => HashMap k a -> Maybe k
getKeysOrderedByValueMaxFirst
::
(
Ord
k
,
Hashable
k
,
Ord
a
)
=>
HashMap
k
a
->
[
k
]
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
50497ca2
...
...
@@ -56,7 +56,7 @@ import Data.Map (Map, lookup)
import
Data.Maybe
(
catMaybes
)
import
Data.Monoid
import
Data.Swagger
import
Data.Text
(
splitOn
)
import
qualified
Data.Text
as
T
import
Data.Traversable
(
traverse
)
import
Data.Tuple.Extra
(
first
,
second
)
import
GHC.Generics
(
Generic
)
...
...
@@ -178,11 +178,12 @@ flowCorpusFile :: (FlowCmdM env err m)
->
TermType
Lang
->
FileFormat
->
FilePath
->
m
CorpusId
flowCorpusFile
u
n
l
la
ff
fp
=
do
docs
<-
liftBase
(
splitEvery
500
<$>
take
l
<$>
parseFile
ff
fp
)
flowCorpus
u
n
la
(
map
(
map
toHyperdataDocument
)
docs
)
eParsed
<-
liftBase
$
parseFile
ff
fp
case
eParsed
of
Right
parsed
->
do
let
docs
=
splitEvery
500
$
take
l
parsed
flowCorpus
u
n
la
(
map
(
map
toHyperdataDocument
)
docs
)
Left
e
->
panic
$
"Error: "
<>
(
T
.
pack
e
)
------------------------------------------------------------------------
-- | TODO improve the needed type to create/update a corpus
...
...
@@ -425,11 +426,11 @@ instance ExtractNgramsT HyperdataDocument
$
_hd_source
doc
institutes
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
map
toSchoolName
.
(
splitOn
", "
))
$
maybe
[
"Nothing"
]
(
map
toSchoolName
.
(
T
.
splitOn
", "
))
$
_hd_institutes
doc
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
splitOn
", "
)
$
maybe
[
"Nothing"
]
(
T
.
splitOn
", "
)
$
_hd_authors
doc
terms'
<-
map
(
enrichedTerms
(
lang'
^.
tt_lang
)
CoreNLP
NP
)
...
...
src/Gargantext/Database/Prelude.hs
View file @
50497ca2
...
...
@@ -31,7 +31,6 @@ import Database.PostgreSQL.Simple (Connection, connect)
import
Database.PostgreSQL.Simple.FromField
(
Conversion
,
ResultError
(
ConversionFailed
),
fromField
,
returnError
)
import
Database.PostgreSQL.Simple.Internal
(
Field
)
import
Gargantext.Prelude
import
Gargantext.Prelude.Config
(
GargConfig
())
import
Opaleye
(
Query
,
Unpackspec
,
showSqlForPostgres
,
FromFields
,
Select
,
runQuery
,
PGJsonb
,
QueryRunnerColumnDefault
)
import
Opaleye.Aggregate
(
countRows
)
import
System.IO
(
FilePath
)
...
...
@@ -41,6 +40,8 @@ import qualified Data.ByteString as DB
import
qualified
Data.List
as
DL
import
qualified
Database.PostgreSQL.Simple
as
PGS
import
Gargantext.Prelude.Config
(
GargConfig
())
-------------------------------------------------------
class
HasConnectionPool
env
where
connPool
::
Getter
env
(
Pool
Connection
)
...
...
src/Gargantext/Database/Query/Table/Node.hs
View file @
50497ca2
...
...
@@ -121,7 +121,7 @@ getClosestParentIdByType :: HasDBid NodeType
getClosestParentIdByType
nId
nType
=
do
result
<-
runPGSQuery
query
(
nId
,
0
::
Int
)
case
result
of
[
DPS
.
Only
parentId
,
DPS
.
Only
pTypename
]
->
do
[
(
NodeId
parentId
,
pTypename
)
]
->
do
if
toDBid
nType
==
pTypename
then
pure
$
Just
$
NodeId
parentId
else
...
...
@@ -131,7 +131,7 @@ getClosestParentIdByType nId nType = do
query
::
DPS
.
Query
query
=
[
sql
|
SELECT n2.id, n2.typename
FROM nodes n1
FROM nodes n1
JOIN nodes n2 ON n1.parent_id = n2.id
WHERE n1.id = ? AND 0 = ?;
|]
...
...
stack.yaml
View file @
50497ca2
resolver
:
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/1
7/10
.yaml
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/1
8/4
.yaml
flags
:
{}
extra-package-dbs
:
[]
packages
:
...
...
@@ -97,6 +97,7 @@ extra-deps:
-
json-stream-0.4.2.4@sha256:8b7f17d54a6e1e6311756270f8bcf51e91bab4300945400de66118470dcf51b9,4716
-
located-base-0.1.1.1@sha256:7c6395f2b6fbf2d5f76c3514f774423838c0ea94e1c6a5530dd3c94b30c9d1c8,1904
-
logging-effect-1.3.12@sha256:72d168dd09887649ba9501627219b6027cbec2d5541931555b7885b133785ce3,1679
-
MissingH-1.4.3.0@sha256:32f9892ec98cd21df4f4d3ed8d95a3831ae74287ea0641d6f09b2dc6ef061d39,4859
-
monoid-extras-0.5.1@sha256:438dbfd7b4dce47d8f0ca577f56caf94bd1e21391afa545cad09fe7cf2e5793d,2333
-
rake-0.0.1@sha256:3380f6567fb17505d1095b7f32222c0b631fa04126ad39726c84262da99c08b3,2025
-
servant-cassava-0.10.1@sha256:07e7b6ca67cf57dcb4a0041a399a25d058844505837c6479e01d62be59d01fdf,1665
...
...
@@ -108,4 +109,4 @@ extra-deps:
-
xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950
# need Vector.uncons
-
vector-0.12.3.0@sha256:0ae2c1ba86f0077910be242ec6802cc3d7725fe7b2bea6987201aa3737b239b5,7953
\ No newline at end of file
-
vector-0.12.3.0@sha256:0ae2c1ba86f0077910be242ec6802cc3d7725fe7b2bea6987201aa3737b239b5,7953
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment