Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Julien Moutinho
haskell-gargantext
Commits
2b1c8e4e
Commit
2b1c8e4e
authored
Aug 24, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
[FIX] merge
parents
c81f4315
5bee1178
Changes
32
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
32 changed files
with
1013 additions
and
329 deletions
+1013
-329
README.md
README.md
+1
-1
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+58
-49
CleanCsvCorpus.hs
bin/gargantext-cli/CleanCsvCorpus.hs
+14
-11
Main.hs
bin/gargantext-cli/Main.hs
+27
-30
Main.hs
bin/gargantext-phylo/Main.hs
+10
-9
docker-compose.yaml
devops/docker/docker-compose.yaml
+1
-1
search-api.org
docs/search-api.org
+288
-0
gargantext.ini_toModify
gargantext.ini_toModify
+0
-3
pkgs.nix
nix/pkgs.nix
+2
-0
package.yaml
package.yaml
+3
-1
Types.hs
src/Gargantext/API/Admin/Orchestrator/Types.hs
+2
-0
Job.hs
src/Gargantext/API/Job.hs
+21
-16
Types.hs
src/Gargantext/API/Ngrams/List/Types.hs
+54
-0
Node.hs
src/Gargantext/API/Node.hs
+3
-0
Export.hs
src/Gargantext/API/Node/Corpus/Export.hs
+5
-4
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+96
-89
Searx.hs
src/Gargantext/API/Node/Corpus/Searx.hs
+110
-0
Types.hs
src/Gargantext/API/Node/Corpus/Types.hs
+67
-0
File.hs
src/Gargantext/API/Node/File.hs
+8
-7
FrameCalcUpload.hs
src/Gargantext/API/Node/FrameCalcUpload.hs
+90
-0
New.hs
src/Gargantext/API/Node/New.hs
+6
-5
Routes.hs
src/Gargantext/API/Routes.hs
+18
-15
IMT.hs
src/Gargantext/Core/Ext/IMT.hs
+9
-4
Convert.hs
src/Gargantext/Core/Text/Convert.hs
+8
-2
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+33
-21
CSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+57
-45
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+1
-1
Utils.hs
src/Gargantext/Data/HashMap/Strict/Utils.hs
+0
-3
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+9
-8
Prelude.hs
src/Gargantext/Database/Prelude.hs
+2
-1
Node.hs
src/Gargantext/Database/Query/Table/Node.hs
+2
-2
stack.yaml
stack.yaml
+8
-1
No files found.
README.md
View file @
2b1c8e4e
...
@@ -140,7 +140,7 @@ docker build -t cgenie/stack-build:lts-17.13-garg .
...
@@ -140,7 +140,7 @@ docker build -t cgenie/stack-build:lts-17.13-garg .
then run:
then run:
```
sh
```
sh
stack
--docker
run
gargantext-init
--
gargantext.ini
stack
--docker
exec
gargantext-init
--
gargantext.ini
```
```
### Importing data
### Importing data
...
...
bin/gargantext-adaptative-phylo/Main.hs
View file @
2b1c8e4e
...
@@ -16,17 +16,30 @@ Adaptative Phylo binaries
...
@@ -16,17 +16,30 @@ Adaptative Phylo binaries
module
Main
where
module
Main
where
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Aeson
import
Data.Aeson
import
Data.Either
(
Either
(
..
))
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.String
(
String
)
import
Data.String
(
String
)
import
GHC.IO
(
FilePath
)
import
qualified
Prelude
as
Prelude
import
System.Environment
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
Data.Text
(
Text
,
unwords
,
unpack
,
replace
,
pack
)
import
Data.Text
(
Text
,
unwords
,
unpack
,
replace
,
pack
)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Text
as
T
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
...
@@ -36,20 +49,6 @@ import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setCon
...
@@ -36,20 +49,6 @@ import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setCon
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
import
GHC.IO
(
FilePath
)
import
Prelude
(
Either
(
Left
,
Right
),
toInteger
)
import
System.Environment
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
qualified
Data.Text
as
T
-- import Debug.Trace (trace)
-- import Debug.Trace (trace)
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
...
@@ -84,13 +83,13 @@ toDays y m d = fromIntegral
...
@@ -84,13 +83,13 @@ toDays y m d = fromIntegral
toPhyloDate
::
Int
->
Int
->
Int
->
TimeUnit
->
Date
toPhyloDate
::
Int
->
Int
->
Int
->
TimeUnit
->
Date
toPhyloDate
y
m
d
tu
=
case
tu
of
toPhyloDate
y
m
d
tu
=
case
tu
of
Year
_
_
_
->
y
Year
_
_
_
->
y
Month
_
_
_
->
toMonths
(
toInteger
y
)
m
d
Month
_
_
_
->
toMonths
(
Prelude
.
toInteger
y
)
m
d
Week
_
_
_
->
div
(
toDays
(
toInteger
y
)
m
d
)
7
Week
_
_
_
->
div
(
toDays
(
Prelude
.
toInteger
y
)
m
d
)
7
Day
_
_
_
->
toDays
(
toInteger
y
)
m
d
Day
_
_
_
->
toDays
(
Prelude
.
toInteger
y
)
m
d
toPhyloDate'
::
Int
->
Int
->
Int
->
Text
toPhyloDate'
::
Int
->
Int
->
Int
->
Text
toPhyloDate'
y
m
d
=
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
toPhyloDate'
y
m
d
=
pack
$
showGregorian
$
fromGregorian
(
Prelude
.
toInteger
y
)
m
d
--------------
--------------
...
@@ -113,9 +112,14 @@ termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList p
...
@@ -113,9 +112,14 @@ termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList p
-- | To transform a Wos file (or [file]) into a list of Docs
-- | To transform a Wos file (or [file]) into a list of Docs
wosToDocs
::
Int
->
Patterns
->
TimeUnit
->
FilePath
->
IO
([
Document
])
wosToDocs
::
Int
->
Patterns
->
TimeUnit
->
FilePath
->
IO
[
Document
]
wosToDocs
limit
patterns
time
path
=
do
wosToDocs
limit
patterns
time
path
=
do
files
<-
getFilesFromPath
path
files
<-
getFilesFromPath
path
let
parseFile'
file
=
do
eParsed
<-
parseFile
WOS
(
path
<>
file
)
case
eParsed
of
Right
ps
->
pure
ps
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
take
limit
take
limit
<$>
map
(
\
d
->
let
title
=
fromJust
$
_hd_title
d
<$>
map
(
\
d
->
let
title
=
fromJust
$
_hd_title
d
abstr
=
if
(
isJust
$
_hd_abstract
d
)
abstr
=
if
(
isJust
$
_hd_abstract
d
)
...
@@ -134,22 +138,27 @@ wosToDocs limit patterns time path = do
...
@@ -134,22 +138,27 @@ wosToDocs limit patterns time path = do
<$>
mapConcurrently
(
\
file
->
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
&&
(
isJust
$
_hd_title
d
))
&&
(
isJust
$
_hd_title
d
))
<$>
parseFile
WOS
(
path
<>
file
)
)
files
<$>
parseFile'
file
)
files
-- To transform a Csv file into a list of Document
-- To transform a Csv file into a list of Document
csvToDocs
::
CorpusParser
->
Patterns
->
TimeUnit
->
FilePath
->
IO
([
Document
])
csvToDocs
::
CorpusParser
->
Patterns
->
TimeUnit
->
FilePath
->
IO
[
Document
]
csvToDocs
parser
patterns
time
path
=
csvToDocs
parser
patterns
time
path
=
case
parser
of
case
parser
of
Wos
_
->
undefined
Wos
_
->
undefined
Csv
limit
->
Vector
.
toList
Csv
limit
->
do
<$>
Vector
.
take
limit
eR
<-
Csv
.
readFile
path
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
case
eR
of
(
toPhyloDate'
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
Right
r
->
pure
$
Vector
.
toList
$
Vector
.
take
limit
$
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
unIntOrDec
$
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
Csv
.
unIntOrDec
$
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
Nothing
[]
[]
)
<$>
snd
<$>
Csv
.
readFile
path
)
$
snd
r
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
Csv'
limit
->
Vector
.
toList
Csv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
...
...
bin/gargantext-cli/CleanCsvCorpus.hs
View file @
2b1c8e4e
...
@@ -15,6 +15,7 @@ compress the contexts around the main terms of the query.
...
@@ -15,6 +15,7 @@ compress the contexts around the main terms of the query.
module
CleanCsvCorpus
where
module
CleanCsvCorpus
where
--import GHC.IO (FilePath)
--import GHC.IO (FilePath)
import
Data.Either
(
Either
(
..
))
import
Data.SearchEngine
as
S
import
Data.SearchEngine
as
S
import
qualified
Data.Set
as
S
import
qualified
Data.Set
as
S
import
Data.Text
(
pack
)
import
Data.Text
(
pack
)
...
@@ -39,8 +40,9 @@ main = do
...
@@ -39,8 +40,9 @@ main = do
--let q = ["water", "scarcity", "morocco", "shortage","flood"]
--let q = ["water", "scarcity", "morocco", "shortage","flood"]
let
q
=
[
"gratuit"
,
"gratuité"
,
"culture"
,
"culturel"
]
let
q
=
[
"gratuit"
,
"gratuité"
,
"culture"
,
"culturel"
]
(
h
,
csvDocs
)
<-
CSV
.
readFile
rPath
eDocs
<-
CSV
.
readFile
rPath
case
eDocs
of
Right
(
h
,
csvDocs
)
->
do
putStrLn
$
"Number of documents before:"
<>
show
(
V
.
length
csvDocs
)
putStrLn
$
"Number of documents before:"
<>
show
(
V
.
length
csvDocs
)
putStrLn
$
"Mean size of docs:"
<>
show
(
CSV
.
docsSize
csvDocs
)
putStrLn
$
"Mean size of docs:"
<>
show
(
CSV
.
docsSize
csvDocs
)
...
@@ -53,3 +55,4 @@ main = do
...
@@ -53,3 +55,4 @@ main = do
putStrLn
$
"Mean size of docs:"
<>
show
(
CSV
.
docsSize
docs'
)
putStrLn
$
"Mean size of docs:"
<>
show
(
CSV
.
docsSize
docs'
)
CSV
.
writeFile
wPath
(
h
,
docs'
)
CSV
.
writeFile
wPath
(
h
,
docs'
)
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
bin/gargantext-cli/Main.hs
View file @
2b1c8e4e
...
@@ -17,30 +17,24 @@ Main specifications to index a corpus with a term list
...
@@ -17,30 +17,24 @@ Main specifications to index a corpus with a term list
module
Main
where
module
Main
where
import
Data.ByteString.Lazy
(
writeFile
)
import
Control.Concurrent.Async
as
CCA
(
mapConcurrently
)
import
Control.Concurrent
(
getNumCapabilities
,
myThreadId
,
threadCapability
)
import
Data.Text
(
pack
)
import
qualified
Data.Text
as
DT
import
Data.Tuple.Extra
(
both
)
import
qualified
Data.Vector
as
DV
import
Control.Monad
(
zipWithM
)
import
Control.Monad
(
zipWithM
)
import
Control.Monad.IO.Class
import
Control.Monad.IO.Class
import
Data.Map
(
Map
)
import
qualified
Data.Map
as
DM
import
GHC.Generics
import
Data.Aeson
import
Data.Aeson
import
Data.ByteString.Lazy
(
writeFile
)
import
Data.
Text
(
Text
)
import
Data.
Either
(
Either
(
..
)
)
import
Data.List
(
cycle
,
concat
,
unwords
)
import
Data.List
(
cycle
,
concat
,
unwords
)
import
Data.List.Split
(
chunksOf
)
import
Data.List.Split
(
chunksOf
)
import
Data.Map
(
Map
)
import
qualified
Data.Map
as
DM
import
Data.Text
(
pack
,
Text
)
import
qualified
Data.Text
as
DT
import
Data.Tuple.Extra
(
both
)
import
qualified
Data.Vector
as
DV
import
GHC.Generics
import
System.IO
(
hPutStr
,
hFlush
,
stderr
)
import
System.IO
(
hPutStr
,
hFlush
,
stderr
)
import
System.Environment
import
System.Environment
import
Control.Concurrent.Async
as
CCA
(
mapConcurrently
)
import
Control.Concurrent
(
getNumCapabilities
,
myThreadId
,
threadCapability
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core
import
Gargantext.Core
...
@@ -48,7 +42,7 @@ import Gargantext.Core.Types
...
@@ -48,7 +42,7 @@ import Gargantext.Core.Types
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Terms.WithList
import
Gargantext.Core.Text.Terms.WithList
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
readFile
,
csv_title
,
csv_abstract
,
csv_publication_year
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
readFile
,
csv_title
,
csv_abstract
,
csv_publication_year
,
unIntOrDec
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms
(
terms
)
import
Gargantext.Core.Text.Terms
(
terms
)
import
Gargantext.Core.Text.Metrics.Count
(
coocOnContexts
,
Coocs
)
import
Gargantext.Core.Text.Metrics.Count
(
coocOnContexts
,
Coocs
)
...
@@ -92,11 +86,13 @@ main = do
...
@@ -92,11 +86,13 @@ main = do
[
corpusFile
,
termListFile
,
outputFile
]
<-
getArgs
[
corpusFile
,
termListFile
,
outputFile
]
<-
getArgs
--corpus :: IO (DM.IntMap [[Text]])
--corpus :: IO (DM.IntMap [[Text]])
corpus
<-
DM
.
fromListWith
(
<>
)
eCorpusFile
<-
readFile
corpusFile
case
eCorpusFile
of
Right
cf
->
do
let
corpus
=
DM
.
fromListWith
(
<>
)
.
DV
.
toList
.
DV
.
toList
.
DV
.
map
(
\
n
->
(
csv_publication_year
n
,
[(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)]))
.
DV
.
map
(
\
n
->
(
unIntOrDec
$
csv_publication_year
n
,
[(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)]))
.
snd
.
snd
$
cf
<$>
readFile
corpusFile
-- termListMap :: [Text]
-- termListMap :: [Text]
termList
<-
csvMapTermList
termListFile
termList
<-
csvMapTermList
termListFile
...
@@ -108,6 +104,7 @@ main = do
...
@@ -108,6 +104,7 @@ main = do
-- r <- mapConcurrentlyChunked (filterTermsAndCooc patterns) (DM.toList corpus)
-- r <- mapConcurrentlyChunked (filterTermsAndCooc patterns) (DM.toList corpus)
r
<-
mapConcurrently
(
filterTermsAndCooc
patterns
)
(
DM
.
toList
corpus
)
r
<-
mapConcurrently
(
filterTermsAndCooc
patterns
)
(
DM
.
toList
corpus
)
writeFile
outputFile
$
encode
(
CoocByYears
r
)
writeFile
outputFile
$
encode
(
CoocByYears
r
)
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
...
...
bin/gargantext-phylo/Main.hs
View file @
2b1c8e4e
...
@@ -24,6 +24,16 @@ import Data.Maybe
...
@@ -24,6 +24,16 @@ import Data.Maybe
import
Data.Text
(
Text
,
unwords
)
import
Data.Text
(
Text
,
unwords
)
import
GHC.Generics
import
GHC.Generics
import
GHC.IO
(
FilePath
)
import
GHC.IO
(
FilePath
)
import
System.Directory
(
doesFileExist
)
import
System.Environment
import
qualified
Data.ByteString.Lazy
as
L
import
qualified
Data.List
as
DL
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
DT
import
qualified
Data.Vector
as
DV
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
CSV
import
qualified
Prelude
as
P
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Context
(
TermList
)
...
@@ -36,15 +46,6 @@ import Gargantext.Core.Viz.Phylo.LevelMaker
...
@@ -36,15 +46,6 @@ import Gargantext.Core.Viz.Phylo.LevelMaker
import
Gargantext.Core.Viz.Phylo.Tools
import
Gargantext.Core.Viz.Phylo.Tools
import
Gargantext.Core.Viz.Phylo.View.Export
import
Gargantext.Core.Viz.Phylo.View.Export
import
Gargantext.Core.Viz.Phylo.View.ViewMaker
import
Gargantext.Core.Viz.Phylo.View.ViewMaker
import
System.Directory
(
doesFileExist
)
import
System.Environment
import
qualified
Data.ByteString.Lazy
as
L
import
qualified
Data.List
as
DL
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
DT
import
qualified
Data.Vector
as
DV
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
CSV
import
qualified
Prelude
as
P
--------------
--------------
...
...
devops/docker/docker-compose.yaml
View file @
2b1c8e4e
...
@@ -21,7 +21,7 @@ services:
...
@@ -21,7 +21,7 @@ services:
ports
:
ports
:
-
8081:80
-
8081:80
environment
:
environment
:
PGADMIN_DEFAULT_EMAIL
:
admin@localhost
PGADMIN_DEFAULT_EMAIL
:
admin@localhost
.lan
PGADMIN_DEFAULT_PASSWORD
:
admin
PGADMIN_DEFAULT_PASSWORD
:
admin
depends_on
:
depends_on
:
...
...
docs/search-api.org
0 → 100644
View file @
2b1c8e4e
This diff is collapsed.
Click to expand it.
gargantext.ini_toModify
View file @
2b1c8e4e
...
@@ -13,9 +13,6 @@ MASTER_USER = gargantua
...
@@ -13,9 +13,6 @@ MASTER_USER = gargantua
SECRET_KEY = PASSWORD_TO_CHANGE
SECRET_KEY = PASSWORD_TO_CHANGE
# Data path to local files
DATA_FILEPATH = FILEPATH_TO_CHANGE
# Data path to local files
# Data path to local files
DATA_FILEPATH = FILEPATH_TO_CHANGE
DATA_FILEPATH = FILEPATH_TO_CHANGE
...
...
nix/pkgs.nix
View file @
2b1c8e4e
...
@@ -12,6 +12,8 @@ rec {
...
@@ -12,6 +12,8 @@ rec {
git
git
gmp
gmp
gsl
gsl
haskell-language-server
hlint
igraph
igraph
liblapack
liblapack
lzma
lzma
...
...
package.yaml
View file @
2b1c8e4e
name
:
gargantext
name
:
gargantext
version
:
'
0.0.2.9.6'
version
:
'
0.0.2.9.6
.1
'
synopsis
:
Search, map, share
synopsis
:
Search, map, share
description
:
Please see README.md
description
:
Please see README.md
category
:
Data
category
:
Data
...
@@ -189,6 +189,7 @@ library:
...
@@ -189,6 +189,7 @@ library:
-
random
-
random
-
rdf4h
-
rdf4h
-
regex-compat
-
regex-compat
-
regex-tdfa
-
resource-pool
-
resource-pool
-
resourcet
-
resourcet
-
safe
-
safe
...
@@ -224,6 +225,7 @@ library:
...
@@ -224,6 +225,7 @@ library:
-
transformers
-
transformers
-
transformers-base
-
transformers-base
-
unordered-containers
-
unordered-containers
-
utf8-string
-
uuid
-
uuid
-
validity
-
validity
-
vector
-
vector
...
...
src/Gargantext/API/Admin/Orchestrator/Types.hs
View file @
2b1c8e4e
...
@@ -101,6 +101,8 @@ data JobLog = JobLog
...
@@ -101,6 +101,8 @@ data JobLog = JobLog
}
}
deriving
(
Show
,
Generic
)
deriving
(
Show
,
Generic
)
makeLenses
''
J
obLog
instance
Arbitrary
JobLog
where
instance
Arbitrary
JobLog
where
arbitrary
=
JobLog
arbitrary
=
JobLog
<$>
arbitrary
<$>
arbitrary
...
...
src/Gargantext/API/Job.hs
View file @
2b1c8e4e
module
Gargantext.API.Job
where
module
Gargantext.API.Job
where
import
Control.Lens
(
over
,
_Just
)
import
Data.IORef
import
Data.IORef
import
Data.Maybe
import
Data.Maybe
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
))
import
Gargantext.API.Admin.Orchestrator.Types
jobLogInit
::
Int
->
JobLog
jobLogInit
::
Int
->
JobLog
...
@@ -16,25 +17,29 @@ jobLogInit rem =
...
@@ -16,25 +17,29 @@ jobLogInit rem =
,
_scst_events
=
Just
[]
}
,
_scst_events
=
Just
[]
}
jobLogSuccess
::
JobLog
->
JobLog
jobLogSuccess
::
JobLog
->
JobLog
jobLogSuccess
(
JobLog
{
_scst_succeeded
=
mSucc
jobLogSuccess
jl
=
over
(
scst_succeeded
.
_Just
)
(
+
1
)
$
,
_scst_remaining
=
mRem
over
(
scst_remaining
.
_Just
)
(
\
x
->
x
-
1
)
jl
,
_scst_failed
=
mFail
,
_scst_events
=
evt
})
=
JobLog
{
_scst_succeeded
=
(
+
1
)
<$>
mSucc
,
_scst_remaining
=
(
\
x
->
x
-
1
)
<$>
mRem
,
_scst_failed
=
mFail
,
_scst_events
=
evt
}
jobLogFail
::
JobLog
->
JobLog
jobLogFail
::
JobLog
->
JobLog
jobLogFail
(
JobLog
{
_scst_succeeded
=
mSucc
jobLogFail
jl
=
over
(
scst_failed
.
_Just
)
(
+
1
)
$
over
(
scst_remaining
.
_Just
)
(
\
x
->
x
-
1
)
jl
jobLogFailTotal
::
JobLog
->
JobLog
jobLogFailTotal
(
JobLog
{
_scst_succeeded
=
mSucc
,
_scst_remaining
=
mRem
,
_scst_remaining
=
mRem
,
_scst_failed
=
mFail
,
_scst_failed
=
mFail
,
_scst_events
=
evt
})
=
,
_scst_events
=
evt
})
=
JobLog
{
_scst_succeeded
=
mSucc
JobLog
{
_scst_succeeded
=
mSucc
,
_scst_remaining
=
(
\
x
->
x
-
1
)
<$>
m
Rem
,
_scst_remaining
=
new
Rem
,
_scst_failed
=
(
+
1
)
<$>
m
Fail
,
_scst_failed
=
new
Fail
,
_scst_events
=
evt
}
,
_scst_events
=
evt
}
where
(
newRem
,
newFail
)
=
case
mRem
of
Nothing
->
(
Nothing
,
mFail
)
Just
rem
->
(
Just
0
,
(
+
rem
)
<$>
mFail
)
jobLogEvt
::
JobLog
->
ScraperEvent
->
JobLog
jobLogEvt
jl
evt
=
over
(
scst_events
.
_Just
)
(
\
evts
->
(
evt
:
evts
))
jl
runJobLog
::
MonadBase
IO
m
=>
Int
->
(
JobLog
->
m
()
)
->
m
(
m
()
,
m
()
,
m
JobLog
)
runJobLog
::
MonadBase
IO
m
=>
Int
->
(
JobLog
->
m
()
)
->
m
(
m
()
,
m
()
,
m
JobLog
)
runJobLog
num
logStatus
=
do
runJobLog
num
logStatus
=
do
...
...
src/Gargantext/API/Ngrams/List/Types.hs
0 → 100644
View file @
2b1c8e4e
module
Gargantext.API.Ngrams.List.Types
where
--{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
--import Control.Lens hiding (elements, Indexed)
import
Data.Aeson
import
Data.Swagger
(
ToSchema
,
declareNamedSchema
,
genericDeclareNamedSchema
)
import
Data.Text
import
Servant.Job.Utils
(
jsonOptions
)
import
Web.FormUrlEncoded
(
FromForm
)
import
Protolude
import
Gargantext.API.Ngrams.Types
(
NgramsList
)
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
------------------------------------------------------------------------
data
WithFile
=
WithFile
{
_wf_filetype
::
!
FileType
,
_wf_data
::
!
NgramsList
,
_wf_name
::
!
Text
}
deriving
(
Eq
,
Show
,
Generic
)
--makeLenses ''WithFile
instance
FromForm
WithFile
instance
FromJSON
WithFile
where
parseJSON
=
genericParseJSON
$
jsonOptions
"_wf_"
instance
ToJSON
WithFile
where
toJSON
=
genericToJSON
$
jsonOptions
"_wf_"
instance
ToSchema
WithFile
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_wf_"
)
------------------------------------------------------------------------
data
WithTextFile
=
WithTextFile
{
_wtf_filetype
::
!
FileType
,
_wtf_data
::
!
Text
,
_wtf_name
::
!
Text
}
deriving
(
Eq
,
Show
,
Generic
)
--makeLenses ''WithTextFile
instance
FromForm
WithTextFile
instance
FromJSON
WithTextFile
where
parseJSON
=
genericParseJSON
$
jsonOptions
"_wtf_"
instance
ToJSON
WithTextFile
where
toJSON
=
genericToJSON
$
jsonOptions
"_wtf_"
instance
ToSchema
WithTextFile
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_wtf_"
)
src/Gargantext/API/Node.hs
View file @
2b1c8e4e
...
@@ -46,6 +46,7 @@ import Gargantext.API.Metrics
...
@@ -46,6 +46,7 @@ import Gargantext.API.Metrics
import
Gargantext.API.Ngrams
(
TableNgramsApi
,
apiNgramsTableCorpus
)
import
Gargantext.API.Ngrams
(
TableNgramsApi
,
apiNgramsTableCorpus
)
import
Gargantext.API.Ngrams.Types
(
TabType
(
..
))
import
Gargantext.API.Ngrams.Types
(
TabType
(
..
))
import
Gargantext.API.Node.File
import
Gargantext.API.Node.File
import
Gargantext.API.Node.FrameCalcUpload
(
FrameCalcUploadAPI
,
frameCalcUploadAPI
)
import
Gargantext.API.Node.New
import
Gargantext.API.Node.New
import
Gargantext.API.Prelude
import
Gargantext.API.Prelude
import
Gargantext.API.Table
import
Gargantext.API.Table
...
@@ -123,6 +124,7 @@ type NodeAPI a = Get '[JSON] (Node a)
...
@@ -123,6 +124,7 @@ type NodeAPI a = Get '[JSON] (Node a)
:<|>
"rename"
:>
RenameApi
:<|>
"rename"
:>
RenameApi
:<|>
PostNodeApi
-- TODO move to children POST
:<|>
PostNodeApi
-- TODO move to children POST
:<|>
PostNodeAsync
:<|>
PostNodeAsync
:<|>
FrameCalcUploadAPI
:<|>
ReqBody
'[
J
SON
]
a
:>
Put
'[
J
SON
]
Int
:<|>
ReqBody
'[
J
SON
]
a
:>
Put
'[
J
SON
]
Int
:<|>
"update"
:>
Update
.
API
:<|>
"update"
:>
Update
.
API
:<|>
Delete
'[
J
SON
]
Int
:<|>
Delete
'[
J
SON
]
Int
...
@@ -205,6 +207,7 @@ nodeAPI p uId id' = withAccess (Proxy :: Proxy (NodeAPI a)) Proxy uId (PathNode
...
@@ -205,6 +207,7 @@ nodeAPI p uId id' = withAccess (Proxy :: Proxy (NodeAPI a)) Proxy uId (PathNode
:<|>
rename
id'
:<|>
rename
id'
:<|>
postNode
uId
id'
:<|>
postNode
uId
id'
:<|>
postNodeAsyncAPI
uId
id'
:<|>
postNodeAsyncAPI
uId
id'
:<|>
frameCalcUploadAPI
uId
id'
:<|>
putNode
id'
:<|>
putNode
id'
:<|>
Update
.
api
uId
id'
:<|>
Update
.
api
uId
id'
:<|>
Action
.
deleteNode
(
RootId
$
NodeId
uId
)
id'
:<|>
Action
.
deleteNode
(
RootId
$
NodeId
uId
)
id'
...
...
src/Gargantext/API/Node/Corpus/Export.hs
View file @
2b1c8e4e
...
@@ -19,6 +19,11 @@ module Gargantext.API.Node.Corpus.Export
...
@@ -19,6 +19,11 @@ module Gargantext.API.Node.Corpus.Export
import
Data.Map
(
Map
)
import
Data.Map
(
Map
)
import
Data.Maybe
(
fromMaybe
)
import
Data.Maybe
(
fromMaybe
)
import
Data.Set
(
Set
)
import
Data.Set
(
Set
)
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.HashMap.Strict
as
HashMap
import
Gargantext.API.Node.Corpus.Export.Types
import
Gargantext.API.Node.Corpus.Export.Types
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
mapTermListRoot
,
getRepo'
)
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
mapTermListRoot
,
getRepo'
)
...
@@ -37,10 +42,6 @@ import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
...
@@ -37,10 +42,6 @@ import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Node
(
_node_id
,
_node_hyperdata
)
import
Gargantext.Database.Schema.Node
(
_node_id
,
_node_hyperdata
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.HashMap.Strict
as
HashMap
--------------------------------------------------
--------------------------------------------------
-- | Hashes are ordered by Set
-- | Hashes are ordered by Set
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
2b1c8e4e
...
@@ -18,8 +18,6 @@ New corpus means either:
...
@@ -18,8 +18,6 @@ New corpus means either:
module
Gargantext.API.Node.Corpus.New
module
Gargantext.API.Node.Corpus.New
where
where
-- import Servant.Multipart
-- import Test.QuickCheck (elements)
import
Control.Lens
hiding
(
elements
,
Empty
)
import
Control.Lens
hiding
(
elements
,
Empty
)
import
Data.Aeson
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Aeson.TH
(
deriveJSON
)
...
@@ -27,15 +25,31 @@ import Data.Either
...
@@ -27,15 +25,31 @@ import Data.Either
import
Data.Maybe
(
fromMaybe
)
import
Data.Maybe
(
fromMaybe
)
import
Data.Swagger
import
Data.Swagger
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
GHC.Generics
(
Generic
)
import
qualified
Prelude
as
Prelude
import
Protolude
(
readFile
)
import
Servant
import
Servant.Job.Utils
(
jsonOptions
)
-- import Servant.Multipart
-- import Test.QuickCheck (elements)
import
Test.QuickCheck.Arbitrary
import
Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Job
(
jobLogSuccess
,
jobLogFailTotal
)
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.Searx
import
Gargantext.API.Node.Corpus.Types
import
Gargantext.API.Node.Types
import
Gargantext.API.Node.Types
import
Gargantext.Core
(
Lang
(
..
)
{-, allLangs-}
)
import
Gargantext.Core
(
Lang
(
..
)
{-, allLangs-}
)
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
qualified
Gargantext.Core.Text.Corpus.Parsers
as
Parser
(
FileFormat
(
..
),
parseFormat
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow
(
flowCorpus
,
getDataText
,
flowDataText
,
TermType
(
..
)
,
DataOrigin
(
..
)
{-, allDataOrigins-}
)
import
Gargantext.Database.Action.Flow
(
flowCorpus
,
getDataText
,
flowDataText
,
TermType
(
..
)
{-, allDataOrigins-}
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Mail
(
sendMail
)
import
Gargantext.Database.Action.Mail
(
sendMail
)
import
Gargantext.Database.Action.Node
(
mkNodeWithParent
)
import
Gargantext.Database.Action.Node
(
mkNodeWithParent
)
...
@@ -45,14 +59,6 @@ import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), UserId)
...
@@ -45,14 +59,6 @@ import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), UserId)
import
Gargantext.Database.Query.Table.Node
(
getNodeWith
)
import
Gargantext.Database.Query.Table.Node
(
getNodeWith
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
import
Servant
import
Servant.Job.Utils
(
jsonOptions
)
import
Test.QuickCheck.Arbitrary
import
qualified
Data.Text
as
T
import
qualified
Gargantext.API.Admin.Orchestrator.Types
as
T
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
qualified
Gargantext.Core.Text.Corpus.Parsers
as
Parser
(
FileFormat
(
..
),
parseFormat
)
import
qualified
Gargantext.Database.GargDB
as
GargDB
import
qualified
Gargantext.Database.GargDB
as
GargDB
------------------------------------------------------------------------
------------------------------------------------------------------------
...
@@ -124,28 +130,11 @@ info :: FlowCmdM env err m => UserId -> m ApiInfo
...
@@ -124,28 +130,11 @@ info :: FlowCmdM env err m => UserId -> m ApiInfo
info
_u
=
pure
$
ApiInfo
API
.
externalAPIs
info
_u
=
pure
$
ApiInfo
API
.
externalAPIs
------------------------------------------------------------------------
------------------------------------------------------------------------
data
Database
=
Empty
|
PubMed
|
HAL
|
IsTex
|
Isidore
deriving
(
Eq
,
Show
,
Generic
)
deriveJSON
(
unPrefix
""
)
''
D
atabase
instance
ToSchema
Database
database2origin
::
Database
->
DataOrigin
database2origin
Empty
=
InternalOrigin
T
.
IsTex
database2origin
PubMed
=
ExternalOrigin
T
.
PubMed
database2origin
HAL
=
ExternalOrigin
T
.
HAL
database2origin
IsTex
=
ExternalOrigin
T
.
IsTex
database2origin
Isidore
=
ExternalOrigin
T
.
Isidore
------------------------------------------------------------------------
------------------------------------------------------------------------
data
WithQuery
=
WithQuery
data
WithQuery
=
WithQuery
{
_wq_query
::
!
Text
{
_wq_query
::
!
Text
,
_wq_databases
::
!
Database
,
_wq_databases
::
!
Database
,
_wq_datafield
::
!
Datafield
,
_wq_lang
::
!
Lang
,
_wq_lang
::
!
Lang
,
_wq_node_id
::
!
Int
,
_wq_node_id
::
!
Int
}
}
...
@@ -189,14 +178,29 @@ addToCorpusWithQuery :: FlowCmdM env err m
...
@@ -189,14 +178,29 @@ addToCorpusWithQuery :: FlowCmdM env err m
->
Maybe
Integer
->
Maybe
Integer
->
(
JobLog
->
m
()
)
->
(
JobLog
->
m
()
)
->
m
JobLog
->
m
JobLog
addToCorpusWithQuery
user
cid
(
WithQuery
q
dbs
l
_nid
)
maybeLimit
logStatus
=
do
addToCorpusWithQuery
user
cid
(
WithQuery
q
dbs
datafield
l
_nid
)
maybeLimit
logStatus
=
do
-- TODO ...
-- TODO ...
logStatus
JobLog
{
_scst_succeeded
=
Just
0
logStatus
JobLog
{
_scst_succeeded
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
5
,
_scst_remaining
=
Just
3
,
_scst_events
=
Just
[]
,
_scst_events
=
Just
[]
}
}
printDebug
"addToCorpusWithQuery"
(
cid
,
dbs
)
printDebug
"[addToCorpusWithQuery] (cid, dbs)"
(
cid
,
dbs
)
printDebug
"[addToCorpusWithQuery] datafield"
datafield
case
datafield
of
Web
->
do
printDebug
"[addToCorpusWithQuery] processing web request"
datafield
_
<-
triggerSearxSearch
cid
q
l
pure
JobLog
{
_scst_succeeded
=
Just
3
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
_
->
do
-- TODO add cid
-- TODO add cid
-- TODO if cid is folder -> create Corpus
-- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus
-- if cid is corpus -> add to corpus
...
@@ -234,16 +238,12 @@ addToCorpusWithForm :: FlowCmdM env err m
...
@@ -234,16 +238,12 @@ addToCorpusWithForm :: FlowCmdM env err m
->
CorpusId
->
CorpusId
->
NewWithForm
->
NewWithForm
->
(
JobLog
->
m
()
)
->
(
JobLog
->
m
()
)
->
JobLog
->
m
JobLog
->
m
JobLog
addToCorpusWithForm
user
cid
(
NewWithForm
ft
d
l
_n
)
logStatus
=
do
addToCorpusWithForm
user
cid
(
NewWithForm
ft
d
l
_n
)
logStatus
jobLog
=
do
printDebug
"[addToCorpusWithForm] Parsing corpus: "
cid
printDebug
"[addToCorpusWithForm] Parsing corpus: "
cid
printDebug
"[addToCorpusWithForm] fileType"
ft
printDebug
"[addToCorpusWithForm] fileType"
ft
logStatus
JobLog
{
_scst_succeeded
=
Just
0
logStatus
jobLog
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
2
,
_scst_events
=
Just
[]
}
let
let
parse
=
case
ft
of
parse
=
case
ft
of
CSV_HAL
->
Parser
.
parseFormat
Parser
.
CsvHal
CSV_HAL
->
Parser
.
parseFormat
Parser
.
CsvHal
...
@@ -252,17 +252,13 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus = do
...
@@ -252,17 +252,13 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus = do
PresseRIS
->
Parser
.
parseFormat
Parser
.
RisPresse
PresseRIS
->
Parser
.
parseFormat
Parser
.
RisPresse
-- TODO granularity of the logStatus
-- TODO granularity of the logStatus
docs
<-
liftBase
$
splitEvery
500
eDocs
<-
liftBase
$
parse
$
cs
d
<$>
take
1000000
case
eDocs
of
<$>
parse
(
cs
d
)
Right
docs'
->
do
let
docs
=
splitEvery
500
$
take
1000000
docs'
printDebug
"Parsing corpus finished : "
cid
printDebug
"Parsing corpus finished : "
cid
logStatus
JobLog
{
_scst_succeeded
=
Just
1
logStatus
jobLog2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_events
=
Just
[]
}
printDebug
"Starting extraction : "
cid
printDebug
"Starting extraction : "
cid
-- TODO granularity of the logStatus
-- TODO granularity of the logStatus
...
@@ -275,11 +271,22 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus = do
...
@@ -275,11 +271,22 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus = do
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
sendMail
user
sendMail
user
pure
JobLog
{
_scst_succeeded
=
Just
2
logStatus
jobLog3
,
_scst_failed
=
Just
0
pure
$
jobLog3
,
_scst_remaining
=
Just
0
Left
e
->
do
,
_scst_events
=
Just
[]
printDebug
"Error"
e
}
logStatus
jobLogE
pure
jobLogE
where
jobLog2
=
jobLogSuccess
jobLog
jobLog3
=
jobLogSuccess
jobLog2
jobLogE
=
jobLogFailTotal
jobLog
parseCsvGargV3Path
::
[
Char
]
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseCsvGargV3Path
fp
=
do
contents
<-
readFile
fp
Parser
.
parseFormat
Parser
.
CsvGargV3
$
cs
contents
{-
{-
addToCorpusWithFile :: FlowCmdM env err m
addToCorpusWithFile :: FlowCmdM env err m
...
...
src/Gargantext/API/Node/Corpus/Searx.hs
0 → 100644
View file @
2b1c8e4e
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.API.Node.Corpus.Searx
where
import
Control.Lens
(
view
)
import
qualified
Data.Aeson
as
Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
Network.HTTP.Client
import
Network.HTTP.Client.TLS
import
qualified
Prelude
as
Prelude
import
Protolude
(
encodeUtf8
,
Text
,
Either
)
import
Gargantext.Prelude
import
Gargantext.Prelude.Config
import
Gargantext.Core
(
Lang
(
..
))
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
)
import
Gargantext.Database.Prelude
(
hasConfig
)
data
SearxResult
=
SearxResult
{
_sr_url
::
Text
,
_sr_title
::
Text
,
_sr_content
::
Maybe
Text
,
_sr_engine
::
Text
,
_sr_score
::
Double
,
_sr_category
::
Text
,
_sr_pretty_url
::
Text
}
deriving
(
Show
,
Eq
,
Generic
)
-- , _sr_parsed_url
-- , _sr_engines
-- , _sr_positions
$
(
deriveJSON
(
unPrefix
"_sr_"
)
''
S
earxResult
)
data
SearxResponse
=
SearxResponse
{
_srs_query
::
Text
,
_srs_number_of_results
::
Int
,
_srs_results
::
[
SearxResult
]
}
deriving
(
Show
,
Eq
,
Generic
)
-- , _srs_answers
-- , _srs_corrections
-- , _srs_infoboxes
-- , _srs_suggestions :: [Text]
-- , _srs_unresponsive_engines :: [Text] }
$
(
deriveJSON
(
unPrefix
"_srs_"
)
''
S
earxResponse
)
data
FetchSearxParams
=
FetchSearxParams
{
_fsp_language
::
Lang
,
_fsp_manager
::
Manager
,
_fsp_pageno
::
Int
,
_fsp_query
::
Text
,
_fsp_url
::
Text
}
fetchSearxPage
::
FetchSearxParams
->
IO
(
Either
Prelude
.
String
SearxResponse
)
fetchSearxPage
(
FetchSearxParams
{
_fsp_language
,
_fsp_manager
,
_fsp_pageno
,
_fsp_query
,
_fsp_url
})
=
do
-- searx search API:
-- https://searx.github.io/searx/dev/search_api.html?highlight=json
req
<-
parseRequest
$
T
.
unpack
_fsp_url
let
request
=
urlEncodedBody
[
--("category_general", "1")
(
"q"
,
encodeUtf8
_fsp_query
)
,
(
"categories"
,
"news"
)
-- https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/70#note_3976
,
(
"pageno"
,
encodeUtf8
$
T
.
pack
$
show
_fsp_pageno
)
--, ("time_range", "None")
,
(
"language"
,
encodeUtf8
$
T
.
pack
$
show
_fsp_language
)
,
(
"format"
,
"json"
)
]
req
res
<-
httpLbs
request
_fsp_manager
let
dec
=
Aeson
.
eitherDecode
$
responseBody
res
::
(
Either
Prelude
.
String
SearxResponse
)
pure
dec
triggerSearxSearch
::
(
MonadBase
IO
m
,
FlowCmdM
env
err
m
)
=>
CorpusId
->
API
.
Query
->
Lang
->
m
()
triggerSearxSearch
cid
q
l
=
do
printDebug
"[triggerSearxSearch] cid"
cid
printDebug
"[triggerSearxSearch] q"
q
printDebug
"[triggerSearxSearch] l"
l
cfg
<-
view
hasConfig
let
surl
=
_gc_frame_searx_url
cfg
printDebug
"[triggerSearxSearch] surl"
surl
manager
<-
liftBase
$
newManager
tlsManagerSettings
res
<-
liftBase
$
fetchSearxPage
$
FetchSearxParams
{
_fsp_language
=
l
,
_fsp_manager
=
manager
,
_fsp_pageno
=
1
,
_fsp_query
=
q
,
_fsp_url
=
surl
}
printDebug
"[triggerSearxSearch] res"
res
pure
()
src/Gargantext/API/Node/Corpus/Types.hs
0 → 100644
View file @
2b1c8e4e
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.API.Node.Corpus.Types
where
import
Control.Lens
hiding
(
elements
,
Empty
)
import
Control.Monad.Fail
(
fail
)
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Monoid
(
mempty
)
import
Data.Swagger
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
Text.Regex.TDFA
((
=~
))
import
Protolude
((
++
))
import
Gargantext.Prelude
import
qualified
Gargantext.API.Admin.Orchestrator.Types
as
T
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Action.Flow
(
DataOrigin
(
..
))
data
Database
=
Empty
|
PubMed
|
HAL
|
IsTex
|
Isidore
deriving
(
Eq
,
Show
,
Generic
)
deriveJSON
(
unPrefix
""
)
''
D
atabase
instance
ToSchema
Database
database2origin
::
Database
->
DataOrigin
database2origin
Empty
=
InternalOrigin
T
.
IsTex
database2origin
PubMed
=
ExternalOrigin
T
.
PubMed
database2origin
HAL
=
ExternalOrigin
T
.
HAL
database2origin
IsTex
=
ExternalOrigin
T
.
IsTex
database2origin
Isidore
=
ExternalOrigin
T
.
Isidore
------------------------------------------------------------------------
data
Datafield
=
Gargantext
|
External
(
Maybe
Database
)
|
Web
|
Files
deriving
(
Eq
,
Show
,
Generic
)
instance
FromJSON
Datafield
where
parseJSON
=
withText
"Datafield"
$
\
text
->
case
text
of
"Gargantext"
->
pure
Gargantext
"Web"
->
pure
Web
"Files"
->
pure
Files
v
->
let
(
preExternal
,
_
,
postExternal
)
=
v
=~
(
"External "
::
Text
)
::
(
Text
,
Text
,
Text
)
in
if
preExternal
==
""
then
do
db
<-
parseJSON
$
String
postExternal
pure
$
External
db
else
fail
$
"Cannot match patterh 'External <db>' for string "
++
(
T
.
unpack
v
)
instance
ToJSON
Datafield
where
toJSON
(
External
db
)
=
toJSON
$
"External "
++
(
show
db
)
toJSON
s
=
toJSON
$
show
s
instance
ToSchema
Datafield
where
declareNamedSchema
_
=
do
return
$
NamedSchema
(
Just
"Datafield"
)
$
mempty
&
type_
?~
SwaggerObject
src/Gargantext/API/Node/File.hs
View file @
2b1c8e4e
...
@@ -9,6 +9,14 @@ import Control.Lens ((^.))
...
@@ -9,6 +9,14 @@ import Control.Lens ((^.))
import
Data.Swagger
import
Data.Swagger
import
Data.Text
import
Data.Text
import
GHC.Generics
(
Generic
)
import
GHC.Generics
(
Generic
)
import
Servant
import
Servant.Job.Async
(
JobFunction
(
..
),
serveJobsAPI
)
import
qualified
Data.ByteString
as
BS
import
qualified
Data.ByteString.Lazy
as
BSL
import
qualified
Data.MIME.Types
as
DMT
import
qualified
Gargantext.Database.GargDB
as
GargDB
import
qualified
Network.HTTP.Media
as
M
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Node.Types
import
Gargantext.API.Node.Types
...
@@ -22,13 +30,6 @@ import Gargantext.Database.Query.Table.Node (getNodeWith)
...
@@ -22,13 +30,6 @@ import Gargantext.Database.Query.Table.Node (getNodeWith)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Servant
import
Servant.Job.Async
(
JobFunction
(
..
),
serveJobsAPI
)
import
qualified
Data.ByteString
as
BS
import
qualified
Data.ByteString.Lazy
as
BSL
import
qualified
Data.MIME.Types
as
DMT
import
qualified
Gargantext.Database.GargDB
as
GargDB
import
qualified
Network.HTTP.Media
as
M
data
RESPONSE
deriving
Typeable
data
RESPONSE
deriving
Typeable
...
...
src/Gargantext/API/Node/FrameCalcUpload.hs
0 → 100644
View file @
2b1c8e4e
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TypeOperators #-}
module
Gargantext.API.Node.FrameCalcUpload
where
import
Control.Lens
((
^.
))
import
Data.Aeson
import
qualified
Data.ByteString.Lazy
as
BSL
import
qualified
Data.ByteString.UTF8
as
BSU8
import
Data.Swagger
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
Network.HTTP.Client
(
newManager
,
httpLbs
,
parseRequest
,
responseBody
)
import
Network.HTTP.Client.TLS
(
tlsManagerSettings
)
import
Servant
import
Servant.Job.Async
import
Web.FormUrlEncoded
(
FromForm
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Job
(
jobLogInit
,
jobLogSuccess
,
jobLogFail
)
import
Gargantext.API.Node.Corpus.New
(
addToCorpusWithForm
)
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Node.Types
(
NewWithForm
(
..
))
import
Gargantext.API.Prelude
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Database.Action.Flow.Types
import
Gargantext.Database.Admin.Types.Hyperdata.Frame
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Node
(
getClosestParentIdByType
,
getNodeWith
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
data
FrameCalcUpload
=
FrameCalcUpload
()
deriving
(
Generic
)
instance
FromForm
FrameCalcUpload
instance
FromJSON
FrameCalcUpload
instance
ToJSON
FrameCalcUpload
instance
ToSchema
FrameCalcUpload
type
FrameCalcUploadAPI
=
Summary
" FrameCalc upload"
:>
"add"
:>
"framecalc"
:>
"async"
:>
AsyncJobs
JobLog
'[
J
SON
]
FrameCalcUpload
JobLog
frameCalcUploadAPI
::
UserId
->
NodeId
->
GargServer
FrameCalcUploadAPI
frameCalcUploadAPI
uId
nId
=
serveJobsAPI
$
JobFunction
(
\
p
logs
->
frameCalcUploadAsync
uId
nId
p
(
liftBase
.
logs
)
(
jobLogInit
5
)
)
frameCalcUploadAsync
::
FlowCmdM
env
err
m
=>
UserId
->
NodeId
->
FrameCalcUpload
->
(
JobLog
->
m
()
)
->
JobLog
->
m
JobLog
frameCalcUploadAsync
uId
nId
_f
logStatus
jobLog
=
do
logStatus
jobLog
-- printDebug "[frameCalcUploadAsync] uId" uId
-- printDebug "[frameCalcUploadAsync] nId" nId
node
<-
getNodeWith
nId
(
Proxy
::
Proxy
HyperdataFrame
)
let
(
HyperdataFrame
{
_hf_base
=
base
,
_hf_frame_id
=
frame_id
})
=
node
^.
node_hyperdata
let
csvUrl
=
base
<>
"/"
<>
frame_id
<>
".csv"
-- printDebug "[frameCalcUploadAsync] csvUrl" csvUrl
res
<-
liftBase
$
do
manager
<-
newManager
tlsManagerSettings
req
<-
parseRequest
$
T
.
unpack
csvUrl
httpLbs
req
manager
let
body
=
T
.
pack
$
BSU8
.
toString
$
BSL
.
toStrict
$
responseBody
res
mCId
<-
getClosestParentIdByType
nId
NodeCorpus
-- printDebug "[frameCalcUploadAsync] mCId" mCId
jobLog2
<-
case
mCId
of
Nothing
->
pure
$
jobLogFail
jobLog
Just
cId
->
addToCorpusWithForm
(
RootId
(
NodeId
uId
))
cId
(
NewWithForm
CSV
body
Nothing
"calc-upload.csv"
)
logStatus
jobLog
pure
$
jobLogSuccess
jobLog2
src/Gargantext/API/Node/New.hs
View file @
2b1c8e4e
...
@@ -24,6 +24,12 @@ import Data.Aeson
...
@@ -24,6 +24,12 @@ import Data.Aeson
import
Data.Swagger
import
Data.Swagger
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
GHC.Generics
(
Generic
)
import
GHC.Generics
(
Generic
)
import
Servant
import
Servant.Job.Async
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck.Arbitrary
import
Web.FormUrlEncoded
(
FromForm
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Prelude
import
Gargantext.API.Prelude
import
Gargantext.Database.Action.Flow.Types
import
Gargantext.Database.Action.Flow.Types
...
@@ -34,11 +40,6 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
...
@@ -34,11 +40,6 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import
Gargantext.Database.Query.Table.Node.User
import
Gargantext.Database.Query.Table.Node.User
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Schema.Node
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Servant
import
Servant.Job.Async
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck.Arbitrary
import
Web.FormUrlEncoded
(
FromForm
)
------------------------------------------------------------------------
------------------------------------------------------------------------
data
PostNode
=
PostNode
{
pn_name
::
Text
data
PostNode
=
PostNode
{
pn_name
::
Text
...
...
src/Gargantext/API/Routes.hs
View file @
2b1c8e4e
...
@@ -27,10 +27,24 @@ import Control.Concurrent (threadDelay)
...
@@ -27,10 +27,24 @@ import Control.Concurrent (threadDelay)
import
Control.Lens
(
view
)
import
Control.Lens
(
view
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
Data.Validity
import
Data.Validity
import
Servant
import
Servant.Auth
as
SA
import
Servant.Auth.Swagger
()
import
Servant.Job.Async
import
Servant.Swagger.UI
import
qualified
Gargantext.API.Ngrams.List
as
List
import
qualified
Gargantext.API.Node.Contact
as
Contact
import
qualified
Gargantext.API.Node.Corpus.Annuaire
as
Annuaire
import
qualified
Gargantext.API.Node.Corpus.Export
as
Export
import
qualified
Gargantext.API.Node.Corpus.Export.Types
as
Export
import
qualified
Gargantext.API.Node.Corpus.New
as
New
import
qualified
Gargantext.API.Public
as
Public
import
Gargantext.API.Admin.Auth.Types
(
AuthRequest
,
AuthResponse
,
AuthenticatedUser
(
..
),
PathId
(
..
))
import
Gargantext.API.Admin.Auth.Types
(
AuthRequest
,
AuthResponse
,
AuthenticatedUser
(
..
),
PathId
(
..
))
import
Gargantext.API.Admin.Auth
(
withAccess
)
import
Gargantext.API.Admin.Auth
(
withAccess
)
import
Gargantext.API.Admin.FrontEnd
(
FrontEndAPI
)
import
Gargantext.API.Admin.FrontEnd
(
FrontEndAPI
)
import
Gargantext.API.Count
(
CountAPI
,
count
,
Query
)
import
Gargantext.API.Count
(
CountAPI
,
count
,
Query
)
import
Gargantext.API.Job
(
jobLogInit
)
import
Gargantext.API.Ngrams
(
TableNgramsApi
,
apiNgramsTableDoc
)
import
Gargantext.API.Ngrams
(
TableNgramsApi
,
apiNgramsTableDoc
)
import
Gargantext.API.Node
import
Gargantext.API.Node
import
Gargantext.API.Prelude
import
Gargantext.API.Prelude
...
@@ -41,18 +55,7 @@ import Gargantext.Database.Admin.Types.Hyperdata
...
@@ -41,18 +55,7 @@ import Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Prelude.Config
(
gc_max_docs_scrapers
)
import
Gargantext.Prelude.Config
(
gc_max_docs_scrapers
)
import
Servant
import
Servant.Auth
as
SA
import
Servant.Auth.Swagger
()
import
Servant.Job.Async
import
Servant.Swagger.UI
import
qualified
Gargantext.API.Ngrams.List
as
List
import
qualified
Gargantext.API.Node.Contact
as
Contact
import
qualified
Gargantext.API.Node.Corpus.Annuaire
as
Annuaire
import
qualified
Gargantext.API.Node.Corpus.Export
as
Export
import
qualified
Gargantext.API.Node.Corpus.Export.Types
as
Export
import
qualified
Gargantext.API.Node.Corpus.New
as
New
import
qualified
Gargantext.API.Public
as
Public
type
GargAPI
=
"api"
:>
Summary
"API "
:>
GargAPIVersion
type
GargAPI
=
"api"
:>
Summary
"API "
:>
GargAPIVersion
-- | TODO :<|> Summary "Latest API" :> GargAPI'
-- | TODO :<|> Summary "Latest API" :> GargAPI'
...
@@ -282,9 +285,9 @@ addCorpusWithForm user cid =
...
@@ -282,9 +285,9 @@ addCorpusWithForm user cid =
JobFunction
(
\
i
log'
->
JobFunction
(
\
i
log'
->
let
let
log''
x
=
do
log''
x
=
do
printDebug
"
addToCorpusWithForm
"
x
printDebug
"
[addToCorpusWithForm]
"
x
liftBase
$
log'
x
liftBase
$
log'
x
in
New
.
addToCorpusWithForm
user
cid
i
log''
)
in
New
.
addToCorpusWithForm
user
cid
i
log''
(
jobLogInit
3
)
)
addCorpusWithFile
::
User
->
GargServer
New
.
AddWithFile
addCorpusWithFile
::
User
->
GargServer
New
.
AddWithFile
addCorpusWithFile
user
cid
=
addCorpusWithFile
user
cid
=
...
@@ -292,7 +295,7 @@ addCorpusWithFile user cid =
...
@@ -292,7 +295,7 @@ addCorpusWithFile user cid =
JobFunction
(
\
i
log'
->
JobFunction
(
\
i
log'
->
let
let
log''
x
=
do
log''
x
=
do
printDebug
"
addToCorpusWithFile
"
x
printDebug
"
[addToCorpusWithFile]
"
x
liftBase
$
log'
x
liftBase
$
log'
x
in
New
.
addToCorpusWithFile
user
cid
i
log''
)
in
New
.
addToCorpusWithFile
user
cid
i
log''
)
...
...
src/Gargantext/Core/Ext/IMT.hs
View file @
2b1c8e4e
...
@@ -11,14 +11,17 @@ Portability : POSIX
...
@@ -11,14 +11,17 @@ Portability : POSIX
module
Gargantext.Core.Ext.IMT
where
module
Gargantext.Core.Ext.IMT
where
import
Gargantext.Prelude
import
Data.Either
(
Either
(
..
))
import
Data.Text
(
Text
,
splitOn
)
import
Data.Map
(
Map
)
import
Data.Map
(
Map
)
import
Data.Text
(
Text
,
splitOn
)
import
qualified
Data.Set
as
S
import
qualified
Data.Set
as
S
import
qualified
Data.List
as
DL
import
qualified
Data.List
as
DL
import
qualified
Data.Vector
as
DV
import
qualified
Data.Vector
as
DV
import
qualified
Data.Map
as
M
import
qualified
Data.Map
as
M
import
qualified
Prelude
as
Prelude
import
Gargantext.Prelude
import
Gargantext.Core.Text.Metrics.Utils
as
Utils
import
Gargantext.Core.Text.Metrics.Utils
as
Utils
import
Gargantext.Core.Text.Corpus.Parsers.CSV
as
CSV
import
Gargantext.Core.Text.Corpus.Parsers.CSV
as
CSV
...
@@ -98,8 +101,10 @@ schools = [ School
...
@@ -98,8 +101,10 @@ schools = [ School
mapIdSchool
::
Map
Text
Text
mapIdSchool
::
Map
Text
Text
mapIdSchool
=
M
.
fromList
$
Gargantext
.
Prelude
.
map
(
\
(
School
n
_
i
)
->
(
i
,
n
))
schools
mapIdSchool
=
M
.
fromList
$
Gargantext
.
Prelude
.
map
(
\
(
School
n
_
i
)
->
(
i
,
n
))
schools
hal_data
::
IO
(
DV
.
Vector
CsvHal
)
hal_data
::
IO
(
Either
Prelude
.
String
(
DV
.
Vector
CsvHal
))
hal_data
=
snd
<$>
CSV
.
readCsvHal
"doc/corpus_imt/Gargantext_Corpus.csv"
hal_data
=
do
r
<-
CSV
.
readCsvHal
"doc/corpus_imt/Gargantext_Corpus.csv"
pure
$
snd
<$>
r
names
::
S
.
Set
Text
names
::
S
.
Set
Text
names
=
S
.
fromList
$
Gargantext
.
Prelude
.
map
(
\
s
->
school_id
s
)
schools
names
=
S
.
fromList
$
Gargantext
.
Prelude
.
map
(
\
s
->
school_id
s
)
schools
...
...
src/Gargantext/Core/Text/Convert.hs
View file @
2b1c8e4e
...
@@ -16,15 +16,21 @@ Format Converter.
...
@@ -16,15 +16,21 @@ Format Converter.
module
Gargantext.Core.Text.Convert
(
risPress2csvWrite
)
module
Gargantext.Core.Text.Convert
(
risPress2csvWrite
)
where
where
import
Data.Either
(
Either
(
..
))
import
qualified
Data.Text
as
T
import
System.FilePath
(
FilePath
())
-- , takeExtension)
import
System.FilePath
(
FilePath
())
-- , takeExtension)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
writeDocs2Csv
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
writeDocs2Csv
)
import
Gargantext.Core.Text.Corpus.Parsers
(
parseFile
,
FileFormat
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers
(
parseFile
,
FileFormat
(
..
))
risPress2csvWrite
::
FilePath
->
IO
()
risPress2csvWrite
::
FilePath
->
IO
()
risPress2csvWrite
f
=
parseFile
RisPresse
(
f
<>
".ris"
)
risPress2csvWrite
f
=
do
>>=
\
hs
->
writeDocs2Csv
(
f
<>
".csv"
)
hs
eContents
<-
parseFile
RisPresse
(
f
<>
".ris"
)
case
eContents
of
Right
contents
->
writeDocs2Csv
(
f
<>
".csv"
)
contents
Left
e
->
panic
$
"Error: "
<>
(
T
.
pack
e
)
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
2b1c8e4e
...
@@ -36,18 +36,20 @@ import Data.String()
...
@@ -36,18 +36,20 @@ import Data.String()
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
Data.Text.Encoding
(
decodeUtf8
)
import
Data.Text.Encoding
(
decodeUtf8
)
import
Data.Tuple.Extra
(
both
,
first
,
second
)
import
Data.Tuple.Extra
(
both
,
first
,
second
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
parseHal
,
parseHal'
,
parseCsv
,
parseCsv'
)
import
Gargantext.Core.Text.Corpus.Parsers.RIS.Presse
(
presseEnrich
)
-- import Gargantext.Core.Text.Learn (detectLangDefault)
import
System.FilePath
(
FilePath
(),
takeExtension
)
import
System.FilePath
(
FilePath
(),
takeExtension
)
import
qualified
Data.ByteString
as
DB
import
qualified
Data.ByteString
as
DB
import
qualified
Data.ByteString.Char8
as
DBC
import
qualified
Data.ByteString.Char8
as
DBC
import
qualified
Data.ByteString.Lazy
as
DBL
import
qualified
Data.ByteString.Lazy
as
DBL
import
qualified
Data.Map
as
DM
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
DT
import
qualified
Data.Text
as
DT
import
qualified
Prelude
as
Prelude
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
parseHal
,
parseHal'
,
parseCsv
,
parseCsv'
)
import
Gargantext.Core.Text.Corpus.Parsers.RIS.Presse
(
presseEnrich
)
-- import Gargantext.Core.Text.Learn (detectLangDefault)
import
qualified
Gargantext.Core.Text.Corpus.Parsers.Date
as
Date
import
qualified
Gargantext.Core.Text.Corpus.Parsers.Date
as
Date
import
qualified
Gargantext.Core.Text.Corpus.Parsers.RIS
as
RIS
import
qualified
Gargantext.Core.Text.Corpus.Parsers.RIS
as
RIS
import
qualified
Gargantext.Core.Text.Corpus.Parsers.WOS
as
WOS
import
qualified
Gargantext.Core.Text.Corpus.Parsers.WOS
as
WOS
...
@@ -75,30 +77,40 @@ data FileFormat = WOS | RIS | RisPresse
...
@@ -75,30 +77,40 @@ data FileFormat = WOS | RIS | RisPresse
-- | XML -- Not Implemented / see :
-- | XML -- Not Implemented / see :
parseFormat
::
FileFormat
->
DB
.
ByteString
->
IO
[
HyperdataDocument
]
parseFormat
::
FileFormat
->
DB
.
ByteString
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseFormat
CsvGargV3
bs
=
pure
$
parseCsv'
$
DBL
.
fromStrict
bs
parseFormat
CsvGargV3
bs
=
pure
$
parseCsv'
$
DBL
.
fromStrict
bs
parseFormat
CsvHal
bs
=
pure
$
parseHal'
$
DBL
.
fromStrict
bs
parseFormat
CsvHal
bs
=
pure
$
parseHal'
$
DBL
.
fromStrict
bs
parseFormat
RisPresse
bs
=
mapM
(
toDoc
RIS
)
parseFormat
RisPresse
bs
=
do
docs
<-
mapM
(
toDoc
RIS
)
<$>
snd
<$>
snd
<$>
enrichWith
RisPresse
<$>
enrichWith
RisPresse
$
partitionEithers
$
partitionEithers
$
[
runParser'
RisPresse
bs
]
$
[
runParser'
RisPresse
bs
]
parseFormat
WOS
bs
=
mapM
(
toDoc
WOS
)
pure
$
Right
docs
parseFormat
WOS
bs
=
do
docs
<-
mapM
(
toDoc
WOS
)
<$>
snd
<$>
snd
<$>
enrichWith
WOS
<$>
enrichWith
WOS
$
partitionEithers
$
partitionEithers
$
[
runParser'
WOS
bs
]
$
[
runParser'
WOS
bs
]
pure
$
Right
docs
parseFormat
_
_
=
undefined
parseFormat
_
_
=
undefined
-- | Parse file into documents
-- | Parse file into documents
-- TODO manage errors here
-- TODO manage errors here
-- TODO: to debug maybe add the filepath in error message
-- TODO: to debug maybe add the filepath in error message
parseFile
::
FileFormat
->
FilePath
->
IO
[
HyperdataDocument
]
parseFile
::
FileFormat
->
FilePath
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseFile
CsvHal
p
=
parseHal
p
parseFile
CsvHal
p
=
parseHal
p
parseFile
CsvGargV3
p
=
parseCsv
p
parseFile
CsvGargV3
p
=
parseCsv
p
parseFile
RisPresse
p
=
join
$
mapM
(
toDoc
RIS
)
<$>
snd
<$>
enrichWith
RisPresse
<$>
readFileWith
RIS
p
parseFile
RisPresse
p
=
do
parseFile
WOS
p
=
join
$
mapM
(
toDoc
WOS
)
<$>
snd
<$>
enrichWith
WOS
<$>
readFileWith
WOS
p
docs
<-
join
$
mapM
(
toDoc
RIS
)
<$>
snd
<$>
enrichWith
RisPresse
<$>
readFileWith
RIS
p
parseFile
ff
p
=
join
$
mapM
(
toDoc
ff
)
<$>
snd
<$>
enrichWith
ff
<$>
readFileWith
ff
p
pure
$
Right
docs
parseFile
WOS
p
=
do
docs
<-
join
$
mapM
(
toDoc
WOS
)
<$>
snd
<$>
enrichWith
WOS
<$>
readFileWith
WOS
p
pure
$
Right
docs
parseFile
ff
p
=
do
docs
<-
join
$
mapM
(
toDoc
ff
)
<$>
snd
<$>
enrichWith
ff
<$>
readFileWith
ff
p
pure
$
Right
docs
toDoc
::
FileFormat
->
[(
Text
,
Text
)]
->
IO
HyperdataDocument
toDoc
::
FileFormat
->
[(
Text
,
Text
)]
->
IO
HyperdataDocument
-- TODO use language for RIS
-- TODO use language for RIS
...
...
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
View file @
2b1c8e4e
...
@@ -19,7 +19,7 @@ import qualified Data.ByteString as BS
...
@@ -19,7 +19,7 @@ import qualified Data.ByteString as BS
import
qualified
Data.ByteString.Lazy
as
BL
import
qualified
Data.ByteString.Lazy
as
BL
import
Data.Char
(
ord
)
import
Data.Char
(
ord
)
import
Data.Csv
import
Data.Csv
import
Data.Either
(
Either
(
Left
,
Right
))
import
Data.Either
(
Either
(
..
))
import
Data.Text
(
Text
,
pack
,
length
,
intercalate
)
import
Data.Text
(
Text
,
pack
,
length
,
intercalate
)
import
Data.Time.Segment
(
jour
)
import
Data.Time.Segment
(
jour
)
import
qualified
Data.Vector
as
V
import
qualified
Data.Vector
as
V
...
@@ -27,6 +27,8 @@ import Data.Vector (Vector)
...
@@ -27,6 +27,8 @@ import Data.Vector (Vector)
import
GHC.IO
(
FilePath
)
import
GHC.IO
(
FilePath
)
import
GHC.Word
(
Word8
)
import
GHC.Word
(
Word8
)
import
qualified
Prelude
as
Prelude
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
hiding
(
length
)
import
Gargantext.Prelude
hiding
(
length
)
import
Gargantext.Core.Text
import
Gargantext.Core.Text
...
@@ -83,7 +85,7 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
...
@@ -83,7 +85,7 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
-- | Types Conversions
-- | Types Conversions
toDocs
::
Vector
CsvDoc
->
[
CsvGargV3
]
toDocs
::
Vector
CsvDoc
->
[
CsvGargV3
]
toDocs
v
=
V
.
toList
toDocs
v
=
V
.
toList
$
V
.
zipWith
(
\
nId
(
CsvDoc
t
s
py
pm
pd
abst
auth
)
$
V
.
zipWith
(
\
nId
(
CsvDoc
t
s
(
IntOrDec
py
)
pm
pd
abst
auth
)
->
CsvGargV3
nId
t
s
py
pm
pd
abst
auth
)
->
CsvGargV3
nId
t
s
py
pm
pd
abst
auth
)
(
V
.
enumFromN
1
(
V
.
length
v''
))
v''
(
V
.
enumFromN
1
(
V
.
length
v''
))
v''
where
where
...
@@ -94,7 +96,7 @@ toDocs v = V.toList
...
@@ -94,7 +96,7 @@ toDocs v = V.toList
fromDocs
::
Vector
CsvGargV3
->
Vector
CsvDoc
fromDocs
::
Vector
CsvGargV3
->
Vector
CsvDoc
fromDocs
docs
=
V
.
map
fromDocs'
docs
fromDocs
docs
=
V
.
map
fromDocs'
docs
where
where
fromDocs'
(
CsvGargV3
_
t
s
py
pm
pd
abst
auth
)
=
(
CsvDoc
t
s
py
pm
pd
abst
auth
)
fromDocs'
(
CsvGargV3
_
t
s
py
pm
pd
abst
auth
)
=
(
CsvDoc
t
s
(
IntOrDec
py
)
pm
pd
abst
auth
)
---------------------------------------------------------------
---------------------------------------------------------------
-- | Split a document in its context
-- | Split a document in its context
...
@@ -137,10 +139,21 @@ docsSize csvDoc = mean ls
...
@@ -137,10 +139,21 @@ docsSize csvDoc = mean ls
---------------------------------------------------------------
---------------------------------------------------------------
newtype
IntOrDec
=
IntOrDec
Int
deriving
(
Show
,
Eq
,
Read
)
unIntOrDec
::
IntOrDec
->
Int
unIntOrDec
(
IntOrDec
i
)
=
i
instance
FromField
IntOrDec
where
parseField
s
=
case
runParser
(
parseField
s
::
Parser
Int
)
of
Left
_err
->
IntOrDec
<$>
Prelude
.
floor
<$>
(
parseField
s
::
Parser
Double
)
Right
n
->
pure
$
IntOrDec
n
instance
ToField
IntOrDec
where
toField
(
IntOrDec
i
)
=
toField
i
data
CsvDoc
=
CsvDoc
data
CsvDoc
=
CsvDoc
{
csv_title
::
!
Text
{
csv_title
::
!
Text
,
csv_source
::
!
Text
,
csv_source
::
!
Text
,
csv_publication_year
::
!
Int
,
csv_publication_year
::
!
Int
OrDec
,
csv_publication_month
::
!
Int
,
csv_publication_month
::
!
Int
,
csv_publication_day
::
!
Int
,
csv_publication_day
::
!
Int
,
csv_abstract
::
!
Text
,
csv_abstract
::
!
Text
...
@@ -149,13 +162,13 @@ data CsvDoc = CsvDoc
...
@@ -149,13 +162,13 @@ data CsvDoc = CsvDoc
deriving
(
Show
)
deriving
(
Show
)
instance
FromNamedRecord
CsvDoc
where
instance
FromNamedRecord
CsvDoc
where
parseNamedRecord
r
=
CsvDoc
<$>
r
.:
"title"
parseNamedRecord
r
=
CsvDoc
<$>
(
r
.:
"title"
<|>
r
.:
"Title"
)
<*>
r
.:
"source"
<*>
(
r
.:
"source"
<|>
r
.:
"Source"
)
<*>
r
.:
"publication_year"
<*>
(
r
.:
"publication_year"
<|>
r
.:
"Publication Year"
)
<*>
r
.:
"publication_month"
<*>
(
r
.:
"publication_month"
<|>
r
.:
"Publication Month"
)
<*>
r
.:
"publication_day"
<*>
(
r
.:
"publication_day"
<|>
r
.:
"Publication Day"
)
<*>
r
.:
"abstract"
<*>
(
r
.:
"abstract"
<|>
r
.:
"Abstract"
)
<*>
r
.:
"authors"
<*>
(
r
.:
"authors"
<|>
r
.:
"Authors"
)
instance
ToNamedRecord
CsvDoc
where
instance
ToNamedRecord
CsvDoc
where
toNamedRecord
(
CsvDoc
t
s
py
pm
pd
abst
aut
)
=
toNamedRecord
(
CsvDoc
t
s
py
pm
pd
abst
aut
)
=
...
@@ -171,7 +184,7 @@ instance ToNamedRecord CsvDoc where
...
@@ -171,7 +184,7 @@ instance ToNamedRecord CsvDoc where
hyperdataDocument2csvDoc
::
HyperdataDocument
->
CsvDoc
hyperdataDocument2csvDoc
::
HyperdataDocument
->
CsvDoc
hyperdataDocument2csvDoc
h
=
CsvDoc
(
m
$
_hd_title
h
)
hyperdataDocument2csvDoc
h
=
CsvDoc
(
m
$
_hd_title
h
)
(
m
$
_hd_source
h
)
(
m
$
_hd_source
h
)
(
mI
$
_hd_publication_year
h
)
(
IntOrDec
$
mI
$
_hd_publication_year
h
)
(
mI
$
_hd_publication_month
h
)
(
mI
$
_hd_publication_month
h
)
(
mI
$
_hd_publication_day
h
)
(
mI
$
_hd_publication_day
h
)
(
m
$
_hd_abstract
h
)
(
m
$
_hd_abstract
h
)
...
@@ -192,52 +205,47 @@ delimiter :: Word8
...
@@ -192,52 +205,47 @@ delimiter :: Word8
delimiter
=
fromIntegral
$
ord
'
\t
'
delimiter
=
fromIntegral
$
ord
'
\t
'
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
readCsvOn'
::
[
CsvDoc
->
Text
]
->
FilePath
->
IO
[
Text
]
readCsvOn'
::
[
CsvDoc
->
Text
]
->
FilePath
->
IO
(
Either
Prelude
.
String
[
Text
])
readCsvOn'
fields
fp
=
V
.
toList
readCsvOn'
fields
fp
=
do
<$>
V
.
map
(
\
l
->
intercalate
(
pack
" "
)
$
map
(
\
field
->
field
l
)
fields
)
r
<-
readFile
fp
<$>
snd
pure
$
(
V
.
toList
<$>
readFile
fp
.
V
.
map
(
\
l
->
intercalate
(
pack
" "
)
$
map
(
\
field
->
field
l
)
fields
)
.
snd
)
<$>
r
------------------------------------------------------------------------
------------------------------------------------------------------------
readFileLazy
::
(
FromNamedRecord
a
)
=>
proxy
a
->
FilePath
->
IO
(
Header
,
Vector
a
)
readFileLazy
::
(
FromNamedRecord
a
)
=>
proxy
a
->
FilePath
->
IO
(
Either
Prelude
.
String
(
Header
,
Vector
a
)
)
readFileLazy
f
=
fmap
(
readByteStringLazy
f
)
.
BL
.
readFile
readFileLazy
f
=
fmap
(
readByteStringLazy
f
)
.
BL
.
readFile
readFileStrict
::
(
FromNamedRecord
a
)
=>
proxy
a
->
FilePath
->
IO
(
Header
,
Vector
a
)
readFileStrict
::
(
FromNamedRecord
a
)
=>
proxy
a
->
FilePath
->
IO
(
Either
Prelude
.
String
(
Header
,
Vector
a
)
)
readFileStrict
f
=
fmap
(
readByteStringStrict
f
)
.
BS
.
readFile
readFileStrict
f
=
fmap
(
readByteStringStrict
f
)
.
BS
.
readFile
readByteStringLazy
::
(
FromNamedRecord
a
)
=>
proxy
a
->
BL
.
ByteString
->
(
Header
,
Vector
a
)
readByteStringLazy
::
(
FromNamedRecord
a
)
=>
proxy
a
->
BL
.
ByteString
->
Either
Prelude
.
String
(
Header
,
Vector
a
)
readByteStringLazy
_f
bs
=
case
decodeByNameWith
csvDecodeOptions
bs
of
readByteStringLazy
_f
bs
=
decodeByNameWith
csvDecodeOptions
bs
Left
e
->
panic
(
pack
e
)
Right
csvDocs
->
csvDocs
readByteStringStrict
::
(
FromNamedRecord
a
)
=>
proxy
a
->
BS
.
ByteString
->
(
Header
,
Vector
a
)
readByteStringStrict
::
(
FromNamedRecord
a
)
=>
proxy
a
->
BS
.
ByteString
->
Either
Prelude
.
String
(
Header
,
Vector
a
)
readByteStringStrict
ff
=
(
readByteStringLazy
ff
)
.
BL
.
fromStrict
readByteStringStrict
ff
=
(
readByteStringLazy
ff
)
.
BL
.
fromStrict
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO use readFileLazy
-- | TODO use readFileLazy
readFile
::
FilePath
->
IO
(
Header
,
Vector
CsvDoc
)
readFile
::
FilePath
->
IO
(
Either
Prelude
.
String
(
Header
,
Vector
CsvDoc
)
)
readFile
=
fmap
readCsvLazyBS
.
BL
.
readFile
readFile
=
fmap
readCsvLazyBS
.
BL
.
readFile
-- | TODO use readByteStringLazy
-- | TODO use readByteStringLazy
readCsvLazyBS
::
BL
.
ByteString
->
(
Header
,
Vector
CsvDoc
)
readCsvLazyBS
::
BL
.
ByteString
->
Either
Prelude
.
String
(
Header
,
Vector
CsvDoc
)
readCsvLazyBS
bs
=
case
decodeByNameWith
csvDecodeOptions
bs
of
readCsvLazyBS
bs
=
decodeByNameWith
csvDecodeOptions
bs
Left
e
->
panic
(
pack
e
)
Right
csvDocs
->
csvDocs
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO use readFileLazy
-- | TODO use readFileLazy
readCsvHal
::
FilePath
->
IO
(
Header
,
Vector
CsvHal
)
readCsvHal
::
FilePath
->
IO
(
Either
Prelude
.
String
(
Header
,
Vector
CsvHal
)
)
readCsvHal
=
fmap
readCsvHalLazyBS
.
BL
.
readFile
readCsvHal
=
fmap
readCsvHalLazyBS
.
BL
.
readFile
-- | TODO use readByteStringLazy
-- | TODO use readByteStringLazy
readCsvHalLazyBS
::
BL
.
ByteString
->
(
Header
,
Vector
CsvHal
)
readCsvHalLazyBS
::
BL
.
ByteString
->
Either
Prelude
.
String
(
Header
,
Vector
CsvHal
)
readCsvHalLazyBS
bs
=
case
decodeByNameWith
csvDecodeOptions
bs
of
readCsvHalLazyBS
bs
=
decodeByNameWith
csvDecodeOptions
bs
Left
e
->
panic
(
pack
e
)
Right
csvDocs
->
csvDocs
readCsvHalBSStrict
::
BS
.
ByteString
->
(
Header
,
Vector
CsvHal
)
readCsvHalBSStrict
::
BS
.
ByteString
->
Either
Prelude
.
String
(
Header
,
Vector
CsvHal
)
readCsvHalBSStrict
=
readCsvHalLazyBS
.
BL
.
fromStrict
readCsvHalBSStrict
=
readCsvHalLazyBS
.
BL
.
fromStrict
------------------------------------------------------------------------
------------------------------------------------------------------------
...
@@ -360,7 +368,7 @@ csvHal2doc (CsvHal title source
...
@@ -360,7 +368,7 @@ csvHal2doc (CsvHal title source
csv2doc
::
CsvDoc
->
HyperdataDocument
csv2doc
::
CsvDoc
->
HyperdataDocument
csv2doc
(
CsvDoc
title
source
csv2doc
(
CsvDoc
title
source
pub_year
pub_month
pub_day
(
IntOrDec
pub_year
)
pub_month
pub_day
abstract
authors
)
=
HyperdataDocument
(
Just
"CsvHal"
)
abstract
authors
)
=
HyperdataDocument
(
Just
"CsvHal"
)
Nothing
Nothing
Nothing
Nothing
...
@@ -382,18 +390,22 @@ csv2doc (CsvDoc title source
...
@@ -382,18 +390,22 @@ csv2doc (CsvDoc title source
Nothing
Nothing
------------------------------------------------------------------------
------------------------------------------------------------------------
parseHal
::
FilePath
->
IO
[
HyperdataDocument
]
parseHal
::
FilePath
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseHal
fp
=
V
.
toList
<$>
V
.
map
csvHal2doc
<$>
snd
<$>
readCsvHal
fp
parseHal
fp
=
do
r
<-
readCsvHal
fp
pure
$
(
V
.
toList
.
V
.
map
csvHal2doc
.
snd
)
<$>
r
parseHal'
::
BL
.
ByteString
->
[
HyperdataDocument
]
parseHal'
::
BL
.
ByteString
->
Either
Prelude
.
String
[
HyperdataDocument
]
parseHal'
=
V
.
toList
.
V
.
map
csvHal2doc
.
snd
.
readCsvHalLazyBS
parseHal'
bs
=
(
V
.
toList
.
V
.
map
csvHal2doc
.
snd
)
<$>
readCsvHalLazyBS
bs
------------------------------------------------------------------------
------------------------------------------------------------------------
parseCsv
::
FilePath
->
IO
[
HyperdataDocument
]
parseCsv
::
FilePath
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseCsv
fp
=
V
.
toList
<$>
V
.
map
csv2doc
<$>
snd
<$>
readFile
fp
parseCsv
fp
=
do
r
<-
readFile
fp
pure
$
(
V
.
toList
.
V
.
map
csv2doc
.
snd
)
<$>
r
parseCsv'
::
BL
.
ByteString
->
[
HyperdataDocument
]
parseCsv'
::
BL
.
ByteString
->
Either
Prelude
.
String
[
HyperdataDocument
]
parseCsv'
bs
=
V
.
toList
$
V
.
map
csv2doc
$
snd
$
readCsvLazyBS
bs
parseCsv'
bs
=
(
V
.
toList
.
V
.
map
csv2doc
.
snd
)
<$>
readCsvLazyBS
bs
------------------------------------------------------------------------
------------------------------------------------------------------------
-- Csv v3 weighted for phylo
-- Csv v3 weighted for phylo
...
...
src/Gargantext/Core/Text/Terms.hs
View file @
2b1c8e4e
...
@@ -69,7 +69,7 @@ data TermType lang
...
@@ -69,7 +69,7 @@ data TermType lang
,
_tt_ngramsSize
::
!
Int
,
_tt_ngramsSize
::
!
Int
,
_tt_model
::
!
(
Maybe
(
Tries
Token
()
))
,
_tt_model
::
!
(
Maybe
(
Tries
Token
()
))
}
}
deriving
Generic
deriving
(
Generic
)
makeLenses
''
T
ermType
makeLenses
''
T
ermType
--group :: [Text] -> [Text]
--group :: [Text] -> [Text]
...
...
src/Gargantext/Data/HashMap/Strict/Utils.hs
View file @
2b1c8e4e
...
@@ -23,9 +23,6 @@ partitionWithKey :: (Ord a, Hashable k) => (k -> a -> Bool) -> HashMap k a -> (H
...
@@ -23,9 +23,6 @@ partitionWithKey :: (Ord a, Hashable k) => (k -> a -> Bool) -> HashMap k a -> (H
partitionWithKey
p
m
=
(
HashMap
.
filterWithKey
p
m
,
HashMap
.
filterWithKey
(
\
k
->
not
.
p
k
)
m
)
partitionWithKey
p
m
=
(
HashMap
.
filterWithKey
p
m
,
HashMap
.
filterWithKey
(
\
k
->
not
.
p
k
)
m
)
mapKeys
::
(
Ord
k2
,
Hashable
k2
)
=>
(
k1
->
k2
)
->
HashMap
k1
a
->
HashMap
k2
a
mapKeys
f
=
HashMap
.
fromList
.
HashMap
.
foldrWithKey
(
\
k
x
xs
->
(
f
k
,
x
)
:
xs
)
[]
------------------------------------------------------------------------
------------------------------------------------------------------------
-- getKeyWithMaxValue :: Hashable k => HashMap k a -> Maybe k
-- getKeyWithMaxValue :: Hashable k => HashMap k a -> Maybe k
getKeysOrderedByValueMaxFirst
::
(
Ord
k
,
Hashable
k
,
Ord
a
)
=>
HashMap
k
a
->
[
k
]
getKeysOrderedByValueMaxFirst
::
(
Ord
k
,
Hashable
k
,
Ord
a
)
=>
HashMap
k
a
->
[
k
]
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
2b1c8e4e
...
@@ -55,7 +55,7 @@ import Data.Map (Map, lookup)
...
@@ -55,7 +55,7 @@ import Data.Map (Map, lookup)
import
Data.Maybe
(
catMaybes
)
import
Data.Maybe
(
catMaybes
)
import
Data.Monoid
import
Data.Monoid
import
Data.Swagger
import
Data.Swagger
import
Data.Text
(
splitOn
)
import
qualified
Data.Text
as
T
import
Data.Traversable
(
traverse
)
import
Data.Traversable
(
traverse
)
import
Data.Tuple.Extra
(
first
,
second
)
import
Data.Tuple.Extra
(
first
,
second
)
import
GHC.Generics
(
Generic
)
import
GHC.Generics
(
Generic
)
...
@@ -177,11 +177,12 @@ flowCorpusFile :: (FlowCmdM env err m)
...
@@ -177,11 +177,12 @@ flowCorpusFile :: (FlowCmdM env err m)
->
TermType
Lang
->
FileFormat
->
FilePath
->
TermType
Lang
->
FileFormat
->
FilePath
->
m
CorpusId
->
m
CorpusId
flowCorpusFile
u
n
l
la
ff
fp
=
do
flowCorpusFile
u
n
l
la
ff
fp
=
do
docs
<-
liftBase
(
splitEvery
500
eParsed
<-
liftBase
$
parseFile
ff
fp
<$>
take
l
case
eParsed
of
<$>
parseFile
ff
fp
Right
parsed
->
do
)
let
docs
=
splitEvery
500
$
take
l
parsed
flowCorpus
u
n
la
(
map
(
map
toHyperdataDocument
)
docs
)
flowCorpus
u
n
la
(
map
(
map
toHyperdataDocument
)
docs
)
Left
e
->
panic
$
"Error: "
<>
(
T
.
pack
e
)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO improve the needed type to create/update a corpus
-- | TODO improve the needed type to create/update a corpus
...
@@ -424,11 +425,11 @@ instance ExtractNgramsT HyperdataDocument
...
@@ -424,11 +425,11 @@ instance ExtractNgramsT HyperdataDocument
$
_hd_source
doc
$
_hd_source
doc
institutes
=
map
text2ngrams
institutes
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
map
toSchoolName
.
(
splitOn
", "
))
$
maybe
[
"Nothing"
]
(
map
toSchoolName
.
(
T
.
splitOn
", "
))
$
_hd_institutes
doc
$
_hd_institutes
doc
authors
=
map
text2ngrams
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
splitOn
", "
)
$
maybe
[
"Nothing"
]
(
T
.
splitOn
", "
)
$
_hd_authors
doc
$
_hd_authors
doc
terms'
<-
map
(
enrichedTerms
(
lang'
^.
tt_lang
)
CoreNLP
NP
)
terms'
<-
map
(
enrichedTerms
(
lang'
^.
tt_lang
)
CoreNLP
NP
)
...
...
src/Gargantext/Database/Prelude.hs
View file @
2b1c8e4e
...
@@ -31,7 +31,6 @@ import Database.PostgreSQL.Simple (Connection, connect)
...
@@ -31,7 +31,6 @@ import Database.PostgreSQL.Simple (Connection, connect)
import
Database.PostgreSQL.Simple.FromField
(
Conversion
,
ResultError
(
ConversionFailed
),
fromField
,
returnError
)
import
Database.PostgreSQL.Simple.FromField
(
Conversion
,
ResultError
(
ConversionFailed
),
fromField
,
returnError
)
import
Database.PostgreSQL.Simple.Internal
(
Field
)
import
Database.PostgreSQL.Simple.Internal
(
Field
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Prelude.Config
(
GargConfig
())
import
Opaleye
(
Query
,
Unpackspec
,
showSqlForPostgres
,
FromFields
,
Select
,
runQuery
,
PGJsonb
,
QueryRunnerColumnDefault
)
import
Opaleye
(
Query
,
Unpackspec
,
showSqlForPostgres
,
FromFields
,
Select
,
runQuery
,
PGJsonb
,
QueryRunnerColumnDefault
)
import
Opaleye.Aggregate
(
countRows
)
import
Opaleye.Aggregate
(
countRows
)
import
System.IO
(
FilePath
)
import
System.IO
(
FilePath
)
...
@@ -41,6 +40,8 @@ import qualified Data.ByteString as DB
...
@@ -41,6 +40,8 @@ import qualified Data.ByteString as DB
import
qualified
Data.List
as
DL
import
qualified
Data.List
as
DL
import
qualified
Database.PostgreSQL.Simple
as
PGS
import
qualified
Database.PostgreSQL.Simple
as
PGS
import
Gargantext.Prelude.Config
(
GargConfig
())
-------------------------------------------------------
-------------------------------------------------------
class
HasConnectionPool
env
where
class
HasConnectionPool
env
where
connPool
::
Getter
env
(
Pool
Connection
)
connPool
::
Getter
env
(
Pool
Connection
)
...
...
src/Gargantext/Database/Query/Table/Node.hs
View file @
2b1c8e4e
...
@@ -121,7 +121,7 @@ getClosestParentIdByType :: HasDBid NodeType
...
@@ -121,7 +121,7 @@ getClosestParentIdByType :: HasDBid NodeType
getClosestParentIdByType
nId
nType
=
do
getClosestParentIdByType
nId
nType
=
do
result
<-
runPGSQuery
query
(
nId
,
0
::
Int
)
result
<-
runPGSQuery
query
(
nId
,
0
::
Int
)
case
result
of
case
result
of
[
DPS
.
Only
parentId
,
DPS
.
Only
pTypename
]
->
do
[
(
NodeId
parentId
,
pTypename
)
]
->
do
if
toDBid
nType
==
pTypename
then
if
toDBid
nType
==
pTypename
then
pure
$
Just
$
NodeId
parentId
pure
$
Just
$
NodeId
parentId
else
else
...
...
stack.yaml
View file @
2b1c8e4e
resolver
:
resolver
:
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/1
7/10
.yaml
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/1
8/4
.yaml
flags
:
{}
flags
:
{}
extra-package-dbs
:
[]
extra-package-dbs
:
[]
packages
:
packages
:
...
@@ -22,6 +22,9 @@ nix:
...
@@ -22,6 +22,9 @@ nix:
allow-newer
:
true
allow-newer
:
true
#ghc-options:
# "$everything": -haddock
extra-deps
:
extra-deps
:
-
git
:
https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
-
git
:
https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit
:
3e32ec3aca71eb326805355d3a99b9288dc342ee
commit
:
3e32ec3aca71eb326805355d3a99b9288dc342ee
...
@@ -94,6 +97,7 @@ extra-deps:
...
@@ -94,6 +97,7 @@ extra-deps:
-
json-stream-0.4.2.4@sha256:8b7f17d54a6e1e6311756270f8bcf51e91bab4300945400de66118470dcf51b9,4716
-
json-stream-0.4.2.4@sha256:8b7f17d54a6e1e6311756270f8bcf51e91bab4300945400de66118470dcf51b9,4716
-
located-base-0.1.1.1@sha256:7c6395f2b6fbf2d5f76c3514f774423838c0ea94e1c6a5530dd3c94b30c9d1c8,1904
-
located-base-0.1.1.1@sha256:7c6395f2b6fbf2d5f76c3514f774423838c0ea94e1c6a5530dd3c94b30c9d1c8,1904
-
logging-effect-1.3.12@sha256:72d168dd09887649ba9501627219b6027cbec2d5541931555b7885b133785ce3,1679
-
logging-effect-1.3.12@sha256:72d168dd09887649ba9501627219b6027cbec2d5541931555b7885b133785ce3,1679
-
MissingH-1.4.3.0@sha256:32f9892ec98cd21df4f4d3ed8d95a3831ae74287ea0641d6f09b2dc6ef061d39,4859
-
monoid-extras-0.5.1@sha256:438dbfd7b4dce47d8f0ca577f56caf94bd1e21391afa545cad09fe7cf2e5793d,2333
-
monoid-extras-0.5.1@sha256:438dbfd7b4dce47d8f0ca577f56caf94bd1e21391afa545cad09fe7cf2e5793d,2333
-
rake-0.0.1@sha256:3380f6567fb17505d1095b7f32222c0b631fa04126ad39726c84262da99c08b3,2025
-
rake-0.0.1@sha256:3380f6567fb17505d1095b7f32222c0b631fa04126ad39726c84262da99c08b3,2025
-
servant-cassava-0.10.1@sha256:07e7b6ca67cf57dcb4a0041a399a25d058844505837c6479e01d62be59d01fdf,1665
-
servant-cassava-0.10.1@sha256:07e7b6ca67cf57dcb4a0041a399a25d058844505837c6479e01d62be59d01fdf,1665
...
@@ -103,3 +107,6 @@ extra-deps:
...
@@ -103,3 +107,6 @@ extra-deps:
-
stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082
-
stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082
-
xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540
-
xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540
-
xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950
-
xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950
# need Vector.uncons
-
vector-0.12.3.0@sha256:0ae2c1ba86f0077910be242ec6802cc3d7725fe7b2bea6987201aa3737b239b5,7953
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment