Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
f35d84d9
Commit
f35d84d9
authored
Aug 26, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] merge
parent
9a29b3fc
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
113 additions
and
77 deletions
+113
-77
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+8
-2
Main.hs
bin/gargantext-cli/Main.hs
+2
-2
List.hs
src/Gargantext/API/Ngrams/List.hs
+66
-52
CSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+35
-19
Json2Csv.hs
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
+2
-2
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
f35d84d9
...
...
@@ -21,6 +21,7 @@ import Crypto.Hash.SHA256 (hash)
import
Data.Aeson
import
Data.Either
(
Either
(
..
))
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.Maybe
(
fromMaybe
)
import
Data.String
(
String
)
import
GHC.IO
(
FilePath
)
import
qualified
Prelude
as
Prelude
...
...
@@ -152,8 +153,13 @@ csvToDocs parser patterns time path =
Right
r
->
pure
$
Vector
.
toList
$
Vector
.
take
limit
$
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
unIntOrDec
$
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
Csv
.
unIntOrDec
$
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
$
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
...
...
bin/gargantext-cli/Main.hs
View file @
f35d84d9
...
...
@@ -42,7 +42,7 @@ import Gargantext.Core.Types
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Terms.WithList
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
readFile
,
csv_title
,
csv_abstract
,
csv_publication_year
,
unIntOrDec
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
readFile
,
csv_title
,
csv_abstract
,
csv_publication_year
,
unIntOrDec
,
fromMIntOrDec
,
defaultYear
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms
(
terms
)
import
Gargantext.Core.Text.Metrics.Count
(
coocOnContexts
,
Coocs
)
...
...
@@ -91,7 +91,7 @@ main = do
Right
cf
->
do
let
corpus
=
DM
.
fromListWith
(
<>
)
.
DV
.
toList
.
DV
.
map
(
\
n
->
(
unIntOrDec
$
csv_publication_year
n
,
[(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)]))
.
DV
.
map
(
\
n
->
(
fromMIntOrDec
defaultYear
$
csv_publication_year
n
,
[(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)]))
.
snd
$
cf
-- termListMap :: [Text]
...
...
src/Gargantext/API/Ngrams/List.hs
View file @
f35d84d9
...
...
@@ -17,24 +17,23 @@ module Gargantext.API.Ngrams.List
import
Control.Lens
hiding
(
elements
,
Indexed
)
import
Data.Aeson
import
Data.Either
(
Either
(
..
))
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
toList
,
fromList
)
import
Data.Map
(
Map
,
toList
,
fromList
)
import
Data.Maybe
(
catMaybes
)
import
Data.Set
(
Set
)
import
Data.Swagger
(
ToSchema
,
declareNamedSchema
,
genericDeclareNamedSchema
)
import
Data.Text
(
Text
,
concat
,
pack
)
import
GHC.Generics
(
Generic
)
import
Data.Vector
(
Vector
)
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
)
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.N
ode.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.N
grams.List.Types
import
Gargantext.API.Prelude
(
GargServer
)
import
Gargantext.Core.NodeStory
import
Gargantext.Core.Text.Terms
(
ExtractedNgrams
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
termsInText
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow
(
saveDocNgramsWith
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getOccByNgramsOnlyFast'
)
...
...
@@ -48,29 +47,25 @@ import Gargantext.Prelude
import
Network.HTTP.Media
((
//
),
(
/:
))
import
Servant
import
Servant.Job.Async
import
Servant.Job.Utils
(
jsonOptions
)
import
Web.FormUrlEncoded
(
FromForm
)
import
qualified
Data.ByteString.Lazy
as
BSL
import
qualified
Data.Csv
as
Csv
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Text
as
Text
import
qualified
Data.Vector
as
Vec
import
qualified
Prelude
as
Prelude
import
qualified
Protolude
as
P
------------------------------------------------------------------------
-- | TODO refactor
{-
type API = Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
-- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
:<|> PostAPI
:<|> CSVPostAPI
data
HTML
instance
Accept
HTML
where
contentType
_
=
"text"
//
"html"
/:
(
"charset"
,
"utf-8"
)
instance
ToJSON
a
=>
MimeRender
HTML
a
where
mimeRender
_
=
encode
api :: ListId -> GargServer API
api l = get l :<|> postAsync l :<|> csvPostAsync l
-}
----------------------
type
GETAPI
=
Summary
"Get List"
...
...
@@ -80,6 +75,12 @@ type GETAPI = Summary "Get List"
getApi
::
GargServer
GETAPI
getApi
=
get
data
HTML
instance
Accept
HTML
where
contentType
_
=
"text"
//
"html"
/:
(
"charset"
,
"utf-8"
)
instance
ToJSON
a
=>
MimeRender
HTML
a
where
mimeRender
_
=
encode
----------------------
type
JSONAPI
=
Summary
"Update List"
:>
"lists"
...
...
@@ -100,15 +101,11 @@ type CSVAPI = Summary "Update List (legacy v3 CSV)"
:>
"add"
:>
"form"
:>
"async"
:>
AsyncJobs
JobLog
'[
F
ormUrlEncoded
]
WithFile
JobLog
:>
AsyncJobs
JobLog
'[
F
ormUrlEncoded
]
With
Text
File
JobLog
csvApi
::
GargServer
CSVAPI
csvApi
=
csvPostAsync
----------------------
------------------------------------------------------------------------
get
::
HasNodeStory
env
err
m
=>
ListId
->
m
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsList
)
...
...
@@ -142,16 +139,6 @@ post l m = do
pure
True
------------------------------------------------------------------------
csvPost
::
FlowCmdM
env
err
m
=>
ListId
->
NgramsList
->
m
Bool
csvPost
l
m
=
do
printDebug
"[csvPost] l"
l
printDebug
"[csvPost] m"
m
pure
True
-----------------------------------------------------------------------------
-- | Re-index documents of a corpus with new ngrams (called orphans here)
reIndexWith
::
(
HasNodeStory
env
err
m
,
FlowCmdM
env
err
m
...
...
@@ -254,6 +241,7 @@ postAsync' l (WithFile _ m _) logStatus = do
,
_scst_events
=
Just
[]
}
------------------------------------------------------------------------
type
CSVPostAPI
=
Summary
"Update List (legacy v3 CSV)"
:>
"csv"
:>
"add"
...
...
@@ -261,20 +249,61 @@ type CSVPostAPI = Summary "Update List (legacy v3 CSV)"
:>
"async"
:>
AsyncJobs
JobLog
'[
F
ormUrlEncoded
]
WithFile
JobLog
readCsvText
::
Text
->
[(
Text
,
Text
,
Text
)]
readCsvText
t
=
case
eDec
of
Left
_
->
[]
Right
dec
->
Vec
.
toList
dec
where
lt
=
BSL
.
fromStrict
$
P
.
encodeUtf8
t
eDec
=
Csv
.
decodeWith
(
Csv
.
defaultDecodeOptions
{
Csv
.
decDelimiter
=
fromIntegral
(
P
.
ord
'
\t
'
)
})
Csv
.
HasHeader
lt
::
Either
Prelude
.
String
(
Vector
(
Text
,
Text
,
Text
))
parseCsvData
::
[(
Text
,
Text
,
Text
)]
->
Map
NgramsTerm
NgramsRepoElement
parseCsvData
lst
=
Map
.
fromList
$
conv
<$>
lst
where
conv
(
_status
,
label
,
_forms
)
=
(
NgramsTerm
label
,
NgramsRepoElement
{
_nre_size
=
1
,
_nre_list
=
CandidateTerm
,
_nre_root
=
Nothing
,
_nre_parent
=
Nothing
,
_nre_children
=
MSet
Map
.
empty
})
csvPost
::
FlowCmdM
env
err
m
=>
ListId
->
Text
->
m
Bool
csvPost
l
m
=
do
printDebug
"[csvPost] l"
l
-- printDebug "[csvPost] m" m
-- status label forms
let
lst
=
readCsvText
m
let
p
=
parseCsvData
lst
--printDebug "[csvPost] lst" lst
printDebug
"[csvPost] p"
p
_
<-
setListNgrams
l
NgramsTerms
p
pure
True
------------------------------------------------------------------------
csvPostAsync
::
GargServer
CSVAPI
csvPostAsync
lId
=
serveJobsAPI
$
JobFunction
$
\
f
@
(
WithFile
ft
_
n
)
log'
->
do
printDebug
"[csvPostAsync] filetype"
ft
printDebug
"[csvPostAsync] name"
n
csvPostAsync'
lId
f
(
liftBase
.
log'
)
JobFunction
$
\
f
@
(
WithTextFile
ft
_
n
)
log'
->
do
let
log''
x
=
do
printDebug
"[csvPostAsync] filetype"
ft
printDebug
"[csvPostAsync] name"
n
liftBase
$
log'
x
csvPostAsync'
lId
f
log''
csvPostAsync'
::
FlowCmdM
env
err
m
=>
ListId
->
WithFile
->
With
Text
File
->
(
JobLog
->
m
()
)
->
m
JobLog
csvPostAsync'
l
(
WithFile
_
m
_
)
logStatus
=
do
csvPostAsync'
l
(
With
Text
File
_
m
_
)
logStatus
=
do
logStatus
JobLog
{
_scst_succeeded
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
...
...
@@ -288,18 +317,3 @@ csvPostAsync' l (WithFile _ m _) logStatus = do
,
_scst_events
=
Just
[]
}
------------------------------------------------------------------------
data
WithFile
=
WithFile
{
_wf_filetype
::
!
FileType
,
_wf_data
::
!
NgramsList
,
_wf_name
::
!
Text
}
deriving
(
Eq
,
Show
,
Generic
)
makeLenses
''
W
ithFile
instance
FromForm
WithFile
instance
FromJSON
WithFile
where
parseJSON
=
genericParseJSON
$
jsonOptions
"_wf_"
instance
ToJSON
WithFile
where
toJSON
=
genericToJSON
$
jsonOptions
"_wf_"
instance
ToSchema
WithFile
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_wf_"
)
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
View file @
f35d84d9
...
...
@@ -20,6 +20,7 @@ import qualified Data.ByteString.Lazy as BL
import
Data.Char
(
ord
)
import
Data.Csv
import
Data.Either
(
Either
(
..
))
import
Data.Maybe
(
fromMaybe
)
import
Data.Text
(
Text
,
pack
,
length
,
intercalate
)
import
Data.Time.Segment
(
jour
)
import
qualified
Data.Vector
as
V
...
...
@@ -85,8 +86,10 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
-- | Types Conversions
toDocs
::
Vector
CsvDoc
->
[
CsvGargV3
]
toDocs
v
=
V
.
toList
$
V
.
zipWith
(
\
nId
(
CsvDoc
t
s
(
IntOrDec
py
)
pm
pd
abst
auth
)
->
CsvGargV3
nId
t
s
py
pm
pd
abst
auth
)
$
V
.
zipWith
(
\
nId
(
CsvDoc
t
s
mPy
pm
pd
abst
auth
)
->
CsvGargV3
nId
t
s
(
fromMIntOrDec
defaultYear
mPy
)
(
fromMaybe
defaultMonth
pm
)
(
fromMaybe
defaultDay
pd
)
abst
auth
)
(
V
.
enumFromN
1
(
V
.
length
v''
))
v''
where
v''
=
V
.
foldl
(
\
v'
sep
->
V
.
concatMap
(
splitDoc
(
docsSize
v'
)
sep
)
v'
)
v
seps
...
...
@@ -96,7 +99,7 @@ toDocs v = V.toList
fromDocs
::
Vector
CsvGargV3
->
Vector
CsvDoc
fromDocs
docs
=
V
.
map
fromDocs'
docs
where
fromDocs'
(
CsvGargV3
_
t
s
py
pm
pd
abst
auth
)
=
(
CsvDoc
t
s
(
IntOrDec
py
)
pm
pd
abst
auth
)
fromDocs'
(
CsvGargV3
_
t
s
py
pm
pd
abst
auth
)
=
(
CsvDoc
t
s
(
Just
$
IntOrDec
py
)
(
Just
pm
)
(
Just
pd
)
abst
auth
)
---------------------------------------------------------------
-- | Split a document in its context
...
...
@@ -150,12 +153,21 @@ instance FromField IntOrDec where
instance
ToField
IntOrDec
where
toField
(
IntOrDec
i
)
=
toField
i
fromMIntOrDec
::
Int
->
Maybe
IntOrDec
->
Int
fromMIntOrDec
default
'
m
Val
=
unIntOrDec
$
fromMaybe
(
IntOrDec
default
')
mVal
defaultYear
::
Int
defaultYear
=
1973
defaultMonth
::
Int
defaultMonth
=
1
defaultDay
::
Int
defaultDay
=
1
data
CsvDoc
=
CsvDoc
{
csv_title
::
!
Text
,
csv_source
::
!
Text
,
csv_publication_year
::
!
IntOrDec
,
csv_publication_month
::
!
Int
,
csv_publication_day
::
!
Int
{
csv_title
::
!
Text
,
csv_source
::
!
Text
,
csv_publication_year
::
!
(
Maybe
IntOrDec
)
,
csv_publication_month
::
!
(
Maybe
Int
)
,
csv_publication_day
::
!
(
Maybe
Int
)
,
csv_abstract
::
!
Text
,
csv_authors
::
!
Text
}
...
...
@@ -172,21 +184,21 @@ instance FromNamedRecord CsvDoc where
instance
ToNamedRecord
CsvDoc
where
toNamedRecord
(
CsvDoc
t
s
py
pm
pd
abst
aut
)
=
namedRecord
[
"title"
.=
t
,
"source"
.=
s
namedRecord
[
"title"
.=
t
,
"source"
.=
s
,
"publication_year"
.=
py
,
"publication_month"
.=
pm
,
"publication_day"
.=
pd
,
"abstract"
.=
abst
,
"authors"
.=
aut
]
]
hyperdataDocument2csvDoc
::
HyperdataDocument
->
CsvDoc
hyperdataDocument2csvDoc
h
=
CsvDoc
(
m
$
_hd_title
h
)
(
m
$
_hd_source
h
)
(
IntOrDec
$
mI
$
_hd_publication_year
h
)
(
mI
$
_hd_publication_month
h
)
(
mI
$
_hd_publication_day
h
)
(
Just
$
IntOrDec
$
mI
$
_hd_publication_year
h
)
(
Just
$
mI
$
_hd_publication_month
h
)
(
Just
$
mI
$
_hd_publication_day
h
)
(
m
$
_hd_abstract
h
)
(
m
$
_hd_authors
h
)
...
...
@@ -368,7 +380,7 @@ csvHal2doc (CsvHal title source
csv2doc
::
CsvDoc
->
HyperdataDocument
csv2doc
(
CsvDoc
title
source
(
IntOrDec
pub_year
)
pub_month
pub_d
ay
mPubYear
mPubMonth
mPubD
ay
abstract
authors
)
=
HyperdataDocument
(
Just
"CsvHal"
)
Nothing
Nothing
...
...
@@ -380,14 +392,18 @@ csv2doc (CsvDoc title source
Nothing
(
Just
source
)
(
Just
abstract
)
(
Just
$
pack
.
show
$
jour
(
fromIntegral
pub
_year
)
pub_month
pub_d
ay
)
(
Just
$
fromIntegral
pub_y
ear
)
(
Just
pub
_m
onth
)
(
Just
pub
_d
ay
)
(
Just
$
pack
.
show
$
jour
(
fromIntegral
pub
Year
)
pubMonth
pubD
ay
)
(
Just
pubY
ear
)
(
Just
pub
M
onth
)
(
Just
pub
D
ay
)
Nothing
Nothing
Nothing
Nothing
where
pubYear
=
fromMIntOrDec
defaultYear
mPubYear
pubMonth
=
fromMaybe
defaultMonth
mPubMonth
pubDay
=
fromMaybe
defaultDay
mPubDay
------------------------------------------------------------------------
parseHal
::
FilePath
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
View file @
f35d84d9
...
...
@@ -28,7 +28,7 @@ import System.IO (FilePath)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
CsvDoc
(
..
),
writeFile
,
headerCsvGargV3
)
import
Data.Vector
(
fromList
)
data
Patent
=
Patent
{
_patent_title
::
Text
data
Patent
=
Patent
{
_patent_title
::
Text
,
_patent_abstract
::
Text
,
_patent_year
::
Text
,
_patent_id
::
Text
...
...
@@ -49,7 +49,7 @@ json2csv fin fout = do
patent2csvDoc
::
Patent
->
CsvDoc
patent2csvDoc
(
Patent
title
abstract
year
_
)
=
CsvDoc
title
"Source"
(
read
(
unpack
year
))
1
1
abstract
"Authors"
CsvDoc
title
"Source"
(
Just
$
read
(
unpack
year
))
(
Just
1
)
(
Just
1
)
abstract
"Authors"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment