Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
195
Issues
195
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
b13dfc93
Commit
b13dfc93
authored
Jul 27, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[list] CSV parsing draft
parent
63f099cb
Pipeline
#1679
passed with stage
in 49 minutes and 2 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
133 additions
and
56 deletions
+133
-56
List.hs
src/Gargantext/API/Ngrams/List.hs
+73
-56
Types.hs
src/Gargantext/API/Ngrams/List/Types.hs
+54
-0
stack.yaml
stack.yaml
+6
-0
No files found.
src/Gargantext/API/Ngrams/List.hs
View file @
b13dfc93
...
@@ -9,6 +9,7 @@ Portability : POSIX
...
@@ -9,6 +9,7 @@ Portability : POSIX
-}
-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeOperators #-}
...
@@ -17,33 +18,36 @@ module Gargantext.API.Ngrams.List
...
@@ -17,33 +18,36 @@ module Gargantext.API.Ngrams.List
import
Control.Lens
hiding
(
elements
,
Indexed
)
import
Control.Lens
hiding
(
elements
,
Indexed
)
import
Data.Aeson
import
Data.Aeson
import
qualified
Data.ByteString.Lazy
as
BSL
import
qualified
Data.Csv
as
Csv
import
Data.Either
(
Either
(
..
))
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
toList
,
fromList
)
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
import
Data.Map
(
Map
,
toList
,
fromList
)
import
qualified
Data.Map
as
Map
import
Data.Maybe
(
catMaybes
)
import
Data.Maybe
(
catMaybes
)
import
Data.Set
(
Set
)
import
Data.Set
(
Set
)
import
Data.Swagger
(
ToSchema
,
declareNamedSchema
,
genericDeclareNamedSchema
)
import
Data.Text
(
Text
,
concat
,
pack
)
import
Data.Text
(
Text
,
concat
,
pack
)
import
GHC.Generics
(
Generic
)
import
qualified
Data.Text
as
Text
import
Data.Vector
(
Vector
)
import
qualified
Data.Vector
as
Vec
import
Network.HTTP.Media
((
//
),
(
/:
))
import
Network.HTTP.Media
((
//
),
(
/:
))
import
qualified
Prelude
as
Prelude
import
Servant
import
Servant
import
Servant.Job.Async
import
Servant.Job.Async
import
Servant.Job.Utils
(
jsonOptions
)
import
Web.FormUrlEncoded
(
FromForm
)
import
qualified
Protolude
as
P
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Text
as
Text
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
)
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
)
import
Gargantext.API.Ngrams.List.Types
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Prelude
(
GargServer
)
import
Gargantext.API.Prelude
(
GargServer
)
import
Gargantext.Core.Text.Terms
(
ExtractedNgrams
(
..
))
import
Gargantext.Core.Text.Terms
(
ExtractedNgrams
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
termsInText
)
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
termsInText
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow
(
saveDocNgramsWith
)
import
Gargantext.Database.Action.Flow
(
saveDocNgramsWith
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getOccByNgramsOnlyFast'
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getOccByNgramsOnlyFast'
)
...
@@ -55,20 +59,6 @@ import Gargantext.Database.Schema.Node
...
@@ -55,20 +59,6 @@ import Gargantext.Database.Schema.Node
import
Gargantext.Database.Types
(
Indexed
(
..
))
import
Gargantext.Database.Types
(
Indexed
(
..
))
import
Gargantext.Prelude
import
Gargantext.Prelude
------------------------------------------------------------------------
type
API
=
Get
'[
J
SON
,
HTML
]
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsList
)
-- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
:<|>
PostAPI
:<|>
CSVPostAPI
api
::
ListId
->
GargServer
API
api
l
=
get
l
:<|>
postAsync
l
:<|>
csvPostAsync
l
data
HTML
instance
Accept
HTML
where
contentType
_
=
"text"
//
"html"
/:
(
"charset"
,
"utf-8"
)
instance
ToJSON
a
=>
MimeRender
HTML
a
where
mimeRender
_
=
encode
------------------------------------------------------------------------
------------------------------------------------------------------------
get
::
RepoCmdM
env
err
m
=>
get
::
RepoCmdM
env
err
m
=>
...
@@ -101,15 +91,6 @@ post l m = do
...
@@ -101,15 +91,6 @@ post l m = do
-- TODO reindex
-- TODO reindex
pure
True
pure
True
------------------------------------------------------------------------
csvPost
::
FlowCmdM
env
err
m
=>
ListId
->
NgramsList
->
m
Bool
csvPost
l
m
=
do
printDebug
"[csvPost] l"
l
printDebug
"[csvPost] m"
m
pure
True
-----------------------------------------------------------------------------
-----------------------------------------------------------------------------
-- | Re-index documents of a corpus with new ngrams (called orphans here)
-- | Re-index documents of a corpus with new ngrams (called orphans here)
...
@@ -207,27 +188,64 @@ postAsync' l (WithFile _ m _) logStatus = do
...
@@ -207,27 +188,64 @@ postAsync' l (WithFile _ m _) logStatus = do
,
_scst_events
=
Just
[]
,
_scst_events
=
Just
[]
}
}
------------------------------------------------------------------------
------------------------------------------------------------------------
readCsvText
::
Text
->
[(
Text
,
Text
,
Text
)]
readCsvText
t
=
case
eDec
of
Left
_
->
[]
Right
dec
->
Vec
.
toList
dec
where
lt
=
BSL
.
fromStrict
$
P
.
encodeUtf8
t
eDec
=
Csv
.
decodeWith
(
Csv
.
defaultDecodeOptions
{
Csv
.
decDelimiter
=
fromIntegral
(
P
.
ord
'
\t
'
)
})
Csv
.
HasHeader
lt
::
Either
Prelude
.
String
(
Vector
(
Text
,
Text
,
Text
))
parseCsvData
::
[(
Text
,
Text
,
Text
)]
->
Map
NgramsTerm
NgramsRepoElement
parseCsvData
lst
=
Map
.
fromList
$
conv
<$>
lst
where
conv
(
_status
,
label
,
_forms
)
=
(
NgramsTerm
label
,
NgramsRepoElement
{
_nre_size
=
1
,
_nre_list
=
CandidateTerm
,
_nre_root
=
Nothing
,
_nre_parent
=
Nothing
,
_nre_children
=
MSet
Map
.
empty
})
csvPost
::
FlowCmdM
env
err
m
=>
ListId
->
Text
->
m
Bool
csvPost
l
m
=
do
printDebug
"[csvPost] l"
l
-- printDebug "[csvPost] m" m
-- status label forms
let
lst
=
readCsvText
m
let
p
=
parseCsvData
lst
--printDebug "[csvPost] lst" lst
--printDebug "[csvPost] p" p
_
<-
setListNgrams
l
NgramsTerms
p
pure
True
------------------------------------------------------------------------
type
CSVPostAPI
=
Summary
"Update List (legacy v3 CSV)"
type
CSVPostAPI
=
Summary
"Update List (legacy v3 CSV)"
:>
"csv"
:>
"csv"
:>
"add"
:>
"add"
:>
"form"
:>
"form"
:>
"async"
:>
"async"
:>
AsyncJobs
JobLog
'[
F
ormUrlEncoded
]
WithFile
JobLog
:>
AsyncJobs
JobLog
'[
F
ormUrlEncoded
]
With
Text
File
JobLog
csvPostAsync
::
ListId
->
GargServer
PostAPI
csvPostAsync
::
ListId
->
GargServer
CSV
PostAPI
csvPostAsync
lId
=
csvPostAsync
lId
=
serveJobsAPI
$
serveJobsAPI
$
JobFunction
$
\
f
@
(
WithFile
ft
_
n
)
log'
->
do
JobFunction
$
\
f
@
(
WithTextFile
ft
_
n
)
log'
->
do
printDebug
"[csvPostAsync] filetype"
ft
let
log''
x
=
do
printDebug
"[csvPostAsync] name"
n
printDebug
"[csvPostAsync] filetype"
ft
csvPostAsync'
lId
f
(
liftBase
.
log'
)
printDebug
"[csvPostAsync] name"
n
liftBase
$
log'
x
csvPostAsync'
lId
f
log''
csvPostAsync'
::
FlowCmdM
env
err
m
csvPostAsync'
::
FlowCmdM
env
err
m
=>
ListId
=>
ListId
->
WithFile
->
With
Text
File
->
(
JobLog
->
m
()
)
->
(
JobLog
->
m
()
)
->
m
JobLog
->
m
JobLog
csvPostAsync'
l
(
WithFile
_
m
_
)
logStatus
=
do
csvPostAsync'
l
(
With
Text
File
_
m
_
)
logStatus
=
do
logStatus
JobLog
{
_scst_succeeded
=
Just
0
logStatus
JobLog
{
_scst_succeeded
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_remaining
=
Just
1
...
@@ -240,19 +258,18 @@ csvPostAsync' l (WithFile _ m _) logStatus = do
...
@@ -240,19 +258,18 @@ csvPostAsync' l (WithFile _ m _) logStatus = do
,
_scst_remaining
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
,
_scst_events
=
Just
[]
}
}
------------------------------------------------------------------------
------------------------------------------------------------------------
type
API
=
Get
'[
J
SON
,
HTML
]
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsList
)
-- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
:<|>
PostAPI
:<|>
CSVPostAPI
data
WithFile
=
WithFile
api
::
ListId
->
GargServer
API
{
_wf_filetype
::
!
FileType
api
l
=
get
l
:<|>
postAsync
l
:<|>
csvPostAsync
l
,
_wf_data
::
!
NgramsList
,
_wf_name
::
!
Text
data
HTML
}
deriving
(
Eq
,
Show
,
Generic
)
instance
Accept
HTML
where
contentType
_
=
"text"
//
"html"
/:
(
"charset"
,
"utf-8"
)
makeLenses
''
W
ithFile
instance
ToJSON
a
=>
MimeRender
HTML
a
where
instance
FromForm
WithFile
mimeRender
_
=
encode
instance
FromJSON
WithFile
where
parseJSON
=
genericParseJSON
$
jsonOptions
"_wf_"
instance
ToJSON
WithFile
where
toJSON
=
genericToJSON
$
jsonOptions
"_wf_"
instance
ToSchema
WithFile
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_wf_"
)
src/Gargantext/API/Ngrams/List/Types.hs
0 → 100644
View file @
b13dfc93
module
Gargantext.API.Ngrams.List.Types
where
--{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
--import Control.Lens hiding (elements, Indexed)
import
Data.Aeson
import
Data.Swagger
(
ToSchema
,
declareNamedSchema
,
genericDeclareNamedSchema
)
import
Data.Text
import
Servant.Job.Utils
(
jsonOptions
)
import
Web.FormUrlEncoded
(
FromForm
)
import
Protolude
import
Gargantext.API.Ngrams.Types
(
NgramsList
)
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
------------------------------------------------------------------------
data
WithFile
=
WithFile
{
_wf_filetype
::
!
FileType
,
_wf_data
::
!
NgramsList
,
_wf_name
::
!
Text
}
deriving
(
Eq
,
Show
,
Generic
)
--makeLenses ''WithFile
instance
FromForm
WithFile
instance
FromJSON
WithFile
where
parseJSON
=
genericParseJSON
$
jsonOptions
"_wf_"
instance
ToJSON
WithFile
where
toJSON
=
genericToJSON
$
jsonOptions
"_wf_"
instance
ToSchema
WithFile
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_wf_"
)
------------------------------------------------------------------------
data
WithTextFile
=
WithTextFile
{
_wtf_filetype
::
!
FileType
,
_wtf_data
::
!
Text
,
_wtf_name
::
!
Text
}
deriving
(
Eq
,
Show
,
Generic
)
--makeLenses ''WithTextFile
instance
FromForm
WithTextFile
instance
FromJSON
WithTextFile
where
parseJSON
=
genericParseJSON
$
jsonOptions
"_wtf_"
instance
ToJSON
WithTextFile
where
toJSON
=
genericToJSON
$
jsonOptions
"_wtf_"
instance
ToSchema
WithTextFile
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_wtf_"
)
stack.yaml
View file @
b13dfc93
...
@@ -22,6 +22,9 @@ nix:
...
@@ -22,6 +22,9 @@ nix:
allow-newer
:
true
allow-newer
:
true
#ghc-options:
# "$everything": -haddock
extra-deps
:
extra-deps
:
-
git
:
https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
-
git
:
https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit
:
3e32ec3aca71eb326805355d3a99b9288dc342ee
commit
:
3e32ec3aca71eb326805355d3a99b9288dc342ee
...
@@ -103,3 +106,6 @@ extra-deps:
...
@@ -103,3 +106,6 @@ extra-deps:
-
stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082
-
stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082
-
xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540
-
xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540
-
xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950
-
xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950
# need Vector.uncons
-
vector-0.12.3.0@sha256:0ae2c1ba86f0077910be242ec6802cc3d7725fe7b2bea6987201aa3737b239b5,7953
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment