Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
b13dfc93
Commit
b13dfc93
authored
Jul 27, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[list] CSV parsing draft
parent
63f099cb
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
133 additions
and
56 deletions
+133
-56
List.hs
src/Gargantext/API/Ngrams/List.hs
+73
-56
Types.hs
src/Gargantext/API/Ngrams/List/Types.hs
+54
-0
stack.yaml
stack.yaml
+6
-0
No files found.
src/Gargantext/API/Ngrams/List.hs
View file @
b13dfc93
...
@@ -9,6 +9,7 @@ Portability : POSIX
...
@@ -9,6 +9,7 @@ Portability : POSIX
-}
-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeOperators #-}
...
@@ -17,33 +18,36 @@ module Gargantext.API.Ngrams.List
...
@@ -17,33 +18,36 @@ module Gargantext.API.Ngrams.List
import
Control.Lens
hiding
(
elements
,
Indexed
)
import
Control.Lens
hiding
(
elements
,
Indexed
)
import
Data.Aeson
import
Data.Aeson
import
qualified
Data.ByteString.Lazy
as
BSL
import
qualified
Data.Csv
as
Csv
import
Data.Either
(
Either
(
..
))
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
toList
,
fromList
)
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
import
Data.Map
(
Map
,
toList
,
fromList
)
import
qualified
Data.Map
as
Map
import
Data.Maybe
(
catMaybes
)
import
Data.Maybe
(
catMaybes
)
import
Data.Set
(
Set
)
import
Data.Set
(
Set
)
import
Data.Swagger
(
ToSchema
,
declareNamedSchema
,
genericDeclareNamedSchema
)
import
Data.Text
(
Text
,
concat
,
pack
)
import
Data.Text
(
Text
,
concat
,
pack
)
import
GHC.Generics
(
Generic
)
import
qualified
Data.Text
as
Text
import
Data.Vector
(
Vector
)
import
qualified
Data.Vector
as
Vec
import
Network.HTTP.Media
((
//
),
(
/:
))
import
Network.HTTP.Media
((
//
),
(
/:
))
import
qualified
Prelude
as
Prelude
import
Servant
import
Servant
import
Servant.Job.Async
import
Servant.Job.Async
import
Servant.Job.Utils
(
jsonOptions
)
import
Web.FormUrlEncoded
(
FromForm
)
import
qualified
Protolude
as
P
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Text
as
Text
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
)
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
)
import
Gargantext.API.Ngrams.List.Types
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Prelude
(
GargServer
)
import
Gargantext.API.Prelude
(
GargServer
)
import
Gargantext.Core.Text.Terms
(
ExtractedNgrams
(
..
))
import
Gargantext.Core.Text.Terms
(
ExtractedNgrams
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
termsInText
)
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
termsInText
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow
(
saveDocNgramsWith
)
import
Gargantext.Database.Action.Flow
(
saveDocNgramsWith
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getOccByNgramsOnlyFast'
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getOccByNgramsOnlyFast'
)
...
@@ -55,20 +59,6 @@ import Gargantext.Database.Schema.Node
...
@@ -55,20 +59,6 @@ import Gargantext.Database.Schema.Node
import
Gargantext.Database.Types
(
Indexed
(
..
))
import
Gargantext.Database.Types
(
Indexed
(
..
))
import
Gargantext.Prelude
import
Gargantext.Prelude
------------------------------------------------------------------------
type
API
=
Get
'[
J
SON
,
HTML
]
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsList
)
-- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
:<|>
PostAPI
:<|>
CSVPostAPI
api
::
ListId
->
GargServer
API
api
l
=
get
l
:<|>
postAsync
l
:<|>
csvPostAsync
l
data
HTML
instance
Accept
HTML
where
contentType
_
=
"text"
//
"html"
/:
(
"charset"
,
"utf-8"
)
instance
ToJSON
a
=>
MimeRender
HTML
a
where
mimeRender
_
=
encode
------------------------------------------------------------------------
------------------------------------------------------------------------
get
::
RepoCmdM
env
err
m
=>
get
::
RepoCmdM
env
err
m
=>
...
@@ -101,15 +91,6 @@ post l m = do
...
@@ -101,15 +91,6 @@ post l m = do
-- TODO reindex
-- TODO reindex
pure
True
pure
True
------------------------------------------------------------------------
csvPost
::
FlowCmdM
env
err
m
=>
ListId
->
NgramsList
->
m
Bool
csvPost
l
m
=
do
printDebug
"[csvPost] l"
l
printDebug
"[csvPost] m"
m
pure
True
-----------------------------------------------------------------------------
-----------------------------------------------------------------------------
-- | Re-index documents of a corpus with new ngrams (called orphans here)
-- | Re-index documents of a corpus with new ngrams (called orphans here)
...
@@ -207,27 +188,64 @@ postAsync' l (WithFile _ m _) logStatus = do
...
@@ -207,27 +188,64 @@ postAsync' l (WithFile _ m _) logStatus = do
,
_scst_events
=
Just
[]
,
_scst_events
=
Just
[]
}
}
------------------------------------------------------------------------
------------------------------------------------------------------------
readCsvText
::
Text
->
[(
Text
,
Text
,
Text
)]
readCsvText
t
=
case
eDec
of
Left
_
->
[]
Right
dec
->
Vec
.
toList
dec
where
lt
=
BSL
.
fromStrict
$
P
.
encodeUtf8
t
eDec
=
Csv
.
decodeWith
(
Csv
.
defaultDecodeOptions
{
Csv
.
decDelimiter
=
fromIntegral
(
P
.
ord
'
\t
'
)
})
Csv
.
HasHeader
lt
::
Either
Prelude
.
String
(
Vector
(
Text
,
Text
,
Text
))
parseCsvData
::
[(
Text
,
Text
,
Text
)]
->
Map
NgramsTerm
NgramsRepoElement
parseCsvData
lst
=
Map
.
fromList
$
conv
<$>
lst
where
conv
(
_status
,
label
,
_forms
)
=
(
NgramsTerm
label
,
NgramsRepoElement
{
_nre_size
=
1
,
_nre_list
=
CandidateTerm
,
_nre_root
=
Nothing
,
_nre_parent
=
Nothing
,
_nre_children
=
MSet
Map
.
empty
})
csvPost
::
FlowCmdM
env
err
m
=>
ListId
->
Text
->
m
Bool
csvPost
l
m
=
do
printDebug
"[csvPost] l"
l
-- printDebug "[csvPost] m" m
-- status label forms
let
lst
=
readCsvText
m
let
p
=
parseCsvData
lst
--printDebug "[csvPost] lst" lst
--printDebug "[csvPost] p" p
_
<-
setListNgrams
l
NgramsTerms
p
pure
True
------------------------------------------------------------------------
type
CSVPostAPI
=
Summary
"Update List (legacy v3 CSV)"
type
CSVPostAPI
=
Summary
"Update List (legacy v3 CSV)"
:>
"csv"
:>
"csv"
:>
"add"
:>
"add"
:>
"form"
:>
"form"
:>
"async"
:>
"async"
:>
AsyncJobs
JobLog
'[
F
ormUrlEncoded
]
WithFile
JobLog
:>
AsyncJobs
JobLog
'[
F
ormUrlEncoded
]
With
Text
File
JobLog
csvPostAsync
::
ListId
->
GargServer
PostAPI
csvPostAsync
::
ListId
->
GargServer
CSV
PostAPI
csvPostAsync
lId
=
csvPostAsync
lId
=
serveJobsAPI
$
serveJobsAPI
$
JobFunction
$
\
f
@
(
WithFile
ft
_
n
)
log'
->
do
JobFunction
$
\
f
@
(
WithTextFile
ft
_
n
)
log'
->
do
printDebug
"[csvPostAsync] filetype"
ft
let
log''
x
=
do
printDebug
"[csvPostAsync] name"
n
printDebug
"[csvPostAsync] filetype"
ft
csvPostAsync'
lId
f
(
liftBase
.
log'
)
printDebug
"[csvPostAsync] name"
n
liftBase
$
log'
x
csvPostAsync'
lId
f
log''
csvPostAsync'
::
FlowCmdM
env
err
m
csvPostAsync'
::
FlowCmdM
env
err
m
=>
ListId
=>
ListId
->
WithFile
->
With
Text
File
->
(
JobLog
->
m
()
)
->
(
JobLog
->
m
()
)
->
m
JobLog
->
m
JobLog
csvPostAsync'
l
(
WithFile
_
m
_
)
logStatus
=
do
csvPostAsync'
l
(
With
Text
File
_
m
_
)
logStatus
=
do
logStatus
JobLog
{
_scst_succeeded
=
Just
0
logStatus
JobLog
{
_scst_succeeded
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_remaining
=
Just
1
...
@@ -240,19 +258,18 @@ csvPostAsync' l (WithFile _ m _) logStatus = do
...
@@ -240,19 +258,18 @@ csvPostAsync' l (WithFile _ m _) logStatus = do
,
_scst_remaining
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
,
_scst_events
=
Just
[]
}
}
------------------------------------------------------------------------
------------------------------------------------------------------------
type
API
=
Get
'[
J
SON
,
HTML
]
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsList
)
-- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
:<|>
PostAPI
:<|>
CSVPostAPI
data
WithFile
=
WithFile
api
::
ListId
->
GargServer
API
{
_wf_filetype
::
!
FileType
api
l
=
get
l
:<|>
postAsync
l
:<|>
csvPostAsync
l
,
_wf_data
::
!
NgramsList
,
_wf_name
::
!
Text
data
HTML
}
deriving
(
Eq
,
Show
,
Generic
)
instance
Accept
HTML
where
contentType
_
=
"text"
//
"html"
/:
(
"charset"
,
"utf-8"
)
makeLenses
''
W
ithFile
instance
ToJSON
a
=>
MimeRender
HTML
a
where
instance
FromForm
WithFile
mimeRender
_
=
encode
instance
FromJSON
WithFile
where
parseJSON
=
genericParseJSON
$
jsonOptions
"_wf_"
instance
ToJSON
WithFile
where
toJSON
=
genericToJSON
$
jsonOptions
"_wf_"
instance
ToSchema
WithFile
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_wf_"
)
src/Gargantext/API/Ngrams/List/Types.hs
0 → 100644
View file @
b13dfc93
module
Gargantext.API.Ngrams.List.Types
where
--{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
--import Control.Lens hiding (elements, Indexed)
import
Data.Aeson
import
Data.Swagger
(
ToSchema
,
declareNamedSchema
,
genericDeclareNamedSchema
)
import
Data.Text
import
Servant.Job.Utils
(
jsonOptions
)
import
Web.FormUrlEncoded
(
FromForm
)
import
Protolude
import
Gargantext.API.Ngrams.Types
(
NgramsList
)
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
------------------------------------------------------------------------
data
WithFile
=
WithFile
{
_wf_filetype
::
!
FileType
,
_wf_data
::
!
NgramsList
,
_wf_name
::
!
Text
}
deriving
(
Eq
,
Show
,
Generic
)
--makeLenses ''WithFile
instance
FromForm
WithFile
instance
FromJSON
WithFile
where
parseJSON
=
genericParseJSON
$
jsonOptions
"_wf_"
instance
ToJSON
WithFile
where
toJSON
=
genericToJSON
$
jsonOptions
"_wf_"
instance
ToSchema
WithFile
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_wf_"
)
------------------------------------------------------------------------
data
WithTextFile
=
WithTextFile
{
_wtf_filetype
::
!
FileType
,
_wtf_data
::
!
Text
,
_wtf_name
::
!
Text
}
deriving
(
Eq
,
Show
,
Generic
)
--makeLenses ''WithTextFile
instance
FromForm
WithTextFile
instance
FromJSON
WithTextFile
where
parseJSON
=
genericParseJSON
$
jsonOptions
"_wtf_"
instance
ToJSON
WithTextFile
where
toJSON
=
genericToJSON
$
jsonOptions
"_wtf_"
instance
ToSchema
WithTextFile
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_wtf_"
)
stack.yaml
View file @
b13dfc93
...
@@ -22,6 +22,9 @@ nix:
...
@@ -22,6 +22,9 @@ nix:
allow-newer
:
true
allow-newer
:
true
#ghc-options:
# "$everything": -haddock
extra-deps
:
extra-deps
:
-
git
:
https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
-
git
:
https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit
:
3e32ec3aca71eb326805355d3a99b9288dc342ee
commit
:
3e32ec3aca71eb326805355d3a99b9288dc342ee
...
@@ -103,3 +106,6 @@ extra-deps:
...
@@ -103,3 +106,6 @@ extra-deps:
-
stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082
-
stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082
-
xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540
-
xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540
-
xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950
-
xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950
# need Vector.uncons
-
vector-0.12.3.0@sha256:0ae2c1ba86f0077910be242ec6802cc3d7725fe7b2bea6987201aa3737b239b5,7953
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment