Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
160
Issues
160
List
Board
Labels
Milestones
Merge Requests
8
Merge Requests
8
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
6492bff3
Commit
6492bff3
authored
Mar 19, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[OPTIM] gargantext-import, too much RAM used
parent
b6b50641
Pipeline
#288
failed with stage
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
26 additions
and
7 deletions
+26
-7
Main.hs
bin/gargantext-import/Main.hs
+10
-2
package.yaml
package.yaml
+1
-0
Flow.hs
src/Gargantext/Database/Flow.hs
+6
-0
GrandDebat.hs
src/Gargantext/Text/Parsers/GrandDebat.hs
+8
-5
stack.yaml
stack.yaml
+1
-0
No files found.
bin/gargantext-import/Main.hs
View file @
6492bff3
...
...
@@ -22,7 +22,7 @@ module Main where
import
Control.Exception
(
finally
)
import
Servant
(
ServantErr
)
import
Gargantext.Prelude
import
Gargantext.Database.Flow
(
FlowCmdM
,
flowCorpus
)
import
Gargantext.Database.Flow
(
FlowCmdM
,
flowCorpus
''
)
import
Gargantext.Text.Parsers
(
FileFormat
(
CsvHalFormat
))
import
Gargantext.Database.Utils
(
Cmd
,
)
import
Gargantext.Database.Types.Node
(
CorpusId
)
...
...
@@ -32,6 +32,9 @@ import Gargantext.Core (Lang(..))
import
Gargantext.API.Node
()
-- instances
import
Gargantext.API.Settings
(
newDevEnvWith
,
runCmdDev
,
DevEnv
)
import
System.Environment
(
getArgs
)
import
Gargantext.Text.Parsers.GrandDebat
(
readFile
,
GrandDebatReference
(
..
))
import
qualified
Data.Text
as
Text
import
Control.Monad.IO.Class
(
liftIO
)
main
::
IO
()
main
=
do
...
...
@@ -39,10 +42,15 @@ main = do
{-let createUsers :: Cmd ServantErr Int64
createUsers = insertUsers [gargantuaUser,simpleUser]
-}
let cmdCorpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
cmdCorpus = flowCorpus (cs user) (cs name) (Mono EN) CsvHalFormat corpusPath
-}
let
cmdCorpus
::
forall
m
.
FlowCmdM
DevEnv
ServantErr
m
=>
m
[
CorpusId
]
cmdCorpus
=
do
docs
<-
liftIO
(
splitEvery
1000
<$>
take
5000
<$>
readFile
corpusPath
::
IO
[[
GrandDebatReference
]])
ids
<-
flowCorpus''
(
Text
.
pack
user
)
(
Text
.
pack
name
)
(
Mono
FR
)
docs
pure
ids
-- cmd = {-createUsers >>-} cmdCorpus
...
...
package.yaml
View file @
6492bff3
...
...
@@ -116,6 +116,7 @@ library:
-
insert-ordered-containers
-
jose-jwt
# - kmeans-vector
-
json-stream
-
KMP
-
lens
-
located-base
...
...
src/Gargantext/Database/Flow.hs
View file @
6492bff3
...
...
@@ -80,6 +80,12 @@ flowCorpus :: FlowCmdM env ServantErr m
=>
Username
->
CorpusName
->
TermType
Lang
->
FileFormat
->
FilePath
->
m
CorpusId
flowCorpus
u
cn
la
ff
fp
=
liftIO
(
parseDocs
ff
fp
)
>>=
\
docs
->
flowCorpus'
u
cn
la
docs
--{-
flowCorpus''
::
(
FlowCmdM
env
ServantErr
m
,
ToHyperdataDocument
a
)
=>
Username
->
CorpusName
->
TermType
Lang
->
[[
a
]]
->
m
[
CorpusId
]
flowCorpus''
u
cn
la
docs
=
mapM
(
\
doc
->
flowCorpus'
u
cn
la
doc
)
docs
--}
flowCorpus'
::
(
FlowCmdM
env
ServantErr
m
,
ToHyperdataDocument
a
)
=>
Username
->
CorpusName
->
TermType
Lang
->
[
a
]
->
m
CorpusId
flowCorpus'
u
cn
la
docs
=
do
...
...
src/Gargantext/Text/Parsers/GrandDebat.hs
View file @
6492bff3
...
...
@@ -20,8 +20,10 @@ module Gargantext.Text.Parsers.GrandDebat
where
import
GHC.IO
(
FilePath
)
import
Data.Aeson
(
ToJSON
,
FromJSON
,
decode
)
import
Data.Maybe
(
Maybe
(),
maybe
)
import
Data.Aeson
(
ToJSON
,
FromJSON
)
import
Data.JsonStream.Parser
(
eitherDecode
)
import
Data.Either
(
either
)
import
Data.Maybe
(
Maybe
())
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
Text
import
qualified
Data.ByteString.Lazy
as
DBL
...
...
@@ -76,13 +78,13 @@ instance ToHyperdataDocument GrandDebatReference
responses'
)
=
HyperdataDocument
(
Just
"GrandDebat"
)
id'
Nothing
Nothing
Nothing
Nothing
title'
authorType'
author
ZipCod
e'
authorZipCode'
title'
authorType'
author
Typ
e'
authorZipCode'
(
toAbstract
<$>
responses'
)
publishedAt'
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
(
Just
$
Text
.
pack
$
show
FR
)
where
toAbstract
=
(
Text
.
intercalate
" . "
)
.
(
map
toSentence
)
toAbstract
=
(
Text
.
intercalate
" . "
)
.
(
(
filter
(
/=
""
))
.
(
map
toSentence
)
)
toSentence
(
GrandDebatResponse
_id
_qtitle
_qvalue
r
)
=
case
r
of
Nothing
->
""
Just
r'
->
case
Text
.
length
r'
>
10
of
...
...
@@ -95,4 +97,5 @@ class ReadFile a
instance
ReadFile
[
GrandDebatReference
]
where
readFile
fp
=
maybe
[]
identity
<$>
decode
<$>
DBL
.
readFile
fp
--readFile fp = maybe [] identity <$> decode <$> DBL.readFile fp
readFile
fp
=
either
(
panic
.
Text
.
pack
)
identity
<$>
eitherDecode
<$>
DBL
.
readFile
fp
stack.yaml
View file @
6492bff3
...
...
@@ -35,6 +35,7 @@ extra-deps:
-
probable-0.1.3
-
rake-0.0.1
-
rdf4h-3.1.1
-
json-stream-0.4.2.4
# Text.Parsers (JSON)
-
serialise-0.2.0.0
-
servant-flatten-0.2
-
servant-multipart-0.11.2
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment