Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
7b9de040
Commit
7b9de040
authored
Oct 16, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev' into 86-dev-graphql
parents
e30519a3
f5bb8c77
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
79 additions
and
29 deletions
+79
-29
.envrc
.envrc
+1
-0
.gitignore
.gitignore
+3
-0
install
bin/install
+1
-1
gargantext.ini_toModify
gargantext.ini_toModify
+1
-0
pkgs.nix
nix/pkgs.nix
+2
-0
package.yaml
package.yaml
+1
-1
Job.hs
src/Gargantext/API/Job.hs
+12
-0
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+37
-16
FrameCalcUpload.hs
src/Gargantext/API/Node/FrameCalcUpload.hs
+2
-1
Types.hs
src/Gargantext/API/Node/Types.hs
+1
-1
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+11
-4
Facet.hs
src/Gargantext/Database/Query/Facet.hs
+1
-1
stack.yaml
stack.yaml
+6
-4
No files found.
.envrc
0 → 100644
View file @
7b9de040
use_nix
.gitignore
View file @
7b9de040
...
...
@@ -11,6 +11,7 @@ profiling
# Stack
.stack-work
dist-newstyle
# Emacs
TAGS
...
...
@@ -33,5 +34,7 @@ _darcs
# Runtime
# Repo
repos
repo.json*
tmp*repo*json
data
bin/install
View file @
7b9de040
#!/bin/bash
stack
install
--nix
--profile
--test
--fast
# --haddo
ck
stack
install
--nix
--profile
--test
--fast
--no-install-ghc
--skip-ghc-che
ck
gargantext.ini_toModify
View file @
7b9de040
...
...
@@ -33,6 +33,7 @@ FRAME_VISIO_URL = URL_TO_CHANGE
FRAME_SEARX_URL = URL_TO_CHANGE
FRAME_ISTEX_URL = URL_TO_CHANGE
MAX_DOCS_PARSERS = 1000000
MAX_DOCS_SCRAPERS = 10000
[server]
...
...
nix/pkgs.nix
View file @
7b9de040
...
...
@@ -26,6 +26,8 @@ rec {
blas
gfortran7
# gfortran7.cc.lib
expat
icu
];
libPaths
=
pkgs
.
lib
.
makeLibraryPath
nonhsBuildInputs
;
shellHook
=
''
...
...
package.yaml
View file @
7b9de040
name
:
gargantext
version
:
'
0.0.4.
3
'
version
:
'
0.0.4.
4
'
synopsis
:
Search, map, share
description
:
Please see README.md
category
:
Data
...
...
src/Gargantext/API/Job.hs
View file @
7b9de040
...
...
@@ -3,6 +3,7 @@ module Gargantext.API.Job where
import
Control.Lens
(
over
,
_Just
)
import
Data.IORef
import
Data.Maybe
import
qualified
Data.Text
as
T
import
Gargantext.Prelude
...
...
@@ -16,6 +17,14 @@ jobLogInit rem =
,
_scst_failed
=
Just
0
,
_scst_events
=
Just
[]
}
addEvent
::
T
.
Text
->
T
.
Text
->
JobLog
->
JobLog
addEvent
level
message
(
JobLog
{
_scst_events
=
mEvts
,
..
})
=
JobLog
{
_scst_events
=
Just
(
evts
<>
[
newEvt
]),
..
}
where
evts
=
fromMaybe
[]
mEvts
newEvt
=
ScraperEvent
{
_scev_message
=
Just
message
,
_scev_level
=
Just
level
,
_scev_date
=
Nothing
}
jobLogSuccess
::
JobLog
->
JobLog
jobLogSuccess
jl
=
over
(
scst_succeeded
.
_Just
)
(
+
1
)
$
over
(
scst_remaining
.
_Just
)
(
\
x
->
x
-
1
)
jl
...
...
@@ -38,6 +47,9 @@ jobLogFailTotal (JobLog { _scst_succeeded = mSucc
Nothing
->
(
Nothing
,
mFail
)
Just
rem
->
(
Just
0
,
(
+
rem
)
<$>
mFail
)
jobLogFailTotalWithMessage
::
T
.
Text
->
JobLog
->
JobLog
jobLogFailTotalWithMessage
message
jl
=
addEvent
"ERROR"
message
$
jobLogFailTotal
jl
jobLogEvt
::
JobLog
->
ScraperEvent
->
JobLog
jobLogEvt
jl
evt
=
over
(
scst_events
.
_Just
)
(
\
evts
->
(
evt
:
evts
))
jl
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
7b9de040
...
...
@@ -21,25 +21,25 @@ module Gargantext.API.Node.Corpus.New
import
Control.Lens
hiding
(
elements
,
Empty
)
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
qualified
Data.ByteString.Base64
as
BSB64
import
Data.Either
import
Data.Maybe
(
fromMaybe
)
import
Data.Swagger
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
qualified
Prelude
as
Prelude
import
Protolude
(
readFile
)
import
Servant
import
Servant.Job.Utils
(
jsonOptions
)
-- import Servant.Multipart
import
qualified
Data.Text.Encoding
as
TE
-- import Test.QuickCheck (elements)
import
Test.QuickCheck.Arbitrary
import
Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
,
ScraperEvent
(
..
),
scst_events
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Job
(
jobLogSuccess
,
jobLogFailTotal
)
import
Gargantext.API.Job
(
jobLogSuccess
,
jobLogFailTotal
,
jobLogFailTotalWithMessage
)
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.Searx
import
Gargantext.API.Node.Corpus.Types
...
...
@@ -57,11 +57,12 @@ import Gargantext.Database.Action.Node (mkNodeWithParent)
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
NodeType
(
..
),
UserId
)
import
Gargantext.Database.Prelude
(
hasConfig
)
import
Gargantext.Database.Query.Table.Node
(
getNodeWith
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
qualified
Gargantext.Database.GargDB
as
GargDB
import
Gargantext.Prelude.Config
(
gc_max_docs_parsers
)
------------------------------------------------------------------------
{-
data Query = Query { query_query :: Text
...
...
@@ -240,7 +241,7 @@ type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
:>
"async"
:>
AsyncJobs
JobLog
'[
F
ormUrlEncoded
]
NewWithForm
JobLog
addToCorpusWithForm
::
FlowCmdM
env
err
m
addToCorpusWithForm
::
(
FlowCmdM
env
err
m
)
=>
User
->
CorpusId
->
NewWithForm
...
...
@@ -258,12 +259,33 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
WOS
->
Parser
.
parseFormat
Parser
.
WOS
PresseRIS
->
Parser
.
parseFormat
Parser
.
RisPresse
ZIP
->
Parser
.
parseFormat
Parser
.
ZIP
-- TODO granularity of the logStatus
eDocs
<-
liftBase
$
parse
$
cs
d
let
data
'
=
case
ft
of
ZIP
->
case
BSB64
.
decode
$
TE
.
encodeUtf8
d
of
Left
err
->
panic
$
T
.
pack
"[addToCorpusWithForm] error decoding base64: "
<>
T
.
pack
err
Right
decoded
->
decoded
_
->
cs
d
eDocs
<-
liftBase
$
parse
data
'
case
eDocs
of
Right
docs'
->
do
let
docs
=
splitEvery
500
$
take
1000000
docs'
-- TODO Add progress (jobStatus) update for docs - this is a
-- long action
limit'
<-
view
$
hasConfig
.
gc_max_docs_parsers
let
limit
=
fromIntegral
limit'
if
length
docs'
>
limit
then
do
printDebug
"[addToCorpusWithForm] number of docs exceeds the limit"
(
show
$
length
docs'
)
let
panicMsg'
=
[
"[addToCorpusWithForm] number of docs ("
,
show
$
length
docs'
,
") exceeds the MAX_DOCS_PARSERS limit ("
,
show
limit
,
")"
]
let
panicMsg
=
T
.
concat
$
T
.
pack
<$>
panicMsg'
logStatus
$
jobLogFailTotalWithMessage
panicMsg
jobLog
panic
panicMsg
else
pure
()
let
docs
=
splitEvery
500
$
take
limit
docs'
printDebug
"Parsing corpus finished : "
cid
logStatus
jobLog2
...
...
@@ -283,20 +305,19 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
logStatus
jobLog3
pure
$
jobLog3
Left
e
->
do
printDebug
"
E
rror"
e
printDebug
"
[addToCorpusWithForm] parse e
rror"
e
logStatus
jobLogE
let
evt
=
ScraperEvent
{
_scev_message
=
Just
$
T
.
pack
e
,
_scev_level
=
Just
"ERROR"
,
_scev_date
=
Nothing
}
logStatus
$
over
(
scst_events
.
_Just
)
(
\
evt'
->
evt'
<>
[
evt
])
jobLogE
pure
jobLogE
where
jobLog2
=
jobLogSuccess
jobLog
jobLog3
=
jobLogSuccess
jobLog2
jobLogE
=
jobLogFailTotal
jobLog
parseCsvGargV3Path
::
[
Char
]
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseCsvGargV3Path
fp
=
do
contents
<-
readFile
fp
Parser
.
parseFormat
Parser
.
CsvGargV3
$
cs
contents
{-
addToCorpusWithFile :: FlowCmdM env err m
=> CorpusId
...
...
src/Gargantext/API/Node/FrameCalcUpload.hs
View file @
7b9de040
...
...
@@ -27,6 +27,7 @@ import Gargantext.Core.Types.Individu (User(..))
import
Gargantext.Database.Action.Flow.Types
import
Gargantext.Database.Admin.Types.Hyperdata.Frame
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Prelude
(
HasConfig
)
import
Gargantext.Database.Query.Table.Node
(
getClosestParentIdByType
,
getNodeWith
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
...
...
@@ -53,7 +54,7 @@ frameCalcUploadAPI uId nId =
)
frameCalcUploadAsync
::
FlowCmdM
env
err
m
frameCalcUploadAsync
::
(
HasConfig
env
,
FlowCmdM
env
err
m
)
=>
UserId
->
NodeId
->
FrameCalcUpload
...
...
src/Gargantext/API/Node/Types.hs
View file @
7b9de040
...
...
@@ -24,7 +24,7 @@ import Gargantext.API.Node.Corpus.New.File (FileType)
-------------------------------------------------------
data
NewWithForm
=
NewWithForm
{
_wf_filetype
::
!
FileType
,
_wf_data
::
!
Text
,
_wf_data
::
!
Text
-- NOTE for binary files, this represents base-64 data
,
_wf_lang
::
!
(
Maybe
Lang
)
,
_wf_name
::
!
Text
}
deriving
(
Eq
,
Show
,
Generic
)
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
7b9de040
...
...
@@ -25,7 +25,8 @@ module Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), clean, parseFile, cl
import
"zip"
Codec.Archive.Zip
(
withArchive
,
getEntry
,
getEntries
)
import
Control.Concurrent.Async
as
CCA
(
mapConcurrently
)
import
Control.Monad
(
join
)
import
Control.Monad
(
join
,
sequence
)
import
Control.Monad.IO.Class
(
liftIO
)
import
Data.Attoparsec.ByteString
(
parseOnly
,
Parser
)
import
Data.Either
(
Either
(
..
))
import
Data.Either.Extra
(
partitionEithers
)
...
...
@@ -43,6 +44,7 @@ import qualified Data.ByteString.Lazy as DBL
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
DT
import
qualified
Prelude
as
Prelude
import
System.IO.Temp
(
emptySystemTempFile
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
...
...
@@ -95,9 +97,14 @@ parseFormat WOS bs = do
$
partitionEithers
$
[
runParser'
WOS
bs
]
pure
$
Right
docs
parseFormat
ZIP
_bs
=
do
printDebug
"[parseFormat]"
ZIP
pure
$
Left
"Not implemented for ZIP"
parseFormat
ZIP
bs
=
do
path
<-
emptySystemTempFile
"parsed.zip"
DB
.
writeFile
path
bs
withArchive
path
$
do
files
<-
DM
.
keys
<$>
getEntries
filesContents
<-
mapM
getEntry
files
ddocs
<-
liftIO
$
mapM
(
parseFormat
CsvGargV3
)
filesContents
pure
$
concat
<$>
sequence
ddocs
parseFormat
_
_
=
undefined
-- | Parse file into documents
...
...
src/Gargantext/Database/Query/Facet.hs
View file @
7b9de040
...
...
@@ -360,7 +360,7 @@ viewDocuments cId t ntId mQuery = proc () -> do
restrict
-<
if
query
==
""
then
pgBool
True
--else (n^.ns_search) @@ (pgTSQuery (T.unpack query))
else
(
n
^.
ns_search
)
@@
(
toTSQuery
$
T
.
unpack
query
)
else
(
n
^.
ns_search
)
@@
(
plain
toTSQuery
$
T
.
unpack
query
)
returnA
-<
FacetDoc
(
_ns_id
n
)
(
_ns_date
n
)
...
...
stack.yaml
View file @
7b9de040
resolver
:
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/1
2
.yaml
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/1
3
.yaml
flags
:
{}
extra-package-dbs
:
[]
packages
:
...
...
@@ -11,6 +11,7 @@ packages:
docker
:
enable
:
false
#enable: true
repo
:
'
cgenie/stack-build:lts-18.12-garg'
run-args
:
-
'
--publish=8008:8008'
...
...
@@ -26,8 +27,9 @@ allow-newer: true
# "$everything": -haddock
extra-deps
:
-
git
:
https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit
:
3e32ec3aca71eb326805355d3a99b9288dc342ee
-
#git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
git
:
https://gitlab.iscpif.fr/cgenie/haskell-gargantext-prelude.git
commit
:
35b09629a658fc16cc9ff63e7591e58511cd98a7
# Data Mining Libs
-
git
:
https://github.com/delanoe/data-time-segment.git
commit
:
10a416b9f6c443866b36479c3441ebb3bcdeb7ef
...
...
@@ -44,7 +46,7 @@ extra-deps:
# Databases libs
-
git
:
https://github.com/delanoe/haskell-opaleye.git
commit
:
806da7f9fb6fe1032f51c1822fc224b281cdd84f
commit
:
d3ab7acd5ede737478763630035aa880f7e34444
-
git
:
https://github.com/delanoe/hsparql.git
commit
:
308c74b71a1abb0a91546fa57d353131248e3a7f
-
git
:
https://github.com/robstewart57/rdf4h.git
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment