Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
7b9de040
Commit
7b9de040
authored
Oct 16, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev' into 86-dev-graphql
parents
e30519a3
f5bb8c77
Pipeline
#1982
failed with stage
in 10 minutes and 22 seconds
Changes
13
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
79 additions
and
29 deletions
+79
-29
.envrc
.envrc
+1
-0
.gitignore
.gitignore
+3
-0
install
bin/install
+1
-1
gargantext.ini_toModify
gargantext.ini_toModify
+1
-0
pkgs.nix
nix/pkgs.nix
+2
-0
package.yaml
package.yaml
+1
-1
Job.hs
src/Gargantext/API/Job.hs
+12
-0
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+37
-16
FrameCalcUpload.hs
src/Gargantext/API/Node/FrameCalcUpload.hs
+2
-1
Types.hs
src/Gargantext/API/Node/Types.hs
+1
-1
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+11
-4
Facet.hs
src/Gargantext/Database/Query/Facet.hs
+1
-1
stack.yaml
stack.yaml
+6
-4
No files found.
.envrc
0 → 100644
View file @
7b9de040
use_nix
.gitignore
View file @
7b9de040
...
...
@@ -11,6 +11,7 @@ profiling
# Stack
.stack-work
dist-newstyle
# Emacs
TAGS
...
...
@@ -33,5 +34,7 @@ _darcs
# Runtime
# Repo
repos
repo.json*
tmp*repo*json
data
bin/install
View file @
7b9de040
#!/bin/bash
stack
install
--nix
--profile
--test
--fast
# --haddo
ck
stack
install
--nix
--profile
--test
--fast
--no-install-ghc
--skip-ghc-che
ck
gargantext.ini_toModify
View file @
7b9de040
...
...
@@ -33,6 +33,7 @@ FRAME_VISIO_URL = URL_TO_CHANGE
FRAME_SEARX_URL = URL_TO_CHANGE
FRAME_ISTEX_URL = URL_TO_CHANGE
MAX_DOCS_PARSERS = 1000000
MAX_DOCS_SCRAPERS = 10000
[server]
...
...
nix/pkgs.nix
View file @
7b9de040
...
...
@@ -26,6 +26,8 @@ rec {
blas
gfortran7
# gfortran7.cc.lib
expat
icu
];
libPaths
=
pkgs
.
lib
.
makeLibraryPath
nonhsBuildInputs
;
shellHook
=
''
...
...
package.yaml
View file @
7b9de040
name
:
gargantext
version
:
'
0.0.4.
3
'
version
:
'
0.0.4.
4
'
synopsis
:
Search, map, share
description
:
Please see README.md
category
:
Data
...
...
src/Gargantext/API/Job.hs
View file @
7b9de040
...
...
@@ -3,6 +3,7 @@ module Gargantext.API.Job where
import
Control.Lens
(
over
,
_Just
)
import
Data.IORef
import
Data.Maybe
import
qualified
Data.Text
as
T
import
Gargantext.Prelude
...
...
@@ -16,6 +17,14 @@ jobLogInit rem =
,
_scst_failed
=
Just
0
,
_scst_events
=
Just
[]
}
-- | Append a freshly built 'ScraperEvent' to the end of a job log's events.
--
-- The first argument is stored as the event level, the second as the event
-- message; the event date is left as 'Nothing'.  A log whose event list is
-- 'Nothing' is treated as having no events, so the result always carries
-- @Just@ a non-empty list.
addEvent :: T.Text -> T.Text -> JobLog -> JobLog
addEvent level message jl =
  jl { _scst_events = Just (previous <> [newEvt]) }
  where
    -- Events recorded so far (empty when the field is 'Nothing').
    previous = fromMaybe [] (_scst_events jl)
    newEvt = ScraperEvent
      { _scev_message = Just message
      , _scev_level   = Just level
      , _scev_date    = Nothing
      }
-- | Record one successful step in a job log: the remaining counter goes down
-- by one and the succeeded counter goes up by one.  A counter that is
-- 'Nothing' is left untouched (the update goes through '_Just').
jobLogSuccess :: JobLog -> JobLog
jobLogSuccess = bumpSucceeded . dropRemaining
  where
    bumpSucceeded = over (scst_succeeded . _Just) (+ 1)
    dropRemaining = over (scst_remaining . _Just) (\n -> n - 1)
...
...
@@ -38,6 +47,9 @@ jobLogFailTotal (JobLog { _scst_succeeded = mSucc
Nothing
->
(
Nothing
,
mFail
)
Just
rem
->
(
Just
0
,
(
+
rem
)
<$>
mFail
)
-- | Apply 'jobLogFailTotal' to the log, then attach the given message to the
-- result as an event of level @"ERROR"@ (via 'addEvent').
jobLogFailTotalWithMessage :: T.Text -> JobLog -> JobLog
jobLogFailTotalWithMessage message = addEvent "ERROR" message . jobLogFailTotal
-- | Put an already-built event at the front of the job log's event list.
--
-- The update goes through '_Just', so a log whose event list is 'Nothing' is
-- returned unchanged.  Note that this prepends, whereas 'addEvent' appends.
jobLogEvt :: JobLog -> ScraperEvent -> JobLog
jobLogEvt jl evt = over (scst_events . _Just) (evt :) jl
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
7b9de040
...
...
@@ -21,25 +21,25 @@ module Gargantext.API.Node.Corpus.New
import
Control.Lens
hiding
(
elements
,
Empty
)
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
qualified
Data.ByteString.Base64
as
BSB64
import
Data.Either
import
Data.Maybe
(
fromMaybe
)
import
Data.Swagger
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
qualified
Prelude
as
Prelude
import
Protolude
(
readFile
)
import
Servant
import
Servant.Job.Utils
(
jsonOptions
)
-- import Servant.Multipart
import
qualified
Data.Text.Encoding
as
TE
-- import Test.QuickCheck (elements)
import
Test.QuickCheck.Arbitrary
import
Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
,
ScraperEvent
(
..
),
scst_events
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Job
(
jobLogSuccess
,
jobLogFailTotal
)
import
Gargantext.API.Job
(
jobLogSuccess
,
jobLogFailTotal
,
jobLogFailTotalWithMessage
)
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.Searx
import
Gargantext.API.Node.Corpus.Types
...
...
@@ -57,11 +57,12 @@ import Gargantext.Database.Action.Node (mkNodeWithParent)
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
NodeType
(
..
),
UserId
)
import
Gargantext.Database.Prelude
(
hasConfig
)
import
Gargantext.Database.Query.Table.Node
(
getNodeWith
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
qualified
Gargantext.Database.GargDB
as
GargDB
import
Gargantext.Prelude.Config
(
gc_max_docs_parsers
)
------------------------------------------------------------------------
{-
data Query = Query { query_query :: Text
...
...
@@ -240,7 +241,7 @@ type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
:>
"async"
:>
AsyncJobs
JobLog
'[
F
ormUrlEncoded
]
NewWithForm
JobLog
addToCorpusWithForm
::
FlowCmdM
env
err
m
addToCorpusWithForm
::
(
FlowCmdM
env
err
m
)
=>
User
->
CorpusId
->
NewWithForm
...
...
@@ -258,12 +259,33 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
WOS
->
Parser
.
parseFormat
Parser
.
WOS
PresseRIS
->
Parser
.
parseFormat
Parser
.
RisPresse
ZIP
->
Parser
.
parseFormat
Parser
.
ZIP
-- TODO granularity of the logStatus
eDocs
<-
liftBase
$
parse
$
cs
d
let
data
'
=
case
ft
of
ZIP
->
case
BSB64
.
decode
$
TE
.
encodeUtf8
d
of
Left
err
->
panic
$
T
.
pack
"[addToCorpusWithForm] error decoding base64: "
<>
T
.
pack
err
Right
decoded
->
decoded
_
->
cs
d
eDocs
<-
liftBase
$
parse
data
'
case
eDocs
of
Right
docs'
->
do
let
docs
=
splitEvery
500
$
take
1000000
docs'
-- TODO Add progress (jobStatus) update for docs - this is a
-- long action
limit'
<-
view
$
hasConfig
.
gc_max_docs_parsers
let
limit
=
fromIntegral
limit'
if
length
docs'
>
limit
then
do
printDebug
"[addToCorpusWithForm] number of docs exceeds the limit"
(
show
$
length
docs'
)
let
panicMsg'
=
[
"[addToCorpusWithForm] number of docs ("
,
show
$
length
docs'
,
") exceeds the MAX_DOCS_PARSERS limit ("
,
show
limit
,
")"
]
let
panicMsg
=
T
.
concat
$
T
.
pack
<$>
panicMsg'
logStatus
$
jobLogFailTotalWithMessage
panicMsg
jobLog
panic
panicMsg
else
pure
()
let
docs
=
splitEvery
500
$
take
limit
docs'
printDebug
"Parsing corpus finished : "
cid
logStatus
jobLog2
...
...
@@ -283,20 +305,19 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
logStatus
jobLog3
pure
$
jobLog3
Left
e
->
do
printDebug
"
E
rror"
e
printDebug
"
[addToCorpusWithForm] parse e
rror"
e
logStatus
jobLogE
let
evt
=
ScraperEvent
{
_scev_message
=
Just
$
T
.
pack
e
,
_scev_level
=
Just
"ERROR"
,
_scev_date
=
Nothing
}
logStatus
$
over
(
scst_events
.
_Just
)
(
\
evt'
->
evt'
<>
[
evt
])
jobLogE
pure
jobLogE
where
jobLog2
=
jobLogSuccess
jobLog
jobLog3
=
jobLogSuccess
jobLog2
jobLogE
=
jobLogFailTotal
jobLog
-- | Read the file at the given path and run the 'Parser.CsvGargV3' parser on
-- its contents.
--
-- The result is whatever 'Parser.parseFormat' yields: either an error string
-- or the list of parsed documents.
parseCsvGargV3Path :: [Char] -> IO (Either Prelude.String [HyperdataDocument])
parseCsvGargV3Path fp = do
  raw <- readFile fp
  -- 'cs' converts the file contents to the string type the parser expects.
  let payload = cs raw
  Parser.parseFormat Parser.CsvGargV3 payload
{-
addToCorpusWithFile :: FlowCmdM env err m
=> CorpusId
...
...
src/Gargantext/API/Node/FrameCalcUpload.hs
View file @
7b9de040
...
...
@@ -27,6 +27,7 @@ import Gargantext.Core.Types.Individu (User(..))
import
Gargantext.Database.Action.Flow.Types
import
Gargantext.Database.Admin.Types.Hyperdata.Frame
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Prelude
(
HasConfig
)
import
Gargantext.Database.Query.Table.Node
(
getClosestParentIdByType
,
getNodeWith
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
...
...
@@ -53,7 +54,7 @@ frameCalcUploadAPI uId nId =
)
frameCalcUploadAsync
::
FlowCmdM
env
err
m
frameCalcUploadAsync
::
(
HasConfig
env
,
FlowCmdM
env
err
m
)
=>
UserId
->
NodeId
->
FrameCalcUpload
...
...
src/Gargantext/API/Node/Types.hs
View file @
7b9de040
...
...
@@ -24,7 +24,7 @@ import Gargantext.API.Node.Corpus.New.File (FileType)
-------------------------------------------------------
data
NewWithForm
=
NewWithForm
{
_wf_filetype
::
!
FileType
,
_wf_data
::
!
Text
,
_wf_data
::
!
Text
-- NOTE for binary files, this represents base-64 data
,
_wf_lang
::
!
(
Maybe
Lang
)
,
_wf_name
::
!
Text
}
deriving
(
Eq
,
Show
,
Generic
)
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
7b9de040
...
...
@@ -25,7 +25,8 @@ module Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), clean, parseFile, cl
import
"zip"
Codec.Archive.Zip
(
withArchive
,
getEntry
,
getEntries
)
import
Control.Concurrent.Async
as
CCA
(
mapConcurrently
)
import
Control.Monad
(
join
)
import
Control.Monad
(
join
,
sequence
)
import
Control.Monad.IO.Class
(
liftIO
)
import
Data.Attoparsec.ByteString
(
parseOnly
,
Parser
)
import
Data.Either
(
Either
(
..
))
import
Data.Either.Extra
(
partitionEithers
)
...
...
@@ -43,6 +44,7 @@ import qualified Data.ByteString.Lazy as DBL
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
DT
import
qualified
Prelude
as
Prelude
import
System.IO.Temp
(
emptySystemTempFile
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
...
...
@@ -95,9 +97,14 @@ parseFormat WOS bs = do
$
partitionEithers
$
[
runParser'
WOS
bs
]
pure
$
Right
docs
parseFormat
ZIP
_bs
=
do
printDebug
"[parseFormat]"
ZIP
pure
$
Left
"Not implemented for ZIP"
parseFormat
ZIP
bs
=
do
path
<-
emptySystemTempFile
"parsed.zip"
DB
.
writeFile
path
bs
withArchive
path
$
do
files
<-
DM
.
keys
<$>
getEntries
filesContents
<-
mapM
getEntry
files
ddocs
<-
liftIO
$
mapM
(
parseFormat
CsvGargV3
)
filesContents
pure
$
concat
<$>
sequence
ddocs
parseFormat
_
_
=
undefined
-- | Parse file into documents
...
...
src/Gargantext/Database/Query/Facet.hs
View file @
7b9de040
...
...
@@ -360,7 +360,7 @@ viewDocuments cId t ntId mQuery = proc () -> do
restrict
-<
if
query
==
""
then
pgBool
True
--else (n^.ns_search) @@ (pgTSQuery (T.unpack query))
else
(
n
^.
ns_search
)
@@
(
toTSQuery
$
T
.
unpack
query
)
else
(
n
^.
ns_search
)
@@
(
plain
toTSQuery
$
T
.
unpack
query
)
returnA
-<
FacetDoc
(
_ns_id
n
)
(
_ns_date
n
)
...
...
stack.yaml
View file @
7b9de040
resolver
:
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/1
2
.yaml
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/1
3
.yaml
flags
:
{}
extra-package-dbs
:
[]
packages
:
...
...
@@ -11,6 +11,7 @@ packages:
docker
:
enable
:
false
#enable: true
repo
:
'
cgenie/stack-build:lts-18.12-garg'
run-args
:
-
'
--publish=8008:8008'
...
...
@@ -26,8 +27,9 @@ allow-newer: true
# "$everything": -haddock
extra-deps
:
-
git
:
https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit
:
3e32ec3aca71eb326805355d3a99b9288dc342ee
-
#git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
git
:
https://gitlab.iscpif.fr/cgenie/haskell-gargantext-prelude.git
commit
:
35b09629a658fc16cc9ff63e7591e58511cd98a7
# Data Mining Libs
-
git
:
https://github.com/delanoe/data-time-segment.git
commit
:
10a416b9f6c443866b36479c3441ebb3bcdeb7ef
...
...
@@ -44,7 +46,7 @@ extra-deps:
# Databases libs
-
git
:
https://github.com/delanoe/haskell-opaleye.git
commit
:
806da7f9fb6fe1032f51c1822fc224b281cdd84f
commit
:
d3ab7acd5ede737478763630035aa880f7e34444
-
git
:
https://github.com/delanoe/hsparql.git
commit
:
308c74b71a1abb0a91546fa57d353131248e3a7f
-
git
:
https://github.com/robstewart57/rdf4h.git
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment