Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
123
Issues
123
List
Board
Labels
Milestones
Merge Requests
7
Merge Requests
7
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
e1dd0752
Commit
e1dd0752
authored
Oct 13, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[upload zip] add MAX_DOCS_PARSERS limit to ZIP/CSV upload
parent
5e8bc9f2
Pipeline
#1967
failed with stage
in 9 minutes and 57 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
37 additions
and
7 deletions
+37
-7
gargantext.ini_toModify
gargantext.ini_toModify
+1
-0
Job.hs
src/Gargantext/API/Job.hs
+12
-0
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+19
-4
FrameCalcUpload.hs
src/Gargantext/API/Node/FrameCalcUpload.hs
+2
-1
stack.yaml
stack.yaml
+3
-2
No files found.
gargantext.ini_toModify
View file @
e1dd0752
...
...
@@ -33,6 +33,7 @@ FRAME_VISIO_URL = URL_TO_CHANGE
FRAME_SEARX_URL = URL_TO_CHANGE
FRAME_ISTEX_URL = URL_TO_CHANGE
MAX_DOCS_PARSERS = 1000000
MAX_DOCS_SCRAPERS = 10000
[server]
...
...
src/Gargantext/API/Job.hs
View file @
e1dd0752
...
...
@@ -3,6 +3,7 @@ module Gargantext.API.Job where
import
Control.Lens
(
over
,
_Just
)
import
Data.IORef
import
Data.Maybe
import
qualified
Data.Text
as
T
import
Gargantext.Prelude
...
...
@@ -16,6 +17,14 @@ jobLogInit rem =
,
_scst_failed
=
Just
0
,
_scst_events
=
Just
[]
}
-- | Append a new event, built from a level and a message, to the end of the
-- job log's event list.
--
-- A missing event list ('Nothing') is treated as empty, so the result always
-- holds @Just@ a list whose last element is the new event. The new event's
-- date is left unset ('Nothing').
addEvent :: T.Text -> T.Text -> JobLog -> JobLog
addEvent level message jl =
  jl { _scst_events = Just (previous <> [appended]) }
  where
    -- Events recorded so far; an absent list counts as no events.
    previous = fromMaybe [] (_scst_events jl)
    appended = ScraperEvent
      { _scev_message = Just message
      , _scev_level   = Just level
      , _scev_date    = Nothing
      }
-- | Record one successful step: subtract one from the remaining counter and
-- add one to the succeeded counter.
--
-- Both updates go through the '_Just' prism, so a counter that is 'Nothing'
-- is left as it is.
jobLogSuccess :: JobLog -> JobLog
jobLogSuccess = bumpSucceeded . dropRemaining
  where
    dropRemaining = over (scst_remaining . _Just) (\n -> n - 1)
    bumpSucceeded = over (scst_succeeded . _Just) (+ 1)
...
...
@@ -38,6 +47,9 @@ jobLogFailTotal (JobLog { _scst_succeeded = mSucc
Nothing
->
(
Nothing
,
mFail
)
Just
rem
->
(
Just
0
,
(
+
rem
)
<$>
mFail
)
-- | Apply 'jobLogFailTotal' to the job log, then append an event at level
-- @"ERROR"@ carrying the given message (via 'addEvent').
jobLogFailTotalWithMessage :: T.Text -> JobLog -> JobLog
jobLogFailTotalWithMessage message = addEvent "ERROR" message . jobLogFailTotal
-- | Prepend an event to the front of the job log's event list.
--
-- The update goes through the '_Just' prism, so a log whose event list is
-- 'Nothing' is returned unchanged.
jobLogEvt :: JobLog -> ScraperEvent -> JobLog
jobLogEvt jl evt = over (scst_events . _Just) (evt :) jl
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
e1dd0752
...
...
@@ -39,7 +39,7 @@ import Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
,
ScraperEvent
(
..
),
scst_events
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Job
(
jobLogSuccess
,
jobLogFailTotal
)
import
Gargantext.API.Job
(
jobLogSuccess
,
jobLogFailTotal
,
jobLogFailTotalWithMessage
)
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.Searx
import
Gargantext.API.Node.Corpus.Types
...
...
@@ -57,11 +57,12 @@ import Gargantext.Database.Action.Node (mkNodeWithParent)
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
NodeType
(
..
),
UserId
)
import
Gargantext.Database.Prelude
(
hasConfig
)
import
Gargantext.Database.Query.Table.Node
(
getNodeWith
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
qualified
Gargantext.Database.GargDB
as
GargDB
import
Gargantext.Prelude.Config
(
gc_max_docs_parsers
)
------------------------------------------------------------------------
{-
data Query = Query { query_query :: Text
...
...
@@ -240,7 +241,7 @@ type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
:>
"async"
:>
AsyncJobs
JobLog
'[
F
ormUrlEncoded
]
NewWithForm
JobLog
addToCorpusWithForm
::
FlowCmdM
env
err
m
addToCorpusWithForm
::
(
FlowCmdM
env
err
m
)
=>
User
->
CorpusId
->
NewWithForm
...
...
@@ -270,7 +271,21 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
Right
docs'
->
do
-- TODO Add progress (jobStatus) update for docs - this is a
-- long action
let
docs
=
splitEvery
500
$
take
1000000
docs'
limit'
<-
view
$
hasConfig
.
gc_max_docs_parsers
let
limit
=
fromIntegral
limit'
if
length
docs'
>
limit
then
do
printDebug
"[addToCorpusWithForm] number of docs exceeds the limit"
(
show
$
length
docs'
)
let
panicMsg'
=
[
"[addToCorpusWithForm] number of docs ("
,
show
$
length
docs'
,
") exceeds the MAX_DOCS_PARSERS limit ("
,
show
limit
,
")"
]
let
panicMsg
=
T
.
concat
$
T
.
pack
<$>
panicMsg'
logStatus
$
jobLogFailTotalWithMessage
panicMsg
jobLog
panic
panicMsg
else
pure
()
let
docs
=
splitEvery
500
$
take
limit
docs'
printDebug
"Parsing corpus finished : "
cid
logStatus
jobLog2
...
...
src/Gargantext/API/Node/FrameCalcUpload.hs
View file @
e1dd0752
...
...
@@ -27,6 +27,7 @@ import Gargantext.Core.Types.Individu (User(..))
import
Gargantext.Database.Action.Flow.Types
import
Gargantext.Database.Admin.Types.Hyperdata.Frame
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Prelude
(
HasConfig
)
import
Gargantext.Database.Query.Table.Node
(
getClosestParentIdByType
,
getNodeWith
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
...
...
@@ -53,7 +54,7 @@ frameCalcUploadAPI uId nId =
)
frameCalcUploadAsync
::
FlowCmdM
env
err
m
frameCalcUploadAsync
::
(
HasConfig
env
,
FlowCmdM
env
err
m
)
=>
UserId
->
NodeId
->
FrameCalcUpload
...
...
stack.yaml
View file @
e1dd0752
...
...
@@ -27,8 +27,9 @@ allow-newer: true
# "$everything": -haddock
extra-deps
:
-
git
:
https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit
:
3e32ec3aca71eb326805355d3a99b9288dc342ee
-
#git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
git
:
https://gitlab.iscpif.fr/cgenie/haskell-gargantext-prelude.git
commit
:
35b09629a658fc16cc9ff63e7591e58511cd98a7
# Data Mining Libs
-
git
:
https://github.com/delanoe/data-time-segment.git
commit
:
10a416b9f6c443866b36479c3441ebb3bcdeb7ef
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment