Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
199
Issues
199
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
e1dd0752
Commit
e1dd0752
authored
Oct 13, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[upload zip] add MAX_DOCS_PARSERS limit to ZIP/CSV upload
parent
5e8bc9f2
Pipeline
#1967
failed with stage
in 9 minutes and 57 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
37 additions
and
7 deletions
+37
-7
gargantext.ini_toModify
gargantext.ini_toModify
+1
-0
Job.hs
src/Gargantext/API/Job.hs
+12
-0
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+19
-4
FrameCalcUpload.hs
src/Gargantext/API/Node/FrameCalcUpload.hs
+2
-1
stack.yaml
stack.yaml
+3
-2
No files found.
gargantext.ini_toModify
View file @
e1dd0752
...
@@ -33,6 +33,7 @@ FRAME_VISIO_URL = URL_TO_CHANGE
...
@@ -33,6 +33,7 @@ FRAME_VISIO_URL = URL_TO_CHANGE
FRAME_SEARX_URL = URL_TO_CHANGE
FRAME_SEARX_URL = URL_TO_CHANGE
FRAME_ISTEX_URL = URL_TO_CHANGE
FRAME_ISTEX_URL = URL_TO_CHANGE
MAX_DOCS_PARSERS = 1000000
MAX_DOCS_SCRAPERS = 10000
MAX_DOCS_SCRAPERS = 10000
[server]
[server]
...
...
src/Gargantext/API/Job.hs
View file @
e1dd0752
...
@@ -3,6 +3,7 @@ module Gargantext.API.Job where
...
@@ -3,6 +3,7 @@ module Gargantext.API.Job where
import
Control.Lens
(
over
,
_Just
)
import
Control.Lens
(
over
,
_Just
)
import
Data.IORef
import
Data.IORef
import
Data.Maybe
import
Data.Maybe
import
qualified
Data.Text
as
T
import
Gargantext.Prelude
import
Gargantext.Prelude
...
@@ -16,6 +17,14 @@ jobLogInit rem =
...
@@ -16,6 +17,14 @@ jobLogInit rem =
,
_scst_failed
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_events
=
Just
[]
}
,
_scst_events
=
Just
[]
}
addEvent
::
T
.
Text
->
T
.
Text
->
JobLog
->
JobLog
addEvent
level
message
(
JobLog
{
_scst_events
=
mEvts
,
..
})
=
JobLog
{
_scst_events
=
Just
(
evts
<>
[
newEvt
]),
..
}
where
evts
=
fromMaybe
[]
mEvts
newEvt
=
ScraperEvent
{
_scev_message
=
Just
message
,
_scev_level
=
Just
level
,
_scev_date
=
Nothing
}
jobLogSuccess
::
JobLog
->
JobLog
jobLogSuccess
::
JobLog
->
JobLog
jobLogSuccess
jl
=
over
(
scst_succeeded
.
_Just
)
(
+
1
)
$
jobLogSuccess
jl
=
over
(
scst_succeeded
.
_Just
)
(
+
1
)
$
over
(
scst_remaining
.
_Just
)
(
\
x
->
x
-
1
)
jl
over
(
scst_remaining
.
_Just
)
(
\
x
->
x
-
1
)
jl
...
@@ -38,6 +47,9 @@ jobLogFailTotal (JobLog { _scst_succeeded = mSucc
...
@@ -38,6 +47,9 @@ jobLogFailTotal (JobLog { _scst_succeeded = mSucc
Nothing
->
(
Nothing
,
mFail
)
Nothing
->
(
Nothing
,
mFail
)
Just
rem
->
(
Just
0
,
(
+
rem
)
<$>
mFail
)
Just
rem
->
(
Just
0
,
(
+
rem
)
<$>
mFail
)
jobLogFailTotalWithMessage
::
T
.
Text
->
JobLog
->
JobLog
jobLogFailTotalWithMessage
message
jl
=
addEvent
"ERROR"
message
$
jobLogFailTotal
jl
jobLogEvt
::
JobLog
->
ScraperEvent
->
JobLog
jobLogEvt
::
JobLog
->
ScraperEvent
->
JobLog
jobLogEvt
jl
evt
=
over
(
scst_events
.
_Just
)
(
\
evts
->
(
evt
:
evts
))
jl
jobLogEvt
jl
evt
=
over
(
scst_events
.
_Just
)
(
\
evts
->
(
evt
:
evts
))
jl
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
e1dd0752
...
@@ -39,7 +39,7 @@ import Gargantext.Prelude
...
@@ -39,7 +39,7 @@ import Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
,
ScraperEvent
(
..
),
scst_events
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
,
ScraperEvent
(
..
),
scst_events
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Job
(
jobLogSuccess
,
jobLogFailTotal
)
import
Gargantext.API.Job
(
jobLogSuccess
,
jobLogFailTotal
,
jobLogFailTotalWithMessage
)
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.Searx
import
Gargantext.API.Node.Corpus.Searx
import
Gargantext.API.Node.Corpus.Types
import
Gargantext.API.Node.Corpus.Types
...
@@ -57,11 +57,12 @@ import Gargantext.Database.Action.Node (mkNodeWithParent)
...
@@ -57,11 +57,12 @@ import Gargantext.Database.Action.Node (mkNodeWithParent)
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
NodeType
(
..
),
UserId
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
NodeType
(
..
),
UserId
)
import
Gargantext.Database.Prelude
(
hasConfig
)
import
Gargantext.Database.Query.Table.Node
(
getNodeWith
)
import
Gargantext.Database.Query.Table.Node
(
getNodeWith
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
qualified
Gargantext.Database.GargDB
as
GargDB
import
qualified
Gargantext.Database.GargDB
as
GargDB
import
Gargantext.Prelude.Config
(
gc_max_docs_parsers
)
------------------------------------------------------------------------
------------------------------------------------------------------------
{-
{-
data Query = Query { query_query :: Text
data Query = Query { query_query :: Text
...
@@ -240,7 +241,7 @@ type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
...
@@ -240,7 +241,7 @@ type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
:>
"async"
:>
"async"
:>
AsyncJobs
JobLog
'[
F
ormUrlEncoded
]
NewWithForm
JobLog
:>
AsyncJobs
JobLog
'[
F
ormUrlEncoded
]
NewWithForm
JobLog
addToCorpusWithForm
::
FlowCmdM
env
err
m
addToCorpusWithForm
::
(
FlowCmdM
env
err
m
)
=>
User
=>
User
->
CorpusId
->
CorpusId
->
NewWithForm
->
NewWithForm
...
@@ -270,7 +271,21 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
...
@@ -270,7 +271,21 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
Right
docs'
->
do
Right
docs'
->
do
-- TODO Add progress (jobStatus) update for docs - this is a
-- TODO Add progress (jobStatus) update for docs - this is a
-- long action
-- long action
let
docs
=
splitEvery
500
$
take
1000000
docs'
limit'
<-
view
$
hasConfig
.
gc_max_docs_parsers
let
limit
=
fromIntegral
limit'
if
length
docs'
>
limit
then
do
printDebug
"[addToCorpusWithForm] number of docs exceeds the limit"
(
show
$
length
docs'
)
let
panicMsg'
=
[
"[addToCorpusWithForm] number of docs ("
,
show
$
length
docs'
,
") exceeds the MAX_DOCS_PARSERS limit ("
,
show
limit
,
")"
]
let
panicMsg
=
T
.
concat
$
T
.
pack
<$>
panicMsg'
logStatus
$
jobLogFailTotalWithMessage
panicMsg
jobLog
panic
panicMsg
else
pure
()
let
docs
=
splitEvery
500
$
take
limit
docs'
printDebug
"Parsing corpus finished : "
cid
printDebug
"Parsing corpus finished : "
cid
logStatus
jobLog2
logStatus
jobLog2
...
...
src/Gargantext/API/Node/FrameCalcUpload.hs
View file @
e1dd0752
...
@@ -27,6 +27,7 @@ import Gargantext.Core.Types.Individu (User(..))
...
@@ -27,6 +27,7 @@ import Gargantext.Core.Types.Individu (User(..))
import
Gargantext.Database.Action.Flow.Types
import
Gargantext.Database.Action.Flow.Types
import
Gargantext.Database.Admin.Types.Hyperdata.Frame
import
Gargantext.Database.Admin.Types.Hyperdata.Frame
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Prelude
(
HasConfig
)
import
Gargantext.Database.Query.Table.Node
(
getClosestParentIdByType
,
getNodeWith
)
import
Gargantext.Database.Query.Table.Node
(
getClosestParentIdByType
,
getNodeWith
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
import
Gargantext.Prelude
...
@@ -53,7 +54,7 @@ frameCalcUploadAPI uId nId =
...
@@ -53,7 +54,7 @@ frameCalcUploadAPI uId nId =
)
)
frameCalcUploadAsync
::
FlowCmdM
env
err
m
frameCalcUploadAsync
::
(
HasConfig
env
,
FlowCmdM
env
err
m
)
=>
UserId
=>
UserId
->
NodeId
->
NodeId
->
FrameCalcUpload
->
FrameCalcUpload
...
...
stack.yaml
View file @
e1dd0752
...
@@ -27,8 +27,9 @@ allow-newer: true
...
@@ -27,8 +27,9 @@ allow-newer: true
# "$everything": -haddock
# "$everything": -haddock
extra-deps
:
extra-deps
:
-
git
:
https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
-
#git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit
:
3e32ec3aca71eb326805355d3a99b9288dc342ee
git
:
https://gitlab.iscpif.fr/cgenie/haskell-gargantext-prelude.git
commit
:
35b09629a658fc16cc9ff63e7591e58511cd98a7
# Data Mining Libs
# Data Mining Libs
-
git
:
https://github.com/delanoe/data-time-segment.git
-
git
:
https://github.com/delanoe/data-time-segment.git
commit
:
10a416b9f6c443866b36479c3441ebb3bcdeb7ef
commit
:
10a416b9f6c443866b36479c3441ebb3bcdeb7ef
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment