Commit e1dd0752 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[upload zip] add MAX_DOCS_PARSERS limit to ZIP/CSV upload

parent 5e8bc9f2
...@@ -33,6 +33,7 @@ FRAME_VISIO_URL = URL_TO_CHANGE ...@@ -33,6 +33,7 @@ FRAME_VISIO_URL = URL_TO_CHANGE
FRAME_SEARX_URL = URL_TO_CHANGE FRAME_SEARX_URL = URL_TO_CHANGE
FRAME_ISTEX_URL = URL_TO_CHANGE FRAME_ISTEX_URL = URL_TO_CHANGE
MAX_DOCS_PARSERS = 1000000
MAX_DOCS_SCRAPERS = 10000 MAX_DOCS_SCRAPERS = 10000
[server] [server]
......
...@@ -3,6 +3,7 @@ module Gargantext.API.Job where ...@@ -3,6 +3,7 @@ module Gargantext.API.Job where
import Control.Lens (over, _Just) import Control.Lens (over, _Just)
import Data.IORef import Data.IORef
import Data.Maybe import Data.Maybe
import qualified Data.Text as T
import Gargantext.Prelude import Gargantext.Prelude
...@@ -16,6 +17,14 @@ jobLogInit rem = ...@@ -16,6 +17,14 @@ jobLogInit rem =
, _scst_failed = Just 0 , _scst_failed = Just 0
, _scst_events = Just [] } , _scst_events = Just [] }
addEvent :: T.Text -> T.Text -> JobLog -> JobLog
addEvent level message (JobLog { _scst_events = mEvts, .. }) = JobLog { _scst_events = Just (evts <> [ newEvt ]), .. }
where
evts = fromMaybe [] mEvts
newEvt = ScraperEvent { _scev_message = Just message
, _scev_level = Just level
, _scev_date = Nothing }
jobLogSuccess :: JobLog -> JobLog jobLogSuccess :: JobLog -> JobLog
jobLogSuccess jl = over (scst_succeeded . _Just) (+ 1) $ jobLogSuccess jl = over (scst_succeeded . _Just) (+ 1) $
over (scst_remaining . _Just) (\x -> x - 1) jl over (scst_remaining . _Just) (\x -> x - 1) jl
...@@ -38,6 +47,9 @@ jobLogFailTotal (JobLog { _scst_succeeded = mSucc ...@@ -38,6 +47,9 @@ jobLogFailTotal (JobLog { _scst_succeeded = mSucc
Nothing -> (Nothing, mFail) Nothing -> (Nothing, mFail)
Just rem -> (Just 0, (+ rem) <$> mFail) Just rem -> (Just 0, (+ rem) <$> mFail)
jobLogFailTotalWithMessage :: T.Text -> JobLog -> JobLog
jobLogFailTotalWithMessage message jl = addEvent "ERROR" message $ jobLogFailTotal jl
jobLogEvt :: JobLog -> ScraperEvent -> JobLog jobLogEvt :: JobLog -> ScraperEvent -> JobLog
jobLogEvt jl evt = over (scst_events . _Just) (\evts -> (evt:evts)) jl jobLogEvt jl evt = over (scst_events . _Just) (\evts -> (evt:evts)) jl
......
...@@ -39,7 +39,7 @@ import Gargantext.Prelude ...@@ -39,7 +39,7 @@ import Gargantext.Prelude
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs, ScraperEvent(..), scst_events) import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs, ScraperEvent(..), scst_events)
import Gargantext.API.Admin.Types (HasSettings) import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (jobLogSuccess, jobLogFailTotal) import Gargantext.API.Job (jobLogSuccess, jobLogFailTotal, jobLogFailTotalWithMessage)
import Gargantext.API.Node.Corpus.New.File import Gargantext.API.Node.Corpus.New.File
import Gargantext.API.Node.Corpus.Searx import Gargantext.API.Node.Corpus.Searx
import Gargantext.API.Node.Corpus.Types import Gargantext.API.Node.Corpus.Types
...@@ -57,11 +57,12 @@ import Gargantext.Database.Action.Node (mkNodeWithParent) ...@@ -57,11 +57,12 @@ import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Action.User (getUserId) import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Types.Hyperdata import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), UserId) import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), UserId)
import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (getNodeWith) import Gargantext.Database.Query.Table.Node (getNodeWith)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata) import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Node (node_hyperdata) import Gargantext.Database.Schema.Node (node_hyperdata)
import qualified Gargantext.Database.GargDB as GargDB import qualified Gargantext.Database.GargDB as GargDB
import Gargantext.Prelude.Config (gc_max_docs_parsers)
------------------------------------------------------------------------ ------------------------------------------------------------------------
{- {-
data Query = Query { query_query :: Text data Query = Query { query_query :: Text
...@@ -240,7 +241,7 @@ type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint" ...@@ -240,7 +241,7 @@ type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
:> "async" :> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] NewWithForm JobLog :> AsyncJobs JobLog '[FormUrlEncoded] NewWithForm JobLog
addToCorpusWithForm :: FlowCmdM env err m addToCorpusWithForm :: (FlowCmdM env err m)
=> User => User
-> CorpusId -> CorpusId
-> NewWithForm -> NewWithForm
...@@ -270,7 +271,21 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do ...@@ -270,7 +271,21 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
Right docs' -> do Right docs' -> do
-- TODO Add progress (jobStatus) update for docs - this is a -- TODO Add progress (jobStatus) update for docs - this is a
-- long action -- long action
let docs = splitEvery 500 $ take 1000000 docs' limit' <- view $ hasConfig . gc_max_docs_parsers
let limit = fromIntegral limit'
if length docs' > limit then do
printDebug "[addToCorpusWithForm] number of docs exceeds the limit" (show $ length docs')
let panicMsg' = [ "[addToCorpusWithForm] number of docs ("
, show $ length docs'
, ") exceeds the MAX_DOCS_PARSERS limit ("
, show limit
, ")" ]
let panicMsg = T.concat $ T.pack <$> panicMsg'
logStatus $ jobLogFailTotalWithMessage panicMsg jobLog
panic panicMsg
else
pure ()
let docs = splitEvery 500 $ take limit docs'
printDebug "Parsing corpus finished : " cid printDebug "Parsing corpus finished : " cid
logStatus jobLog2 logStatus jobLog2
......
...@@ -27,6 +27,7 @@ import Gargantext.Core.Types.Individu (User(..)) ...@@ -27,6 +27,7 @@ import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow.Types import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Types.Hyperdata.Frame import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (HasConfig)
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, getNodeWith) import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata) import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -53,7 +54,7 @@ frameCalcUploadAPI uId nId = ...@@ -53,7 +54,7 @@ frameCalcUploadAPI uId nId =
) )
frameCalcUploadAsync :: FlowCmdM env err m frameCalcUploadAsync :: (HasConfig env, FlowCmdM env err m)
=> UserId => UserId
-> NodeId -> NodeId
-> FrameCalcUpload -> FrameCalcUpload
......
...@@ -27,8 +27,9 @@ allow-newer: true ...@@ -27,8 +27,9 @@ allow-newer: true
# "$everything": -haddock # "$everything": -haddock
extra-deps: extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git - #git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit: 3e32ec3aca71eb326805355d3a99b9288dc342ee git: https://gitlab.iscpif.fr/cgenie/haskell-gargantext-prelude.git
commit: 35b09629a658fc16cc9ff63e7591e58511cd98a7
# Data Mining Libs # Data Mining Libs
- git: https://github.com/delanoe/data-time-segment.git - git: https://github.com/delanoe/data-time-segment.git
commit: 10a416b9f6c443866b36479c3441ebb3bcdeb7ef commit: 10a416b9f6c443866b36479c3441ebb3bcdeb7ef
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment