Commit dcaef593 authored by Alexandre Delanoë

[Async] Corpus New addToCorpusWithForm

parent b8eb0898
......@@ -244,8 +244,14 @@ addToCorpusWithForm :: FlowCmdM env err m
-> WithForm
-> (ScraperStatus -> m ())
-> m ScraperStatus
addToCorpusWithForm cid form logStatus = do
addToCorpusWithForm cid (WithForm ft d l _n) logStatus = do
let
parse = case ft of
CSV_HAL -> Parser.parseFormat Parser.CsvHal
CSV -> Parser.parseFormat Parser.CsvGargV3
WOS -> Parser.parseFormat Parser.WOS
PresseRIS -> Parser.parseFormat Parser.RisPresse
logStatus ScraperStatus { _scst_succeeded = Just 1
, _scst_failed = Just 0
......@@ -253,33 +259,27 @@ addToCorpusWithForm cid form logStatus = do
, _scst_events = Just []
}
_ <- asyncFlowCorpus cid form
pure ScraperStatus { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
asyncFlowCorpus :: FlowCmdM env err m
=> CorpusId
-> WithForm
-> m ()
asyncFlowCorpus cid (WithForm ft d l _n) = do
let
parse = case ft of
CSV_HAL -> Parser.parseFormat Parser.CsvHal
CSV -> Parser.parseFormat Parser.CsvGargV3
WOS -> Parser.parseFormat Parser.WOS
PresseRIS -> Parser.parseFormat Parser.RisPresse
printDebug "Parsing corpus: " cid
-- TODO granularity of the logStatus
docs <- liftIO $ splitEvery 500
<$> take 1000000
<$> parse (cs d)
printDebug "Parsing corpus finished : " cid
printDebug "Starting extraction : " cid
-- TODO granularity of the logStatus
_cid' <- flowCorpus "user1"
(Right [cid])
(Multi $ fromMaybe EN l)
(map (map toHyperdataDocument) docs)
pure ()
printDebug "Extraction finished : " cid
pure ScraperStatus { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment