Commit 797e19df authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[flow] add better progress report to flow corpus

parent 8306c484
Pipeline #2064 failed with stage
in 10 minutes and 21 seconds
......@@ -47,13 +47,13 @@ main = do
tt = (Multi EN)
format = CsvGargV3 -- CsvHal --WOS
corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath Nothing
corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath Nothing (\_ -> pure ())
corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt CsvHal corpusPath Nothing
corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt CsvHal corpusPath Nothing (\_ -> pure ())
annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath
annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath (\_ -> pure ())
{-
let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
......
......@@ -93,7 +93,7 @@ addContact u nId (AddContactParams fn ln) logStatus = do
, _scst_remaining = Just 1
, _scst_events = Just []
}
_ <- flow (Nothing :: Maybe HyperdataAnnuaire) u (Right [nId]) (Multi EN) Nothing [[hyperdataContact fn ln]]
_ <- flow (Nothing :: Maybe HyperdataAnnuaire) u (Right [nId]) (Multi EN) Nothing [[hyperdataContact fn ln]] logStatus
pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
......
......@@ -213,15 +213,15 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
-- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
txts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [database2origin dbs]
txts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [database2origin dbs]
logStatus JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_remaining = Just $ 1 + length txts
, _scst_events = Just []
}
cids <- mapM (\txt -> flowDataText user txt (Multi l) cid Nothing) txts
cids <- mapM (\txt -> flowDataText user txt (Multi l) cid Nothing logStatus) txts
printDebug "corpus id" cids
printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
sendMail user
......@@ -297,6 +297,7 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
(Multi $ fromMaybe EN l)
Nothing
(map (map toHyperdataDocument) docs)
logStatus
printDebug "Extraction finished : " cid
printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
......
......@@ -107,6 +107,6 @@ documentUpload uId nId doc logStatus = do
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ T.pack $ show EN }
_ <- flowDataText (RootId (NodeId uId)) (DataNew [[hd]]) (Multi EN) cId Nothing
_ <- flowDataText (RootId (NodeId uId)) (DataNew [[hd]]) (Multi EN) cId Nothing logStatus
pure $ jobLogSuccess jl
......@@ -100,7 +100,7 @@ documentsFromWriteNodes uId nId _p logStatus = do
let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
let parsed = rights parsedE
_ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) cId Nothing
_ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) cId Nothing logStatus
pure $ jobLogSuccess jobLog
------------------------------------------------------------------------
......
......@@ -65,6 +65,7 @@ import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
import qualified Data.Map as Map
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..))
import Gargantext.Core (Lang(..), PosTagAlgo(..))
import Gargantext.Core.Ext.IMT (toSchoolName)
import Gargantext.Core.Ext.IMTUser (readFile_Annuaire)
......@@ -154,11 +155,12 @@ flowDataText :: ( FlowCmdM env err m
-> TermType Lang
-> CorpusId
-> Maybe FlowSocialListWith
-> (JobLog -> m ())
-> m CorpusId
flowDataText u (DataOld ids) tt cid mfslw = flowCorpusUser (_tt_lang tt) u (Right [cid]) corpusType ids mfslw
flowDataText u (DataOld ids) tt cid mfslw _ = flowCorpusUser (_tt_lang tt) u (Right [cid]) corpusType ids mfslw
where
corpusType = (Nothing :: Maybe HyperdataCorpus)
flowDataText u (DataNew txt) tt cid mfslw = flowCorpus u (Right [cid]) tt mfslw txt
flowDataText u (DataNew txt) tt cid mfslw logStatus = flowCorpus u (Right [cid]) tt mfslw txt logStatus
------------------------------------------------------------------------
-- TODO use proxy
......@@ -167,10 +169,11 @@ flowAnnuaire :: (FlowCmdM env err m)
-> Either CorpusName [CorpusId]
-> (TermType Lang)
-> FilePath
-> (JobLog -> m ())
-> m AnnuaireId
flowAnnuaire u n l filePath = do
flowAnnuaire u n l filePath logStatus = do
docs <- liftBase $ (( splitEvery 500 <$> readFile_Annuaire filePath) :: IO [[HyperdataContact]])
flow (Nothing :: Maybe HyperdataAnnuaire) u n l Nothing docs
flow (Nothing :: Maybe HyperdataAnnuaire) u n l Nothing docs logStatus
------------------------------------------------------------------------
flowCorpusFile :: (FlowCmdM env err m)
......@@ -179,13 +182,14 @@ flowCorpusFile :: (FlowCmdM env err m)
-> Limit -- Limit the number of docs (for dev purpose)
-> TermType Lang -> FileFormat -> FilePath
-> Maybe FlowSocialListWith
-> (JobLog -> m ())
-> m CorpusId
flowCorpusFile u n l la ff fp mfslw = do
flowCorpusFile u n l la ff fp mfslw logStatus = do
eParsed <- liftBase $ parseFile ff fp
case eParsed of
Right parsed -> do
let docs = splitEvery 500 $ take l parsed
flowCorpus u n la mfslw (map (map toHyperdataDocument) docs)
flowCorpus u n la mfslw (map (map toHyperdataDocument) docs) logStatus
Left e -> panic $ "Error: " <> (T.pack e)
------------------------------------------------------------------------
......@@ -197,6 +201,7 @@ flowCorpus :: (FlowCmdM env err m, FlowCorpus a)
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[a]]
-> (JobLog -> m ())
-> m CorpusId
flowCorpus = flow (Nothing :: Maybe HyperdataCorpus)
......@@ -211,10 +216,19 @@ flow :: ( FlowCmdM env err m
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[a]]
-> (JobLog -> m ())
-> m CorpusId
flow c u cn la mfslw docs = do
flow c u cn la mfslw docs logStatus = do
-- TODO if public insertMasterDocs else insertUserDocs
ids <- traverse (insertMasterDocs c la) docs
ids <- traverse (\(idx, doc) -> do
id <- insertMasterDocs c la doc
logStatus JobLog { _scst_succeeded = Just $ 1 + idx
, _scst_failed = Just 0
, _scst_remaining = Just $ length docs - idx
, _scst_events = Just []
}
pure id
) (zip [1..] docs)
flowCorpusUser (la ^. tt_lang) u cn c (concat ids) mfslw
------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment