Commit 797e19df authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[flow] add better progress report to flow corpus

parent 8306c484
...@@ -47,13 +47,13 @@ main = do ...@@ -47,13 +47,13 @@ main = do
tt = (Multi EN) tt = (Multi EN)
format = CsvGargV3 -- CsvHal --WOS format = CsvGargV3 -- CsvHal --WOS
corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath Nothing corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath Nothing (\_ -> pure ())
corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt CsvHal corpusPath Nothing corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt CsvHal corpusPath Nothing (\_ -> pure ())
annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath (\_ -> pure ())
{- {-
let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
......
...@@ -93,7 +93,7 @@ addContact u nId (AddContactParams fn ln) logStatus = do ...@@ -93,7 +93,7 @@ addContact u nId (AddContactParams fn ln) logStatus = do
, _scst_remaining = Just 1 , _scst_remaining = Just 1
, _scst_events = Just [] , _scst_events = Just []
} }
_ <- flow (Nothing :: Maybe HyperdataAnnuaire) u (Right [nId]) (Multi EN) Nothing [[hyperdataContact fn ln]] _ <- flow (Nothing :: Maybe HyperdataAnnuaire) u (Right [nId]) (Multi EN) Nothing [[hyperdataContact fn ln]] logStatus
pure JobLog { _scst_succeeded = Just 2 pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0 , _scst_failed = Just 0
......
...@@ -213,15 +213,15 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q ...@@ -213,15 +213,15 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
-- TODO if cid is folder -> create Corpus -- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus -- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private -- if cid is root -> create corpus in Private
txts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [database2origin dbs] txts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [database2origin dbs]
logStatus JobLog { _scst_succeeded = Just 2 logStatus JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0 , _scst_failed = Just 0
, _scst_remaining = Just 1 , _scst_remaining = Just $ 1 + length txts
, _scst_events = Just [] , _scst_events = Just []
} }
cids <- mapM (\txt -> flowDataText user txt (Multi l) cid Nothing) txts cids <- mapM (\txt -> flowDataText user txt (Multi l) cid Nothing logStatus) txts
printDebug "corpus id" cids printDebug "corpus id" cids
printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text) printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
sendMail user sendMail user
...@@ -297,6 +297,7 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do ...@@ -297,6 +297,7 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
(Multi $ fromMaybe EN l) (Multi $ fromMaybe EN l)
Nothing Nothing
(map (map toHyperdataDocument) docs) (map (map toHyperdataDocument) docs)
logStatus
printDebug "Extraction finished : " cid printDebug "Extraction finished : " cid
printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text) printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
......
...@@ -107,6 +107,6 @@ documentUpload uId nId doc logStatus = do ...@@ -107,6 +107,6 @@ documentUpload uId nId doc logStatus = do
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ T.pack $ show EN } , _hd_language_iso2 = Just $ T.pack $ show EN }
_ <- flowDataText (RootId (NodeId uId)) (DataNew [[hd]]) (Multi EN) cId Nothing _ <- flowDataText (RootId (NodeId uId)) (DataNew [[hd]]) (Multi EN) cId Nothing logStatus
pure $ jobLogSuccess jl pure $ jobLogSuccess jl
...@@ -100,7 +100,7 @@ documentsFromWriteNodes uId nId _p logStatus = do ...@@ -100,7 +100,7 @@ documentsFromWriteNodes uId nId _p logStatus = do
let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
let parsed = rights parsedE let parsed = rights parsedE
_ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) cId Nothing _ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) cId Nothing logStatus
pure $ jobLogSuccess jobLog pure $ jobLogSuccess jobLog
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
...@@ -65,6 +65,7 @@ import qualified Data.HashMap.Strict as HashMap ...@@ -65,6 +65,7 @@ import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
import qualified Data.Map as Map import qualified Data.Map as Map
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..))
import Gargantext.Core (Lang(..), PosTagAlgo(..)) import Gargantext.Core (Lang(..), PosTagAlgo(..))
import Gargantext.Core.Ext.IMT (toSchoolName) import Gargantext.Core.Ext.IMT (toSchoolName)
import Gargantext.Core.Ext.IMTUser (readFile_Annuaire) import Gargantext.Core.Ext.IMTUser (readFile_Annuaire)
...@@ -154,11 +155,12 @@ flowDataText :: ( FlowCmdM env err m ...@@ -154,11 +155,12 @@ flowDataText :: ( FlowCmdM env err m
-> TermType Lang -> TermType Lang
-> CorpusId -> CorpusId
-> Maybe FlowSocialListWith -> Maybe FlowSocialListWith
-> (JobLog -> m ())
-> m CorpusId -> m CorpusId
flowDataText u (DataOld ids) tt cid mfslw = flowCorpusUser (_tt_lang tt) u (Right [cid]) corpusType ids mfslw flowDataText u (DataOld ids) tt cid mfslw _ = flowCorpusUser (_tt_lang tt) u (Right [cid]) corpusType ids mfslw
where where
corpusType = (Nothing :: Maybe HyperdataCorpus) corpusType = (Nothing :: Maybe HyperdataCorpus)
flowDataText u (DataNew txt) tt cid mfslw = flowCorpus u (Right [cid]) tt mfslw txt flowDataText u (DataNew txt) tt cid mfslw logStatus = flowCorpus u (Right [cid]) tt mfslw txt logStatus
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO use proxy -- TODO use proxy
...@@ -167,10 +169,11 @@ flowAnnuaire :: (FlowCmdM env err m) ...@@ -167,10 +169,11 @@ flowAnnuaire :: (FlowCmdM env err m)
-> Either CorpusName [CorpusId] -> Either CorpusName [CorpusId]
-> (TermType Lang) -> (TermType Lang)
-> FilePath -> FilePath
-> (JobLog -> m ())
-> m AnnuaireId -> m AnnuaireId
flowAnnuaire u n l filePath = do flowAnnuaire u n l filePath logStatus = do
docs <- liftBase $ (( splitEvery 500 <$> readFile_Annuaire filePath) :: IO [[HyperdataContact]]) docs <- liftBase $ (( splitEvery 500 <$> readFile_Annuaire filePath) :: IO [[HyperdataContact]])
flow (Nothing :: Maybe HyperdataAnnuaire) u n l Nothing docs flow (Nothing :: Maybe HyperdataAnnuaire) u n l Nothing docs logStatus
------------------------------------------------------------------------ ------------------------------------------------------------------------
flowCorpusFile :: (FlowCmdM env err m) flowCorpusFile :: (FlowCmdM env err m)
...@@ -179,13 +182,14 @@ flowCorpusFile :: (FlowCmdM env err m) ...@@ -179,13 +182,14 @@ flowCorpusFile :: (FlowCmdM env err m)
-> Limit -- Limit the number of docs (for dev purpose) -> Limit -- Limit the number of docs (for dev purpose)
-> TermType Lang -> FileFormat -> FilePath -> TermType Lang -> FileFormat -> FilePath
-> Maybe FlowSocialListWith -> Maybe FlowSocialListWith
-> (JobLog -> m ())
-> m CorpusId -> m CorpusId
flowCorpusFile u n l la ff fp mfslw = do flowCorpusFile u n l la ff fp mfslw logStatus = do
eParsed <- liftBase $ parseFile ff fp eParsed <- liftBase $ parseFile ff fp
case eParsed of case eParsed of
Right parsed -> do Right parsed -> do
let docs = splitEvery 500 $ take l parsed let docs = splitEvery 500 $ take l parsed
flowCorpus u n la mfslw (map (map toHyperdataDocument) docs) flowCorpus u n la mfslw (map (map toHyperdataDocument) docs) logStatus
Left e -> panic $ "Error: " <> (T.pack e) Left e -> panic $ "Error: " <> (T.pack e)
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -197,6 +201,7 @@ flowCorpus :: (FlowCmdM env err m, FlowCorpus a) ...@@ -197,6 +201,7 @@ flowCorpus :: (FlowCmdM env err m, FlowCorpus a)
-> TermType Lang -> TermType Lang
-> Maybe FlowSocialListWith -> Maybe FlowSocialListWith
-> [[a]] -> [[a]]
-> (JobLog -> m ())
-> m CorpusId -> m CorpusId
flowCorpus = flow (Nothing :: Maybe HyperdataCorpus) flowCorpus = flow (Nothing :: Maybe HyperdataCorpus)
...@@ -211,10 +216,19 @@ flow :: ( FlowCmdM env err m ...@@ -211,10 +216,19 @@ flow :: ( FlowCmdM env err m
-> TermType Lang -> TermType Lang
-> Maybe FlowSocialListWith -> Maybe FlowSocialListWith
-> [[a]] -> [[a]]
-> (JobLog -> m ())
-> m CorpusId -> m CorpusId
flow c u cn la mfslw docs = do flow c u cn la mfslw docs logStatus = do
-- TODO if public insertMasterDocs else insertUserDocs -- TODO if public insertMasterDocs else insertUserDocs
ids <- traverse (insertMasterDocs c la) docs ids <- traverse (\(idx, doc) -> do
id <- insertMasterDocs c la doc
logStatus JobLog { _scst_succeeded = Just $ 1 + idx
, _scst_failed = Just 0
, _scst_remaining = Just $ length docs - idx
, _scst_events = Just []
}
pure id
) (zip [1..] docs)
flowCorpusUser (la ^. tt_lang) u cn c (concat ids) mfslw flowCorpusUser (la ^. tt_lang) u cn c (concat ids) mfslw
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment