Commit b1aae86b authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Generalise addDocumentsToHyperCorpus

parent 223d0d09
Pipeline #4557 passed with stages
in 38 minutes and 47 seconds
...@@ -16,17 +16,18 @@ import Gargantext.API.Admin.EnvTypes (GargJob(..), Env) ...@@ -16,17 +16,18 @@ import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs) import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Prelude import Gargantext.API.Prelude
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.NLP (nlpServerGet)
import Gargantext.Core.Text.Corpus.Parsers.Date (dateSplit) import Gargantext.Core.Text.Corpus.Parsers.Date (dateSplit)
import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
import Gargantext.Database.Action.Flow.Types
import Gargantext.Core.Text.Terms (TermType(..)) import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus) import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus)
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType') import Gargantext.Database.Query.Table.Node (getClosestParentIdByType')
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..)) import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Gargantext.Core.NLP (nlpServerGet)
data DocumentUpload = DocumentUpload data DocumentUpload = DocumentUpload
...@@ -118,4 +119,4 @@ documentUpload nId doc = do ...@@ -118,4 +119,4 @@ documentUpload nId doc = do
let lang = EN let lang = EN
ncs <- view $ nlpServerGet lang ncs <- view $ nlpServerGet lang
addDocumentsToHyperCorpus ncs Nothing (Multi lang) cId [hd] addDocumentsToHyperCorpus ncs (Nothing :: Maybe HyperdataCorpus) (Multi lang) cId [hd]
...@@ -288,33 +288,36 @@ flow :: forall env err m a c. ...@@ -288,33 +288,36 @@ flow :: forall env err m a c.
flow c u cn la mfslw (mLength, docsC) jobHandle = do flow c u cn la mfslw (mLength, docsC) jobHandle = do
(_userId, userCorpusId, listId) <- createNodes u cn c (_userId, userCorpusId, listId) <- createNodes u cn c
-- TODO if public insertMasterDocs else insertUserDocs -- TODO if public insertMasterDocs else insertUserDocs
runConduit $ zipSources (yieldMany [1..]) docsC nlpServer <- view $ nlpServerGet (_tt_lang la)
runConduit $ zipSources (yieldMany ([1..] :: [Int])) docsC
.| CList.chunksOf 100 .| CList.chunksOf 100
.| mapM_C (\docs -> void $ insertDocs' docs >>= Doc.add userCorpusId) .| mapM_C (addDocumentsWithProgress nlpServer userCorpusId)
.| sinkNull .| sinkNull
$(logLocM) DEBUG "Calling flowCorpusUser" $(logLocM) DEBUG "Calling flowCorpusUser"
flowCorpusUser (la ^. tt_lang) u userCorpusId listId c mfslw flowCorpusUser (la ^. tt_lang) u userCorpusId listId c mfslw
where where
insertDocs' :: [(Integer, a)] -> m [NodeId] addDocumentsWithProgress :: NLPServerConfig -> CorpusId -> [(Int, a)] -> m ()
insertDocs' [] = pure [] addDocumentsWithProgress nlpServer userCorpusId docsChunk = do
insertDocs' docs = do $(logLocM) DEBUG $ T.pack $ "calling insertDoc, ([idx], mLength) = " <> show (fst <$> docsChunk, mLength)
ncs <- view $ nlpServerGet (_tt_lang la) docs <- addDocumentsToHyperCorpus nlpServer c la userCorpusId (map snd docsChunk)
$(logLocM) DEBUG $ T.pack $ "calling insertDoc, ([idx], mLength) = " <> show (fst <$> docs, mLength)
ids <- insertMasterDocs ncs c la (snd <$> docs)
markProgress (length docs) jobHandle markProgress (length docs) jobHandle
pure ids
-- | Given a list of corpus documents and a 'NodeId' identifying the 'CorpusId', adds -- | Given a list of corpus documents and a 'NodeId' identifying the 'CorpusId', adds
-- the given documents to the corpus. Returns the Ids of the inserted documents. -- the given documents to the corpus. Returns the Ids of the inserted documents.
addDocumentsToHyperCorpus :: (DbCmd' env err m, HasNodeError err) addDocumentsToHyperCorpus :: ( DbCmd' env err m
=> NLPServerConfig , HasNodeError err
-> Maybe HyperdataCorpus , FlowCorpus document
-> TermType Lang , MkCorpus corpus
-> CorpusId )
-> [HyperdataDocument] => NLPServerConfig
-> m [DocId] -> Maybe corpus
-> TermType Lang
-> CorpusId
-> [document]
-> m [DocId]
addDocumentsToHyperCorpus ncs mb_hyper la corpusId docs = do addDocumentsToHyperCorpus ncs mb_hyper la corpusId docs = do
ids <- insertMasterDocs ncs mb_hyper la docs ids <- insertMasterDocs ncs mb_hyper la docs
void $ Doc.add corpusId ids void $ Doc.add corpusId ids
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment