Commit b1aae86b authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Generalise addDocumentsToHyperCorpus

parent 223d0d09
Pipeline #4557 passed with stages
in 38 minutes and 47 seconds
......@@ -16,17 +16,18 @@ import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Core.NLP (nlpServerGet)
import Gargantext.Core.Text.Corpus.Parsers.Date (dateSplit)
import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
import Gargantext.Database.Action.Flow.Types
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus)
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType')
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Gargantext.Core.NLP (nlpServerGet)
data DocumentUpload = DocumentUpload
......@@ -118,4 +119,4 @@ documentUpload nId doc = do
let lang = EN
ncs <- view $ nlpServerGet lang
addDocumentsToHyperCorpus ncs Nothing (Multi lang) cId [hd]
addDocumentsToHyperCorpus ncs (Nothing :: Maybe HyperdataCorpus) (Multi lang) cId [hd]
......@@ -288,33 +288,36 @@ flow :: forall env err m a c.
flow c u cn la mfslw (mLength, docsC) jobHandle = do
(_userId, userCorpusId, listId) <- createNodes u cn c
-- TODO if public insertMasterDocs else insertUserDocs
runConduit $ zipSources (yieldMany [1..]) docsC
nlpServer <- view $ nlpServerGet (_tt_lang la)
runConduit $ zipSources (yieldMany ([1..] :: [Int])) docsC
.| CList.chunksOf 100
.| mapM_C (\docs -> void $ insertDocs' docs >>= Doc.add userCorpusId)
.| mapM_C (addDocumentsWithProgress nlpServer userCorpusId)
.| sinkNull
$(logLocM) DEBUG "Calling flowCorpusUser"
flowCorpusUser (la ^. tt_lang) u userCorpusId listId c mfslw
where
insertDocs' :: [(Integer, a)] -> m [NodeId]
insertDocs' [] = pure []
insertDocs' docs = do
ncs <- view $ nlpServerGet (_tt_lang la)
$(logLocM) DEBUG $ T.pack $ "calling insertDoc, ([idx], mLength) = " <> show (fst <$> docs, mLength)
ids <- insertMasterDocs ncs c la (snd <$> docs)
addDocumentsWithProgress :: NLPServerConfig -> CorpusId -> [(Int, a)] -> m ()
addDocumentsWithProgress nlpServer userCorpusId docsChunk = do
$(logLocM) DEBUG $ T.pack $ "calling insertDoc, ([idx], mLength) = " <> show (fst <$> docsChunk, mLength)
docs <- addDocumentsToHyperCorpus nlpServer c la userCorpusId (map snd docsChunk)
markProgress (length docs) jobHandle
pure ids
-- | Given a list of corpus documents and a 'NodeId' identifying the 'CorpusId', adds
-- the given documents to the corpus. Returns the Ids of the inserted documents.
addDocumentsToHyperCorpus :: (DbCmd' env err m, HasNodeError err)
=> NLPServerConfig
-> Maybe HyperdataCorpus
-> TermType Lang
-> CorpusId
-> [HyperdataDocument]
-> m [DocId]
addDocumentsToHyperCorpus :: ( DbCmd' env err m
, HasNodeError err
, FlowCorpus document
, MkCorpus corpus
)
=> NLPServerConfig
-> Maybe corpus
-> TermType Lang
-> CorpusId
-> [document]
-> m [DocId]
addDocumentsToHyperCorpus ncs mb_hyper la corpusId docs = do
ids <- insertMasterDocs ncs mb_hyper la docs
void $ Doc.add corpusId ids
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment