Commit b1aae86b authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Generalise addDocumentsToHyperCorpus

parent 223d0d09
...@@ -16,17 +16,18 @@ import Gargantext.API.Admin.EnvTypes (GargJob(..), Env) ...@@ -16,17 +16,18 @@ import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs) import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Prelude import Gargantext.API.Prelude
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.NLP (nlpServerGet)
import Gargantext.Core.Text.Corpus.Parsers.Date (dateSplit) import Gargantext.Core.Text.Corpus.Parsers.Date (dateSplit)
import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
import Gargantext.Database.Action.Flow.Types
import Gargantext.Core.Text.Terms (TermType(..)) import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus) import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus)
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType') import Gargantext.Database.Query.Table.Node (getClosestParentIdByType')
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..)) import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Gargantext.Core.NLP (nlpServerGet)
data DocumentUpload = DocumentUpload data DocumentUpload = DocumentUpload
...@@ -118,4 +119,4 @@ documentUpload nId doc = do ...@@ -118,4 +119,4 @@ documentUpload nId doc = do
let lang = EN let lang = EN
ncs <- view $ nlpServerGet lang ncs <- view $ nlpServerGet lang
addDocumentsToHyperCorpus ncs Nothing (Multi lang) cId [hd] addDocumentsToHyperCorpus ncs (Nothing :: Maybe HyperdataCorpus) (Multi lang) cId [hd]
...@@ -288,32 +288,35 @@ flow :: forall env err m a c. ...@@ -288,32 +288,35 @@ flow :: forall env err m a c.
flow c u cn la mfslw (mLength, docsC) jobHandle = do flow c u cn la mfslw (mLength, docsC) jobHandle = do
(_userId, userCorpusId, listId) <- createNodes u cn c (_userId, userCorpusId, listId) <- createNodes u cn c
-- TODO if public insertMasterDocs else insertUserDocs -- TODO if public insertMasterDocs else insertUserDocs
runConduit $ zipSources (yieldMany [1..]) docsC nlpServer <- view $ nlpServerGet (_tt_lang la)
runConduit $ zipSources (yieldMany ([1..] :: [Int])) docsC
.| CList.chunksOf 100 .| CList.chunksOf 100
.| mapM_C (\docs -> void $ insertDocs' docs >>= Doc.add userCorpusId) .| mapM_C (addDocumentsWithProgress nlpServer userCorpusId)
.| sinkNull .| sinkNull
$(logLocM) DEBUG "Calling flowCorpusUser" $(logLocM) DEBUG "Calling flowCorpusUser"
flowCorpusUser (la ^. tt_lang) u userCorpusId listId c mfslw flowCorpusUser (la ^. tt_lang) u userCorpusId listId c mfslw
where where
insertDocs' :: [(Integer, a)] -> m [NodeId] addDocumentsWithProgress :: NLPServerConfig -> CorpusId -> [(Int, a)] -> m ()
insertDocs' [] = pure [] addDocumentsWithProgress nlpServer userCorpusId docsChunk = do
insertDocs' docs = do $(logLocM) DEBUG $ T.pack $ "calling insertDoc, ([idx], mLength) = " <> show (fst <$> docsChunk, mLength)
ncs <- view $ nlpServerGet (_tt_lang la) docs <- addDocumentsToHyperCorpus nlpServer c la userCorpusId (map snd docsChunk)
$(logLocM) DEBUG $ T.pack $ "calling insertDoc, ([idx], mLength) = " <> show (fst <$> docs, mLength)
ids <- insertMasterDocs ncs c la (snd <$> docs)
markProgress (length docs) jobHandle markProgress (length docs) jobHandle
pure ids
-- | Given a list of corpus documents and a 'NodeId' identifying the 'CorpusId', adds -- | Given a list of corpus documents and a 'NodeId' identifying the 'CorpusId', adds
-- the given documents to the corpus. Returns the Ids of the inserted documents. -- the given documents to the corpus. Returns the Ids of the inserted documents.
addDocumentsToHyperCorpus :: (DbCmd' env err m, HasNodeError err) addDocumentsToHyperCorpus :: ( DbCmd' env err m
, HasNodeError err
, FlowCorpus document
, MkCorpus corpus
)
=> NLPServerConfig => NLPServerConfig
-> Maybe HyperdataCorpus -> Maybe corpus
-> TermType Lang -> TermType Lang
-> CorpusId -> CorpusId
-> [HyperdataDocument] -> [document]
-> m [DocId] -> m [DocId]
addDocumentsToHyperCorpus ncs mb_hyper la corpusId docs = do addDocumentsToHyperCorpus ncs mb_hyper la corpusId docs = do
ids <- insertMasterDocs ncs mb_hyper la docs ids <- insertMasterDocs ncs mb_hyper la docs
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment