Commit 05d58045 authored by Alfredo Di Napoli

insertMasterDocs can now report all failures

This commit introduces a version of `insertMasterDocs` that processes
each document insertion separately and collects any exception thrown by
the DB layer, returning at the very end a list of failures (if any)
alongside the inserted documents.

Failures can then be logged and reported to the user and/or in the
server logs.
parent 0c2fb6fb
Pipeline #7654 passed with stages
in 67 minutes and 36 seconds
......@@ -90,7 +90,7 @@ import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Database.Action.Flow.Extract () -- ExtractNgramsT instances
import Gargantext.Database.Action.Flow.List ( flowList_DbRepo, toNodeNgramsW' )
import Gargantext.Database.Action.Flow.Types ( do_api, DataOrigin(..), DataText(..), FlowCorpus, DocumentIdWithNgrams (..) )
import Gargantext.Database.Action.Flow.Utils (insertDocNgrams, insertDocs, mkNodeIdNgramsMap, ngramsByDoc, documentIdWithNgrams)
import Gargantext.Database.Action.Flow.Utils (insertDocNgrams, mkNodeIdNgramsMap, ngramsByDoc, documentIdWithNgrams, insertDoc)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Action.Search (searchDocInDatabase)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact )
......@@ -109,7 +109,7 @@ import Gargantext.Database.Query.Tree.Root (MkCorpusUser(..), getOrMkRoot, getOr
import Gargantext.Database.Schema.Ngrams ( indexNgrams, NgramsId )
import Gargantext.Database.Schema.Node
import Gargantext.Database.Types
import Gargantext.Prelude hiding (catch, onException, to)
import Gargantext.Prelude hiding (try, catch, onException, to)
import Gargantext.System.Logging ( logLocM, LogLevel(DEBUG, ERROR), MonadLogger )
import Gargantext.Utils.Jobs.Error (HumanFriendlyErrorText(..))
import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) )
......@@ -335,7 +335,7 @@ addDocumentsToHyperCorpus jobHandle mb_hyper la corpusId docs = do
-- First extract all the ngrams for the input documents via the nlp server,
-- log errors (if any) and pass the final result to 'insertMasterDocs'.
uncommittedNgrams <- extractNgramsFromDocuments nlp la docs
(failures, ids) <- runDBTx $ insertMasterDocs cfg uncommittedNgrams mb_hyper docs
(failures, ids) <- insertMasterDocs cfg uncommittedNgrams mb_hyper docs
forM_ failures $ \failure -> do
case failure of
NgramsNotFound _mb_hashId docId -> do
......@@ -545,18 +545,12 @@ extractNgramsFromDocuments nlpServer lang docs =
ngrams <- extractNgramsFromDocument nlpServer lang inputDoc
pure $ acc <> ngrams
-- | Runs 'commitNgramsForDocument' over every node and splits the outcomes:
-- the 'Left' results are returned as the list of errors, while the 'Right'
-- results are merged with 'mconcat' into a single 'CommittedNgrams'.
commitNgramsForDocuments :: UniqParameters doc
                         => UncommittedNgrams doc
                         -> [Indexed ContextId (Node doc)]
                         -> ([InsertDocError], CommittedNgrams)
commitNgramsForDocuments ng nodes = (failures, mconcat committed)
  where
    -- Bound lazily, exactly like the tuple pattern of a @let@ binding.
    (failures, committed) =
      partitionEithers (commitNgramsForDocument ng <$> nodes)
insertMasterDocs :: ( HasNodeError err
insertMasterDocs :: forall env err doc c m. ( HasNodeError err
, UniqParameters doc
, FlowCorpus doc
, MkCorpus c
, IsDBCmd env err m
, MonadCatch m
)
=> GargConfig
-> UncommittedNgrams doc
......@@ -566,22 +560,51 @@ insertMasterDocs :: ( HasNodeError err
-- with the node being created.
-> Maybe c
-> [doc]
-> DBUpdate err ([InsertDocError], [DocId])
insertMasterDocs cfg uncommittedNgrams c hs = do
-> m ([InsertDocError], [DocId])
-- Inserts the documents one at a time: every document goes through its own
-- 'runDBTx' call wrapped in 'try', and the outcome is accumulated as either
-- a failure or an inserted document id instead of aborting the whole fold.
insertMasterDocs cfg uncommittedNgrams c docs =
-- Both accumulators are built by prepending, so each one is reversed once
-- the fold is over.
bimap reverse reverse <$> foldlM go (mempty, mempty) docs
where
-- One fold step: try to insert a single document and extend whichever
-- accumulator matches the outcome.
go :: ([InsertDocError], [DocId]) -> doc -> m ([InsertDocError], [DocId])
go (!errs, !documents) doc = do
-- 'try' turns an exception raised while running this document's 'runDBTx'
-- call into a 'Left' value.
res <- try $ runDBTx (insertMasterDoc cfg uncommittedNgrams c doc)
case res of
-- An exception was caught: record it wrapped in 'DocumentInsertionError'.
Left err
-> pure (DocumentInsertionError err : errs, documents)
-- No exception, but 'insertMasterDoc' itself returned an 'InsertDocError'.
Right (Left err)
-> pure (err : errs, documents)
-- The document was inserted: keep its id.
Right (Right d)
-> pure (errs, d : documents)
-- | Inserts a single document under the master user's corpus and links it to
-- that corpus with 'Doc.add'. If 'commitNgramsForDocument' fails for the
-- inserted document, that failure is returned as 'Left'; otherwise the ngrams
-- are saved against the master list and the document's id is returned as
-- 'Right'.
insertMasterDoc :: ( HasNodeError err
, UniqParameters doc
, FlowCorpus doc
, MkCorpus c
)
=> GargConfig
-> UncommittedNgrams doc
-- ^ The ngrams extracted for /all/ the documents
-- and indexed by the hash of the given document.
-- We can use this map to associate the document
-- with the node being created.
-> Maybe c
-> doc
-> DBUpdate err (Either InsertDocError DocId)
insertMasterDoc cfg uncommittedNgrams c h = do
-- Resolve (or create) the master user and its corpus.
(masterUserId, _, masterCorpusId) <- getOrMkRootWithCorpus cfg MkCorpusUserMaster c
-- NOTE(review): the next two statements use 'hs' and bind 'ids'' /
-- 'documentsWithId', none of which belong to this single-document function;
-- they look like the removed multi-document lines of the rendered diff —
-- confirm against the repository.
(ids', documentsWithId) <- insertDocs masterUserId masterCorpusId (map (toNode masterUserId Nothing) hs )
_ <- Doc.add masterCorpusId ids'
-- Single-document path: insert the node built from 'h', then add its index
-- to the master corpus.
documentWithId <- insertDoc masterUserId masterCorpusId (toNode masterUserId Nothing h)
_ <- Doc.add masterCorpusId [_index documentWithId]
-- TODO
-- create a corpus with database name (CSV or PubMed)
-- add documents to the corpus (create node_node link)
-- this will enable global database monitoring
-- NOTE(review): the four statements below depend on 'documentsWithId' and
-- 'ids'' and return a tuple rather than an 'Either'; presumably they are
-- also removed lines of the diff — confirm.
let (failedExtraction, ngramsDocsMap) = commitNgramsForDocuments uncommittedNgrams documentsWithId
lId <- getOrMkList masterCorpusId masterUserId
_ <- saveDocNgramsWith lId ngramsDocsMap
pure $ (failedExtraction, map contextId2NodeId ids') --FIXME: populate errors
-- Commit the ngrams for the inserted document; a failure is returned to the
-- caller as a value rather than thrown.
case commitNgramsForDocument uncommittedNgrams documentWithId of
Left failed -> pure $ Left failed
Right ngramsDocsMap -> do
-- Save the committed ngrams against the master corpus list.
lId <- getOrMkList masterCorpusId masterUserId
_ <- saveDocNgramsWith lId ngramsDocsMap
pure $ Right (contextId2NodeId $ _index documentWithId)
saveDocNgramsWith :: ListId
......
......@@ -16,6 +16,7 @@ module Gargantext.Database.Action.Flow.Utils
, mapDocumentIdWithNgrams
, insertDocNgrams
, insertDocs
, insertDoc
, mkNodeIdNgramsMap
, ngramsByDoc )
where
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment