Commit 05d58045 authored by Alfredo Di Napoli

insertMasterDocs can now report all failures

This commit reworks `insertMasterDocs` so that it processes each
document insertion separately (via the new single-document
`insertMasterDoc`, each run in its own DB transaction) and collects any
exception thrown by the DB layer, returning at the very end a list of
failures (if any) alongside the IDs of the inserted documents.

Failures can then be reported to the user and/or written to the
server logs.
parent 0c2fb6fb
Pipeline #7654 passed with stages
in 67 minutes and 36 seconds
...@@ -90,7 +90,7 @@ import Gargantext.Core.Types.Main ( ListType(MapTerm) ) ...@@ -90,7 +90,7 @@ import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Database.Action.Flow.Extract () -- ExtractNgramsT instances import Gargantext.Database.Action.Flow.Extract () -- ExtractNgramsT instances
import Gargantext.Database.Action.Flow.List ( flowList_DbRepo, toNodeNgramsW' ) import Gargantext.Database.Action.Flow.List ( flowList_DbRepo, toNodeNgramsW' )
import Gargantext.Database.Action.Flow.Types ( do_api, DataOrigin(..), DataText(..), FlowCorpus, DocumentIdWithNgrams (..) ) import Gargantext.Database.Action.Flow.Types ( do_api, DataOrigin(..), DataText(..), FlowCorpus, DocumentIdWithNgrams (..) )
import Gargantext.Database.Action.Flow.Utils (insertDocNgrams, insertDocs, mkNodeIdNgramsMap, ngramsByDoc, documentIdWithNgrams) import Gargantext.Database.Action.Flow.Utils (insertDocNgrams, mkNodeIdNgramsMap, ngramsByDoc, documentIdWithNgrams, insertDoc)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore) import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Action.Search (searchDocInDatabase) import Gargantext.Database.Action.Search (searchDocInDatabase)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact ) import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact )
...@@ -109,7 +109,7 @@ import Gargantext.Database.Query.Tree.Root (MkCorpusUser(..), getOrMkRoot, getOr ...@@ -109,7 +109,7 @@ import Gargantext.Database.Query.Tree.Root (MkCorpusUser(..), getOrMkRoot, getOr
import Gargantext.Database.Schema.Ngrams ( indexNgrams, NgramsId ) import Gargantext.Database.Schema.Ngrams ( indexNgrams, NgramsId )
import Gargantext.Database.Schema.Node import Gargantext.Database.Schema.Node
import Gargantext.Database.Types import Gargantext.Database.Types
import Gargantext.Prelude hiding (catch, onException, to) import Gargantext.Prelude hiding (try, catch, onException, to)
import Gargantext.System.Logging ( logLocM, LogLevel(DEBUG, ERROR), MonadLogger ) import Gargantext.System.Logging ( logLocM, LogLevel(DEBUG, ERROR), MonadLogger )
import Gargantext.Utils.Jobs.Error (HumanFriendlyErrorText(..)) import Gargantext.Utils.Jobs.Error (HumanFriendlyErrorText(..))
import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) ) import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) )
...@@ -335,7 +335,7 @@ addDocumentsToHyperCorpus jobHandle mb_hyper la corpusId docs = do ...@@ -335,7 +335,7 @@ addDocumentsToHyperCorpus jobHandle mb_hyper la corpusId docs = do
-- First extract all the ngrams for the input documents via the nlp server, -- First extract all the ngrams for the input documents via the nlp server,
-- log errors (if any) and pass the final result to 'insertMasterDocs'. -- log errors (if any) and pass the final result to 'insertMasterDocs'.
uncommittedNgrams <- extractNgramsFromDocuments nlp la docs uncommittedNgrams <- extractNgramsFromDocuments nlp la docs
(failures, ids) <- runDBTx $ insertMasterDocs cfg uncommittedNgrams mb_hyper docs (failures, ids) <- insertMasterDocs cfg uncommittedNgrams mb_hyper docs
forM_ failures $ \failure -> do forM_ failures $ \failure -> do
case failure of case failure of
NgramsNotFound _mb_hashId docId -> do NgramsNotFound _mb_hashId docId -> do
...@@ -545,18 +545,12 @@ extractNgramsFromDocuments nlpServer lang docs = ...@@ -545,18 +545,12 @@ extractNgramsFromDocuments nlpServer lang docs =
ngrams <- extractNgramsFromDocument nlpServer lang inputDoc ngrams <- extractNgramsFromDocument nlpServer lang inputDoc
pure $ acc <> ngrams pure $ acc <> ngrams
commitNgramsForDocuments :: UniqParameters doc insertMasterDocs :: forall env err doc c m. ( HasNodeError err
=> UncommittedNgrams doc
-> [Indexed ContextId (Node doc)]
-> ([InsertDocError], CommittedNgrams)
commitNgramsForDocuments ng nodes =
let (errs, successes) = partitionEithers $ map (commitNgramsForDocument ng) nodes
in (errs, mconcat successes)
insertMasterDocs :: ( HasNodeError err
, UniqParameters doc , UniqParameters doc
, FlowCorpus doc , FlowCorpus doc
, MkCorpus c , MkCorpus c
, IsDBCmd env err m
, MonadCatch m
) )
=> GargConfig => GargConfig
-> UncommittedNgrams doc -> UncommittedNgrams doc
...@@ -566,22 +560,51 @@ insertMasterDocs :: ( HasNodeError err ...@@ -566,22 +560,51 @@ insertMasterDocs :: ( HasNodeError err
-- with the node being created. -- with the node being created.
-> Maybe c -> Maybe c
-> [doc] -> [doc]
-> DBUpdate err ([InsertDocError], [DocId]) -> m ([InsertDocError], [DocId])
insertMasterDocs cfg uncommittedNgrams c hs = do insertMasterDocs cfg uncommittedNgrams c docs =
bimap reverse reverse <$> foldlM go (mempty, mempty) docs
where
go :: ([InsertDocError], [DocId]) -> doc -> m ([InsertDocError], [DocId])
go (!errs, !documents) doc = do
res <- try $ runDBTx (insertMasterDoc cfg uncommittedNgrams c doc)
case res of
Left err
-> pure (DocumentInsertionError err : errs, documents)
Right (Left err)
-> pure (err : errs, documents)
Right (Right d)
-> pure (errs, d : documents)
insertMasterDoc :: ( HasNodeError err
, UniqParameters doc
, FlowCorpus doc
, MkCorpus c
)
=> GargConfig
-> UncommittedNgrams doc
-- ^ The ngrams extracted for /all/ the documents
-- and indexed by the hash of the given document.
-- We can use this map to associate the document
-- with the node being created.
-> Maybe c
-> doc
-> DBUpdate err (Either InsertDocError DocId)
insertMasterDoc cfg uncommittedNgrams c h = do
(masterUserId, _, masterCorpusId) <- getOrMkRootWithCorpus cfg MkCorpusUserMaster c (masterUserId, _, masterCorpusId) <- getOrMkRootWithCorpus cfg MkCorpusUserMaster c
(ids', documentsWithId) <- insertDocs masterUserId masterCorpusId (map (toNode masterUserId Nothing) hs ) documentWithId <- insertDoc masterUserId masterCorpusId (toNode masterUserId Nothing h)
_ <- Doc.add masterCorpusId ids' _ <- Doc.add masterCorpusId [_index documentWithId]
-- TODO -- TODO
-- create a corpus with database name (CSV or PubMed) -- create a corpus with database name (CSV or PubMed)
-- add documents to the corpus (create node_node link) -- add documents to the corpus (create node_node link)
-- this will enable global database monitoring -- this will enable global database monitoring
let (failedExtraction, ngramsDocsMap) = commitNgramsForDocuments uncommittedNgrams documentsWithId case commitNgramsForDocument uncommittedNgrams documentWithId of
Left failed -> pure $ Left failed
lId <- getOrMkList masterCorpusId masterUserId Right ngramsDocsMap -> do
_ <- saveDocNgramsWith lId ngramsDocsMap lId <- getOrMkList masterCorpusId masterUserId
pure $ (failedExtraction, map contextId2NodeId ids') --FIXME: populate errors _ <- saveDocNgramsWith lId ngramsDocsMap
pure $ Right (contextId2NodeId $ _index documentWithId)
saveDocNgramsWith :: ListId saveDocNgramsWith :: ListId
......
...@@ -16,6 +16,7 @@ module Gargantext.Database.Action.Flow.Utils ...@@ -16,6 +16,7 @@ module Gargantext.Database.Action.Flow.Utils
, mapDocumentIdWithNgrams , mapDocumentIdWithNgrams
, insertDocNgrams , insertDocNgrams
, insertDocs , insertDocs
, insertDoc
, mkNodeIdNgramsMap , mkNodeIdNgramsMap
, ngramsByDoc ) , ngramsByDoc )
where where
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment