Commit f280b7e1 authored by Alfredo Di Napoli

Minor cleanup of flow functions

parent 64987614
@@ -87,7 +87,7 @@ import Gargantext.Core.Types.Main ( ListType(MapTerm) )
 import Gargantext.Database.Action.Flow.Extract () -- ExtractNgramsT instances
 import Gargantext.Database.Action.Flow.List ( flowList_DbRepo, toNodeNgramsW' )
 import Gargantext.Database.Action.Flow.Types ( do_api, DataOrigin(..), DataText(..), FlowCorpus )
-import Gargantext.Database.Action.Flow.Utils (documentIdWithNgrams, insertDocNgrams, insertDocs, mapNodeIdNgrams, ngramsByDoc)
+import Gargantext.Database.Action.Flow.Utils (insertDocNgrams, insertDocs, mkNodeIdNgramsMap, ngramsByDoc, mapDocumentIdWithNgrams)
 import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
 import Gargantext.Database.Action.Search (searchDocInDatabase)
 import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact )
@@ -441,15 +441,15 @@ insertMasterDocs cfg nlpServer c lang hs = do
   -- add documents to the corpus (create node_node link)
   -- this will enable global database monitoring
-  mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
-    <- mapNodeIdNgrams
-    <$> documentIdWithNgrams
+  ngramsDocsMap :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
+    <- mkNodeIdNgramsMap
+    <$> mapDocumentIdWithNgrams
           (extractNgrams nlpServer $ withLang lang documentsWithId)
           (map (B.first contextId2NodeId) documentsWithId)
   runDBTx $ do
     lId <- getOrMkList masterCorpusId masterUserId
     _ <- saveDocNgramsWith lId ngramsDocsMap
     pure $ map contextId2NodeId ids'
...
@@ -12,9 +12,10 @@ Portability : POSIX
 module Gargantext.Database.Action.Flow.Utils
   ( docNgrams
   , documentIdWithNgrams
+  , mapDocumentIdWithNgrams
   , insertDocNgrams
   , insertDocs
-  , mapNodeIdNgrams
+  , mkNodeIdNgramsMap
   , ngramsByDoc )
   where
@@ -38,7 +39,6 @@ import Gargantext.Database.Prelude
 import Gargantext.Database.Query.Table.ContextNodeNgrams ( ContextNodeNgramsPoly(..), insertContextNodeNgrams )
 import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
 import Gargantext.Database.Query.Table.Node.Document.Insert (ReturnId, addUniqId, insertDb, reId, reInserted, reUniqId)
-import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
 import Gargantext.Database.Schema.Context (context_oid_hyperdata, context_oid_id)
 import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTypeId(..), text2ngrams)
 import Gargantext.Database.Types ( Indexed(..), index )
@@ -85,26 +85,32 @@ docNgrams lang ts doc =
   )

-documentIdWithNgrams :: HasNodeError err
-                     => ( a
-                          -> DBCmd err (HashMap.HashMap b (Map NgramsType TermsWeight, TermsCount)) )
-                     -> [Indexed NodeId a]
-                     -> DBCmd err [DocumentIdWithNgrams a b]
-documentIdWithNgrams f = traverse toDocumentIdWithNgrams
+documentIdWithNgrams :: Monad m
+                     => ( a -> m (HashMap.HashMap b (Map NgramsType TermsWeight, TermsCount)) )
+                     -> Indexed NodeId a
+                     -> m (DocumentIdWithNgrams a b)
+documentIdWithNgrams f = toDocumentIdWithNgrams
   where
     toDocumentIdWithNgrams d = do
      e <- f $ _unIndex d
      pure $ DocumentIdWithNgrams d e

+mapDocumentIdWithNgrams :: Monad m
+                        => ( a -> m (HashMap.HashMap b (Map NgramsType TermsWeight, TermsCount)) )
+                        -> [Indexed NodeId a]
+                        -> m [DocumentIdWithNgrams a b]
+mapDocumentIdWithNgrams f = mapM (documentIdWithNgrams f)
+
--- | TODO check optimization
-mapNodeIdNgrams :: (Ord b, Hashable b)
+-- | Creates a NodeIdNgrams map out of the input 'DocumentIdWithNgrams' list.
+-- TODO check optimization
+mkNodeIdNgramsMap :: (Ord b, Hashable b)
                  => [DocumentIdWithNgrams a b]
                  -> HashMap.HashMap b
                       (Map NgramsType
                         (Map NodeId (TermsWeight, TermsCount))
                       )
-mapNodeIdNgrams = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) . fmap f
+mkNodeIdNgramsMap = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) . fmap f
   where
     -- | NOTE We are somehow multiplying 'TermsCount' here: If the
     -- same ngrams term has different ngrams types, the 'TermsCount'
...
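For context, the cleanup splits the old list-based documentIdWithNgrams into a single-document function plus a mapM wrapper, generalises it from DBCmd err to any Monad (which is why the HasNodeError import can be dropped), and renames mapNodeIdNgrams to mkNodeIdNgramsMap. The sketch below mirrors that shape with simplified stand-in types; every name in it (Indexed, DocWithNgrams, docWithNgrams, mapDocWithNgrams, mkNgramsMap, plain Int counts) is hypothetical and only illustrates the pattern, not Gargantext's actual API.

-- Illustrative sketch only: simplified stand-ins for the Gargantext types.
module FlowUtilsSketch where

import qualified Data.Map.Strict as M

-- A value paired with its identifier (stand-in for Indexed NodeId a).
data Indexed i a = Indexed { _index :: i, _unIndex :: a }

-- A document together with its extracted ngrams
-- (stand-in for DocumentIdWithNgrams).
data DocWithNgrams i a b = DocWithNgrams
  { dwnDoc    :: Indexed i a
  , dwnNgrams :: M.Map b Int
  }

-- Single-document version, polymorphic in the monad: mirrors the new
-- 'documentIdWithNgrams' shape (previously specialised to DBCmd err).
docWithNgrams :: Monad m
              => (a -> m (M.Map b Int))
              -> Indexed i a
              -> m (DocWithNgrams i a b)
docWithNgrams f d = DocWithNgrams d <$> f (_unIndex d)

-- List version built on the single-document one via mapM,
-- mirroring the new 'mapDocumentIdWithNgrams'.
mapDocWithNgrams :: Monad m
                 => (a -> m (M.Map b Int))
                 -> [Indexed i a]
                 -> m [DocWithNgrams i a b]
mapDocWithNgrams f = mapM (docWithNgrams f)

-- Merge the per-document maps into one map keyed by ngram, then by document
-- id, summing counts: the same unionsWith nesting used by 'mkNodeIdNgramsMap'.
mkNgramsMap :: (Ord b, Ord i)
            => [DocWithNgrams i a b]
            -> M.Map b (M.Map i Int)
mkNgramsMap = M.unionsWith (M.unionWith (+)) . fmap perDoc
  where
    perDoc (DocWithNgrams (Indexed i _) ngs) = fmap (M.singleton i) ngs

In the real module the leaf values are (TermsWeight, TermsCount) pairs combined with addTuples rather than plain Ints, the outer container is a HashMap keyed by ExtractedNgrams, and there is an intermediate map keyed by NgramsType.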