Commit 47ab0d5b authored by Alexandre Delanoë

[OPTIM] Insert ngrams

parent 4d140947
Pipeline #1372 failed with stage
...@@ -448,19 +448,20 @@ indexAllDocumentsWithPosTag = do ...@@ -448,19 +448,20 @@ indexAllDocumentsWithPosTag = do
corpusIds <- findNodesId rootId [NodeCorpus] corpusIds <- findNodesId rootId [NodeCorpus]
docs <- List.concat <$> mapM getDocumentsWithParentId corpusIds docs <- List.concat <$> mapM getDocumentsWithParentId corpusIds
printDebug "Nb of docs" (List.length docs) _ <- mapM extractInsert (splitEvery 1000 docs)
pure ()
extractInsert :: FlowCmdM env err m => [Node HyperdataDocument] -> m ()
extractInsert docs = do
let documentsWithId = map (\doc -> Indexed (doc ^. node_id) doc) docs let documentsWithId = map (\doc -> Indexed (doc ^. node_id) doc) docs
mapNgramsDocs' :: HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int)) mapNgramsDocs' <- mapNodeIdNgrams
<- mapNodeIdNgrams
<$> documentIdWithNgrams <$> documentIdWithNgrams
(extractNgramsT $ withLang (Multi EN) documentsWithId) (extractNgramsT $ withLang (Multi EN) documentsWithId)
documentsWithId documentsWithId
_ <- mapM insertExtractedNgrams _ <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
$ splitEvery 500
$ HashMap.keys mapNgramsDocs'
pure () pure ()
...@@ -471,7 +472,3 @@ indexAllDocumentsWithPosTag = do ...@@ -471,7 +472,3 @@ indexAllDocumentsWithPosTag = do
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment