Commit 47ab0d5b authored by Alexandre Delanoë

[OPTIM] Insert ngrams

parent 4d140947
Pipeline #1372 failed with stage
......@@ -448,19 +448,20 @@ indexAllDocumentsWithPosTag = do
corpusIds <- findNodesId rootId [NodeCorpus]
docs <- List.concat <$> mapM getDocumentsWithParentId corpusIds
printDebug "Nb of docs" (List.length docs)
_ <- mapM extractInsert (splitEvery 1000 docs)
pure ()
-- | Extract the ngrams of one batch of documents and insert them.
--
-- Receives a list of document nodes (the caller above passes chunks produced
-- by @splitEvery 1000@), pairs every document with its own node id, runs
-- 'extractNgramsT' over the batch through 'documentIdWithNgrams', regroups the
-- result with 'mapNodeIdNgrams', and hands the keys of the resulting map to
-- 'insertExtractedNgrams'. The results of the insertion are discarded.
--
-- NOTE(review): this span comes from a diff view whose +/- markers were lost.
-- @mapNgramsDocs'@ is bound twice and the keys are inserted twice (once in
-- chunks of 500, once in a single call), so old and new lines are most likely
-- interleaved here -- confirm against the real file before relying on it.
extractInsert :: FlowCmdM env err m => [Node HyperdataDocument] -> m ()
extractInsert docs = do
-- Wrap each document together with its node id (read through the node_id lens).
let documentsWithId = map (\doc -> Indexed (doc ^. node_id) doc) docs
-- Variant with an explicit type annotation on the binding: the map is keyed
-- by extracted ngram, then by ngrams type, then by node id, with an Int leaf
-- (presumably an occurrence count -- TODO confirm).
mapNgramsDocs' :: HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
<- mapNodeIdNgrams
-- Variant without the type annotation (same right-hand side).
mapNgramsDocs' <- mapNodeIdNgrams
<$> documentIdWithNgrams
-- Extraction is run with the language fixed to Multi EN.
(extractNgramsT $ withLang (Multi EN) documentsWithId)
documentsWithId
-- Variant that inserts the extracted ngrams in chunks of 500 keys.
_ <- mapM insertExtractedNgrams
$ splitEvery 500
$ HashMap.keys mapNgramsDocs'
-- Variant that inserts every extracted ngram key in one call.
_ <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
pure ()
......@@ -471,7 +472,3 @@ indexAllDocumentsWithPosTag = do
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment