Commit 38867db9 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[REFACT][FLOW] insertDocs fun

parent 531b18c3
Pipeline #1017 failed with stage
{-|
Module : Gargantext.API.Flow
Description : Main Flow API DataTypes
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Flow
where
-- import Gargantext.API.Prelude
import Gargantext.Prelude
data InputFlow = TextsInput
| NgramsInput
| ListInput
data Flow = EndFlow
| Texts InputFlow [Flow]
| Ngrams InputFlow [Flow]
| Lists InputFlow [Flow]
data OutputFlow
flow :: Flow -> OutputFlow
flow = undefined
...@@ -186,6 +186,7 @@ flow :: (FlowCmdM env err m, FlowCorpus a, MkCorpus c) ...@@ -186,6 +186,7 @@ flow :: (FlowCmdM env err m, FlowCorpus a, MkCorpus c)
-> [[a]] -> [[a]]
-> m CorpusId -> m CorpusId
flow c u cn la docs = do flow c u cn la docs = do
-- TODO if public insertMasterDocs else insertUserDocs
ids <- traverse (insertMasterDocs c la) docs ids <- traverse (insertMasterDocs c la) docs
flowCorpusUser (la ^. tt_lang) u cn c (concat ids) flowCorpusUser (la ^. tt_lang) u cn c (concat ids)
...@@ -224,6 +225,23 @@ flowCorpusUser l user corpusName ctype ids = do ...@@ -224,6 +225,23 @@ flowCorpusUser l user corpusName ctype ids = do
-- _ <- mkAnnuaire rootUserId userId -- _ <- mkAnnuaire rootUserId userId
pure userCorpusId pure userCorpusId
-- TODO Type NodeDocumentUnicised
insertDocs :: ( FlowCmdM env err m
, FlowCorpus a
)
=> [a]
-> UserId
-> CorpusId
-> m ([DocId], [DocumentWithId a])
insertDocs hs uId cId = do
let docs = map addUniqId hs
ids <- insertDb uId cId docs
let
ids' = map reId ids
documentsWithId = mergeData (toInserted ids) (Map.fromList $ map viewUniqId' docs)
_ <- Doc.add cId ids'
pure (ids', documentsWithId)
insertMasterDocs :: ( FlowCmdM env err m insertMasterDocs :: ( FlowCmdM env err m
, FlowCorpus a , FlowCorpus a
...@@ -235,46 +253,39 @@ insertMasterDocs :: ( FlowCmdM env err m ...@@ -235,46 +253,39 @@ insertMasterDocs :: ( FlowCmdM env err m
-> m [DocId] -> m [DocId]
insertMasterDocs c lang hs = do insertMasterDocs c lang hs = do
(masterUserId, _, masterCorpusId) <- getOrMk_RootWithCorpus (UserName userMaster) (Left corpusMasterName) c (masterUserId, _, masterCorpusId) <- getOrMk_RootWithCorpus (UserName userMaster) (Left corpusMasterName) c
(ids', documentsWithId) <- insertDocs hs masterUserId masterCorpusId
-- TODO Type NodeDocumentUnicised
let docs = map addUniqId hs
ids <- insertDb masterUserId masterCorpusId docs
let
ids' = map reId ids
documentsWithId = mergeData (toInserted ids) (Map.fromList $ map viewUniqId' docs)
-- TODO -- TODO
-- create a corpus with database name (CSV or PubMed) -- create a corpus with database name (CSV or PubMed)
-- add documents to the corpus (create node_node link) -- add documents to the corpus (create node_node link)
-- this will enable global database monitoring -- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int)) -- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
maps <- mapNodeIdNgrams mapNgramsDocs <- mapNodeIdNgrams
<$> documentIdWithNgrams (extractNgramsT $ withLang lang documentsWithId) documentsWithId <$> documentIdWithNgrams (extractNgramsT $ withLang lang documentsWithId) documentsWithId
terms2id <- insertNgrams $ Map.keys maps terms2id <- insertNgrams $ Map.keys mapNgramsDocs
-- to be removed -- to be removed
let indexedNgrams = Map.mapKeys (indexNgrams terms2id) maps let indexedNgrams = Map.mapKeys (indexNgrams terms2id) mapNgramsDocs
-- new -- new
lId <- getOrMkList masterCorpusId masterUserId lId <- getOrMkList masterCorpusId masterUserId
mapCgramsId <- listInsertDb lId toNodeNgramsW' mapCgramsId <- listInsertDb lId toNodeNgramsW'
$ map (first _ngramsTerms . second Map.keys) $ map (first _ngramsTerms . second Map.keys)
$ Map.toList maps $ Map.toList mapNgramsDocs
-- insertDocNgrams -- insertDocNgrams
_return <- insertNodeNodeNgrams2 _return <- insertNodeNodeNgrams2
$ catMaybes [ NodeNodeNgrams2 <$> Just nId $ catMaybes [ NodeNodeNgrams2 <$> Just nId
<*> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms'') <*> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms'')
<*> Just (fromIntegral w :: Double) <*> Just (fromIntegral w :: Double)
| (terms'', mapNgramsTypes) <- Map.toList maps | (terms'', mapNgramsTypes) <- Map.toList mapNgramsDocs
, (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes , (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
, (nId, w) <- Map.toList mapNodeIdWeight , (nId, w) <- Map.toList mapNodeIdWeight
] ]
_ <- Doc.add masterCorpusId ids'
_cooc <- insertDefaultNode NodeListCooc lId masterUserId _cooc <- insertDefaultNode NodeListCooc lId masterUserId
-- to be removed -- to be removed
_ <- insertDocNgrams lId indexedNgrams _ <- insertDocNgrams lId indexedNgrams
pure ids' pure ids'
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -319,7 +330,6 @@ documentIdWithNgrams f = traverse toDocumentIdWithNgrams ...@@ -319,7 +330,6 @@ documentIdWithNgrams f = traverse toDocumentIdWithNgrams
e <- f $ documentData d e <- f $ documentData d
pure $ DocumentIdWithNgrams d e pure $ DocumentIdWithNgrams d e
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance ExtractNgramsT HyperdataContact instance ExtractNgramsT HyperdataContact
where where
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment