Commit efa77558 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FLOW] InsertDocument in corpus rewritten.

parent c8538e14
...@@ -7,6 +7,11 @@ Maintainer : team@gargantext.org ...@@ -7,6 +7,11 @@ Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
-- TODO-ACCESS:
-- check userId CanFillUserCorpus userCorpusId
-- check masterUserId CanFillMasterCorpus masterCorpusId
-} -}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
...@@ -43,7 +48,7 @@ import Gargantext.Database.Flow.Utils (insertToNodeNgrams) ...@@ -43,7 +48,7 @@ import Gargantext.Database.Flow.Utils (insertToNodeNgrams)
import Gargantext.Text.Terms (extractTerms) import Gargantext.Text.Terms (extractTerms)
import Gargantext.Text.Metrics.TFICF (Tficf(..)) import Gargantext.Text.Metrics.TFICF (Tficf(..))
import Gargantext.Database.Metrics.Count (getNgramsElementsWithParentNodeId) import Gargantext.Database.Metrics.Count (getNgramsElementsWithParentNodeId)
import Gargantext.Database.Node.Document.Add (add) import qualified Gargantext.Database.Node.Document.Add as Doc (add)
import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..)) import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Root (getRoot) import Gargantext.Database.Root (getRoot)
import Gargantext.Database.Schema.Ngrams (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId) import Gargantext.Database.Schema.Ngrams (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
...@@ -72,52 +77,54 @@ type FlowCmdM env err m = ...@@ -72,52 +77,54 @@ type FlowCmdM env err m =
, HasRepoVar env , HasRepoVar env
) )
type DocId = NodeId
flowCorpus :: FlowCmdM env ServantErr m flowCorpus :: FlowCmdM env ServantErr m
=> FileFormat -> FilePath -> CorpusName -> m CorpusId => Username -> FileFormat -> FilePath -> CorpusName -> m CorpusId
flowCorpus ff fp cName = do flowCorpus userName ff fp corpusName = do
--insertUsers [gargantuaUser, simpleUser]
hyperdataDocuments' <- map addUniqIdsDoc <$> liftIO (parseDocs ff fp)
params <- flowInsert NodeCorpus hyperdataDocuments' cName
flowCorpus' NodeCorpus hyperdataDocuments' params
-- Master Flow
docs <- map addUniqIdsDoc <$> liftIO (parseDocs ff fp)
-- ChunkAlong needed for big corpora
ids <- mapM insertMasterDocs $ chunkAlong 10000 10000 docs
flowInsert :: HasNodeError err => NodeType -> [HyperdataDocument] -> CorpusName -- User Flow
-> Cmd err ([ReturnId], MasterUserId, MasterCorpusId, UserId, CorpusId) (userId, _rootId, userCorpusId) <- getOrMkRootWithCorpus userName corpusName
flowInsert _nt hyperdataDocuments cName = do -- TODO: check if present already, ignore
let hyperdataDocuments' = map (\h -> ToDbDocument h) hyperdataDocuments _ <- Doc.add userCorpusId $ concat ids
(masterUserId, _, masterCorpusId) <- subFlowCorpus userMaster corpusMasterName -- User List Flow
ids <- insertDocuments masterUserId masterCorpusId NodeDocument hyperdataDocuments' -- ngs <- getNgramsElementsWithParentNodeId masterCorpusId
--_masterListId <- flowList masterUserId masterCorpusId ngs
--_userListId <- flowListUser userId userCorpusId ngs 100
(userId, _, userCorpusId) <- subFlowCorpus userArbitrary cName -- User Graph Flow
_ <- add userCorpusId (map reId ids) _ <- mkGraph userCorpusId userId
pure (ids, masterUserId, masterCorpusId, userId, userCorpusId) -- User Dashboard Flow
_ <- mkDashboard userCorpusId userId
-- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId
-- TODO-ACCESS: pure userCorpusId
-- check userId CanFillUserCorpus userCorpusId
-- check masterUserId CanFillMasterCorpus masterCorpusId
-- insertMasterDocs :: FlowCmdM env ServantErr m
-- TODO-EVENTS: => [HyperdataDocument] -> m [DocId]
-- InsertedNgrams ? insertMasterDocs hs = do
-- InsertedNodeNgrams ?
flowCorpus' :: FlowCmdM env err m let hyperdataDocuments' = map (\h -> ToDbDocument h) hs
=> NodeType -> [HyperdataDocument]
-> ([ReturnId], UserId, CorpusId, UserId, CorpusId) -- TODO put in State Monad
-> m CorpusId (masterUserId, _, masterCorpusId) <- getOrMkRootWithCorpus userMaster corpusMasterName
flowCorpus' NodeCorpus ids <- insertDocuments masterUserId masterCorpusId NodeDocument hyperdataDocuments'
hyperdataDocuments
( ids let documentsWithId = mergeData (toInserted ids) (toInsert hs)
, masterUserId
, masterCorpusId
, userId,userCorpusId) = do
--------------------------------------------------
let documentsWithId = mergeData (toInserted ids) (toInsert hyperdataDocuments)
--printDebug "documentsWithId" documentsWithId
docsWithNgrams <- documentIdWithNgrams extractNgramsT documentsWithId docsWithNgrams <- documentIdWithNgrams extractNgramsT documentsWithId
--printDebug "docsWithNgrams" docsWithNgrams
let maps = mapNodeIdNgrams docsWithNgrams let maps = mapNodeIdNgrams docsWithNgrams
--printDebug "maps" (maps) --printDebug "maps" (maps)
...@@ -125,57 +132,44 @@ flowCorpus' NodeCorpus ...@@ -125,57 +132,44 @@ flowCorpus' NodeCorpus
let indexedNgrams = DM.mapKeys (indexNgrams terms2id) maps let indexedNgrams = DM.mapKeys (indexNgrams terms2id) maps
--printDebug "inserted ngrams" indexedNgrams --printDebug "inserted ngrams" indexedNgrams
_ <- insertToNodeNgrams indexedNgrams _ <- insertToNodeNgrams indexedNgrams
pure $ map reId ids
getUserCorpusNgrams :: FlowCmdM env ServantErr m
=> CorpusId -> m [Ngrams]
getUserCorpusNgrams = undefined
-- List Ngrams Flow
-- get elements
-- filter by TFICF
--let ngs = ngrams2list' indexedNgrams
ngs <- getNgramsElementsWithParentNodeId masterCorpusId
_masterListId <- flowList masterUserId masterCorpusId ngs
_userListId <- flowListUser userId userCorpusId ngs 100
--------------------------------------------------
_ <- mkDashboard userCorpusId userId
_ <- mkGraph userCorpusId userId
-- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId
pure userCorpusId
-- del [corpusId2, corpusId]
flowCorpus' NodeAnnuaire
_hyperdataDocuments
( _ids
, _masterUserId
, _masterCorpusId
, _userId
, _userCorpusId) = undefined
flowCorpus' _ _ _ = undefined
type CorpusName = Text type CorpusName = Text
subFlowCorpus :: HasNodeError err getOrMkRootWithCorpus :: HasNodeError err
=> Username -> CorpusName => Username -> CorpusName
-> Cmd err (UserId, RootId, CorpusId) -> Cmd err (UserId, RootId, CorpusId)
subFlowCorpus username cName = do getOrMkRootWithCorpus username cName = do
maybeUserId <- getUser username maybeUserId <- getUser username
userId <- case maybeUserId of userId <- case maybeUserId of
Nothing -> nodeError NoUserFound Nothing -> nodeError NoUserFound
-- mk NodeUser gargantua_id "Node Gargantua"
Just user -> pure $ userLight_id user Just user -> pure $ userLight_id user
--printDebug "userId" userId
rootId' <- map _node_id <$> getRoot username rootId' <- map _node_id <$> getRoot username
--printDebug "rootId'" rootId'
rootId'' <- case rootId' of rootId'' <- case rootId' of
[] -> mkRoot username userId [] -> mkRoot username userId
n -> case length n >= 2 of n -> case length n >= 2 of
True -> nodeError ManyNodeUsers True -> nodeError ManyNodeUsers
False -> pure rootId' False -> pure rootId'
--printDebug "rootId''" rootId''
rootId <- maybe (nodeError NoRootFound) pure (head rootId'') rootId <- maybe (nodeError NoRootFound) pure (head rootId'')
corpusId'' <- if username == userMaster corpusId'' <- if username == userMaster
...@@ -191,11 +185,11 @@ subFlowCorpus username cName = do ...@@ -191,11 +185,11 @@ subFlowCorpus username cName = do
corpusId <- maybe (nodeError NoCorpusFound) pure (head corpusId') corpusId <- maybe (nodeError NoCorpusFound) pure (head corpusId')
--printDebug "(username, userId, rootId, corpusId)"
-- (username, userId, rootId, corpusId)
pure (userId, rootId, corpusId) pure (userId, rootId, corpusId)
------------------------------------------------------------------------ ------------------------------------------------------------------------
toInsert :: [HyperdataDocument] -> Map HashId HyperdataDocument toInsert :: [HyperdataDocument] -> Map HashId HyperdataDocument
toInsert = DM.fromList . map (\d -> (maybe err identity (_hyperdataDocument_uniqId d), d)) toInsert = DM.fromList . map (\d -> (maybe err identity (_hyperdataDocument_uniqId d), d))
...@@ -357,17 +351,6 @@ groupNgramsBy isEqual cId = DM.fromList $ catMaybes [ isEqual n1 n2 | n1 <- DM.k ...@@ -357,17 +351,6 @@ groupNgramsBy isEqual cId = DM.fromList $ catMaybes [ isEqual n1 n2 | n1 <- DM.k
-} -}
-- TODO check: do not insert duplicates
insertGroups :: HasNodeError err
=> ListId
-> Map NgramsIndexed NgramsIndexed
-> Cmd err Int
insertGroups lId ngrs =
insertNodeNgramsNgramsNew [ NodeNgramsNgrams lId ng1 ng2 (Just 1)
| (ng1, ng2) <- map (both _ngramsId) $ DM.toList ngrs
, ng1 /= ng2
]
------------------------------------------------------------------------ ------------------------------------------------------------------------
ngrams2list :: Map NgramsIndexed (Map NgramsType a) ngrams2list :: Map NgramsIndexed (Map NgramsType a)
-> [(ListType, (NgramsType, NgramsIndexed))] -> [(ListType, (NgramsType, NgramsIndexed))]
...@@ -388,17 +371,9 @@ ngrams2list' m = fromListWith (<>) ...@@ -388,17 +371,9 @@ ngrams2list' m = fromListWith (<>)
-- | TODO: weight of the list could be a probability
insertLists :: HasNodeError err
=> ListId
-> [(ListType, (NgramsType, NgramsIndexed))]
-> Cmd err Int
insertLists lId lngs = insertNodeNgrams [ NodeNgram lId (_ngramsId ng) Nothing (ngramsTypeId ngt) (fromIntegral $ listTypeId l) 1
| (l,(ngt, ng)) <- lngs
]
------------------------------------------------------------------------ ------------------------------------------------------------------------
{-
-- | Annuaire -- | Annuaire
flowAnnuaire :: FlowCmdM env ServantErr m => FilePath -> m () flowAnnuaire :: FlowCmdM env ServantErr m => FilePath -> m ()
...@@ -450,4 +425,4 @@ subFlowAnnuaire username _cName = do ...@@ -450,4 +425,4 @@ subFlowAnnuaire username _cName = do
(username, userId, rootId, corpusId) (username, userId, rootId, corpusId)
pure (userId, rootId, corpusId) pure (userId, rootId, corpusId)
-}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment