Commit cbae6ae4 authored by Grégoire Locqueville's avatar Grégoire Locqueville

Trying to recompute terms for the subcorpus

parent 13359943
Pipeline #6439 failed with stages
......@@ -101,6 +101,7 @@ library
import:
defaults
exposed-modules:
REPL
Gargantext
Gargantext.API
Gargantext.API.Admin.Auth.Types
......
......@@ -69,6 +69,9 @@ runCmdRepl f = withDevEnv defaultIniFile defaultSettingsFile $ \env -> runCmdDev
runCmdReplServantErr :: Cmd'' DevEnv ServerError a -> IO a
runCmdReplServantErr = runCmdRepl
runCmdReplBackendErr :: Cmd'' DevEnv BackendInternalError a -> IO a
runCmdReplBackendErr = runCmdRepl
-- In particular this writes the repo file after running
-- the command.
-- This function is constrained to the DevEnv rather than
......
......@@ -7,6 +7,9 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
See the whole flow graphically at:
https://dl.gargantext.org/workflow.svg
-- TODO-ACCESS:
-- check userId CanFillUserCorpus userCorpusId
-- check masterUserId CanFillMasterCorpus masterCorpusId
......@@ -37,6 +40,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
, insertMasterDocs
, saveDocNgramsWith
, addDocumentsToHyperCorpus
, configureNodes
, reIndexWith
......@@ -177,7 +181,7 @@ flowDataText :: forall env err m.
-> m CorpusId
flowDataText u (DataOld ids) tt cid mfslw _ = do
$(logLocM) DEBUG $ T.pack $ "Found " <> show (length ids) <> " old node IDs"
(_userId, userCorpusId, listId) <- createNodes (MkCorpusUserNormalCorpusIds u [cid]) corpusType
(_userId, userCorpusId, listId) <- configureNodes (MkCorpusUserNormalCorpusIds u [cid]) corpusType
_ <- Doc.add userCorpusId (map nodeId2ContextId ids)
flowCorpusUser (_tt_lang tt) u userCorpusId listId corpusType mfslw
where
......@@ -278,7 +282,7 @@ flow :: forall env err m a c.
-> JobHandle m
-> m CorpusId
flow c mkCorpusUser la mfslw (count, docsC) jobHandle = do
(_userId, userCorpusId, listId) <- createNodes mkCorpusUser c
(_userId, userCorpusId, listId) <- configureNodes mkCorpusUser c
-- TODO if public insertMasterDocs else insertUserDocs
nlpServer <- view $ nlpServerGet (_tt_lang la)
runConduit $ zipSources (yieldMany ([1..] :: [Int])) docsC
......@@ -319,13 +323,13 @@ addDocumentsToHyperCorpus ncs mb_hyper la corpusId docs = do
pure ids
------------------------------------------------------------------------
createNodes :: ( DbCmd' env err m, HasNodeError err, HasSettings env
configureNodes :: ( DbCmd' env err m, HasNodeError err, HasSettings env
, MkCorpus c
)
=> MkCorpusUser
-> Maybe c
-> m (UserId, CorpusId, ListId)
createNodes mkCorpusUser ctype = do
configureNodes mkCorpusUser ctype = do
-- User Flow
(userId, _rootId, userCorpusId) <- getOrMkRootWithCorpus mkCorpusUser ctype
-- NodeTexts is first
......
{-# LANGUAGE AllowAmbiguousTypes #-}
module REPL where
import Gargantext.Prelude
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.API.Dev (runCmdReplBackendErr)
import Gargantext.API.Errors.Types (BackendInternalError)
import Gargantext.Core.Types.Individu (User (..))
import qualified Gargantext.Database.Query.Table.Node.Document.Add as Document (add)
import Gargantext.Database.Prelude (DbCmd')
import Gargantext.Database.Admin.Types.Node (CorpusId, nodeId2ContextId, NodeType (..))
import Gargantext.Database.Action.Search (searchInCorpus)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Query.Table.Node (insertDefaultNode)
import Gargantext.Database.Query.Facet.Types (facetDoc_id)
import qualified Gargantext.Core.Text.Corpus.Query as Q
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.NLP (HasNLPServer)
import Gargantext.Database.Query.Tree.Error (HasTreeError)
import Gargantext.Core.Text.List.Group.WithStem (GroupParams (..))
import Gargantext.Database.Action.Flow (getOrMkRootWithCorpus)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser (..))
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
execText :: IO (Maybe CorpusId)
execText = runCmdReplBackendErr $ testSubcorpusFunction "user1" 133 "information" False
testSubcorpusFunction ::
forall env m. ( DbCmd' env BackendInternalError m
, HasNodeStory env BackendInternalError m
, HasNLPServer env
, HasNLPServer env
, HasTreeError BackendInternalError
, HasSettings env
)
=> Text -- ^ Username
-> CorpusId -- ^ Parent corpus ID
-> Text -- ^ The query, in text form
-> Bool -- ^ Whether to reuse parent term list (True) or compute a new one
-- based only on the documents in the subcorpus (False)
-> m (Maybe CorpusId)
testSubcorpusFunction username parentId queryText reuseParentList =
let eitherQuery = Q.parseQuery . Q.RawQuery $ queryText in
case eitherQuery of
Left _ -> return Nothing -- putStrLn ("Error parsing query " <> queryText) >> return Nothing -- TODO emit an actual error
Right query -> Just <$> makeSubcorpusFromQuery (UserName username) parentId query reuseParentList
-- | (WIP) Given a "parent" corpus and a query, search for all docs in the parent
-- that match the query, and create a corpus from those. The created corpus
-- is inserted in the tree as a child of the parent corpus.
-- Creation of subcorpus "Docs" and "Terms" nodes is handled. The terms can be
-- either copied from the parent corpus or recomputed based on the subcorpus docs.
makeSubcorpusFromQuery ::
forall env err m. ( DbCmd' env err m
, HasNodeError err
, HasNodeStory env err m
, HasNLPServer env
, HasTreeError err
, HasSettings env
)
=> User -- ^ The corpus owner
-> CorpusId -- ^ ID of the parent corpus
-> Q.Query -- ^ The query to determine the subset of documents that will appear in the subcorpus
-> Bool -- ^ Whether to reuse parent term list (True) or compute a new one
-- based only on the documents in the subcorpus (False)
-> m CorpusId -- ^ The child corpus ID
makeSubcorpusFromQuery user parentId query reuseParentList = do
userId <- getUserId user
subcorpusId <- insertDefaultNode NodeCorpus parentId userId
(_, _, masterCorpusId) <- getOrMkRootWithCorpus MkCorpusUserMaster (Nothing :: Maybe HyperdataCorpus)
-- Get ahold of all documents that match the query
facetDocs <- searchInCorpus parentId False query Nothing Nothing Nothing
-- Create subcorpus node with all the documents
_ <- Document.add subcorpusId $ nodeId2ContextId . facetDoc_id <$> facetDocs
-- Create nodes for docs and terms as children of the subcorpus
void $ insertDefaultNode NodeTexts subcorpusId userId
void $ insertDefaultNode NodeList subcorpusId userId
-- Either simply copy parent terms... (TODO)
if reuseParentList then return ()
-- ... or rebuild a term list from scratch
else void $ buildNgramsLists user subcorpusId masterCorpusId Nothing GroupIdentity
return subcorpusId
-- TODO
-- Permettre de (mettre les 2 en option) :
-- [X] 1a. relancer le buildNgramsLists (cf. module Flow)
-- [ ] 1b. ou copier la liste
-- [ ] 2. générer un graphe
-- Problèmes :
-- * Permission error quand j'essaie d'avoir l'ID du master corpus.
-- Je pense que getOrMkRootWithCorpus n'est pas la bonne fonction à utiliser
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment