Commit df88acde authored by Grégoire Locqueville's avatar Grégoire Locqueville

Cleaned up, reorganized

parent 589c5aa4
module Gargantext.Database.Action.Corpus where
import Data.Set.Internal qualified as Set
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types (BackendInternalError)
import Gargantext.Core.NodeStory.Types (HasNodeStoryEnv)
import Gargantext.Core.NLP (HasNLPServer)
import Gargantext.Core.Text.Corpus.Query qualified as Q
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem (GroupParams (..))
import Gargantext.Core.Text.List.Social (FlowSocialListWith (..), FlowSocialListPriority (..))
import Gargantext.Core.Text.Ngrams (NgramsType (..))
import Gargantext.Core.Types.Individu (User (..))
import Gargantext.Core.Types.Main (ListType (..))
import Gargantext.Database.Action.Flow (getOrMkRootWithCorpus, reIndexWith)
import Gargantext.Database.Action.Flow.List (flowList_DbRepo)
import Gargantext.Database.Action.Search (searchInCorpus)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Node (CorpusId, nodeId2ContextId, NodeType (..))
import Gargantext.Database.Prelude (DBCmd')
import Gargantext.Database.Query.Facet.Types (facetDoc_id)
import Gargantext.Database.Query.Table.Node (insertDefaultNode, copyNode)
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Document (add)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser (..))
import Gargantext.Prelude
-- | Given a "parent" corpus and a query, search for all docs in the parent
-- that match the query, and create a corpus from those. The created corpus
-- is inserted in the tree as a child of the parent corpus.
-- Creation of subcorpus "Docs" and "Terms" nodes is handled. The terms can be
-- either copied from the parent corpus or recomputed based on the subcorpus docs.
makeSubcorpusFromQuery :: ( HasSettings env
, HasNodeStoryEnv env
, HasNLPServer env
)
=> User -- ^ The corpus owner
-> CorpusId -- ^ ID of the parent corpus
-> Q.Query -- ^ The query to determine the subset of documents that will appear in the subcorpus
-> Bool -- ^ Whether to reuse parent term list (True) or compute a new one based only on the documents in the subcorpus (False)
-> DBCmd' env BackendInternalError CorpusId -- ^ The child corpus ID
makeSubcorpusFromQuery user parentId query reuseParentList = do
userId <- getUserId user
subcorpusId <- insertDefaultNode NodeCorpus parentId userId
(_, _, masterCorpusId) <- getOrMkRootWithCorpus MkCorpusUserMaster (Nothing :: Maybe HyperdataCorpus)
-- Get ahold of all documents that match the query
facetDocs <- searchInCorpus parentId False query Nothing Nothing Nothing
-- Create a subcorpus node with all the documents
_ <- Document.add subcorpusId $ nodeId2ContextId . facetDoc_id <$> facetDocs
-- Create nodes for docs and terms as children of the subcorpus
void $ insertDefaultNode NodeTexts subcorpusId userId
listId <- insertDefaultNode NodeList subcorpusId userId
-- Either simply copy parent terms... (TODO)
if reuseParentList then void $ copyNode True _parentList subcorpusId
-- ... or rebuild a term list from scratch
else do
ngrams <- buildNgramsLists
user
subcorpusId
masterCorpusId
(Just (FlowSocialListWithPriority MySelfFirst) :: Maybe FlowSocialListWith)
GroupIdentity
-- Save computed list
_ <- flowList_DbRepo listId ngrams
return ()
-- Reindex
_ <- reIndexWith subcorpusId listId NgramsTerms (Set.singleton MapTerm)
-- The following two lines (like the one just above) are copypasted from
-- the definition of flowCorpusUser, but I'm not sure whether they should be included
-- _ <- updateContextScore userCorpusId listId
-- _ <- updateNgramsOccurrences userCorpusId listId
return subcorpusId
-- TODO
-- Permettre de (mettre les 2 en option) :
-- [X] 1a. relancer le buildNgramsLists (cf. module Flow)
-- [ ] 1b. ou copier la liste (WIP)
-- [ ] Vérifier l'étape de réindexation reIndexWith
-- [ ] 2. générer un graphe
...@@ -25,6 +25,7 @@ import Control.Lens (set, view) ...@@ -25,6 +25,7 @@ import Control.Lens (set, view)
import Data.Aeson ( encode, Value ) import Data.Aeson ( encode, Value )
import Database.PostgreSQL.Simple qualified as PGS import Database.PostgreSQL.Simple qualified as PGS
import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.API.Errors.Types (BackendInternalError (..))
import Gargantext.Core ( HasDBid(toDBid) ) import Gargantext.Core ( HasDBid(toDBid) )
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Core.Types.Query (Limit, Offset) import Gargantext.Core.Types.Query (Limit, Offset)
...@@ -37,6 +38,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Prelude ( Hyperdata ) ...@@ -37,6 +38,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Prelude ( Hyperdata )
import Gargantext.Database.Admin.Types.Hyperdata.Default ( defaultHyperdata, DefaultHyperdata(..) ) import Gargantext.Database.Admin.Types.Hyperdata.Default ( defaultHyperdata, DefaultHyperdata(..) )
import Gargantext.Database.Prelude (DBCmd, JSONB, mkCmd, runPGSQuery, runOpaQuery) import Gargantext.Database.Prelude (DBCmd, JSONB, mkCmd, runPGSQuery, runOpaQuery)
import Gargantext.Database.Query.Filter (limit', offset') import Gargantext.Database.Query.Filter (limit', offset')
import Gargantext.Database.Query.Table.Node.Children (getChildrenById)
import Gargantext.Database.Query.Table.Node.Error import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Schema.Node import Gargantext.Database.Schema.Node
import Gargantext.Prelude hiding (sum, head) import Gargantext.Prelude hiding (sum, head)
...@@ -431,82 +433,27 @@ defaultListMaybe cId = headMay <$> map (view node_id ) <$> getListsWithParentId ...@@ -431,82 +433,27 @@ defaultListMaybe cId = headMay <$> map (view node_id ) <$> getListsWithParentId
getListsWithParentId :: HasDBid NodeType => NodeId -> DBCmd err [Node HyperdataList] getListsWithParentId :: HasDBid NodeType => NodeId -> DBCmd err [Node HyperdataList]
getListsWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeList) getListsWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeList)
------------------------------------------------------------------------
-- INSERT INTO public.nodes (hash_id, typename, user_id, parent_id, name, date, hyperdata)
-- SELECT 'tutu', typename, user_id, 97, name, date, hyperdata FROM public.nodes WHERE id = 165;
copyNodeSingle :: NodeId -> NodeId -> DBCmd err NodeId -- | Copy a node somewhere else in the tree
copyNodeSingle idToCopy newParentId = do copyNode :: Bool -- ^ Whether to copy whole subtree (True) or just the node (False)
-> NodeId -- ^ ID of the node to be copied
-> NodeId -- ^ ID of the node which will become the parent of the copied node
-> DBCmd BackendInternalError NodeId -- ^ ID of the copied node
copyNode copySubtree idToCopy newParentId = if copySubtree
then do
copiedNode <- copyNode False idToCopy newParentId
children <- getChildrenById idToCopy
for_ children $ \child -> copyNode True child copiedNode
return copiedNode
else do
newNodes <- runPGSQuery newNodes <- runPGSQuery
-- Copy node. Should return exactly one ID, that of the new node:
[sql| [sql|
INSERT INTO public.nodes (typename, user_id, parent_id, name, date, hyperdata) INSERT INTO public.nodes (typename, user_id, parent_id, name, date, hyperdata)
SELECT typename, user_id, ?, name, date, hyperdata FROM public.nodes WHERE id = ? SELECT typename, user_id, ?, name, date, hyperdata FROM public.nodes WHERE id = ?
RETURNING id; RETURNING id;
|] (newParentId, idToCopy) |] (newParentId, idToCopy)
case newNodes of case newNodes of
[newNode] -> return newNode [copiedNode] -> return copiedNode
_ -> panicTrace "Error" -- TODO specify error _ -> throwError $ InternalUnexpectedError $ SomeException $ PatternMatchFail $
"SQL insert returned zero or more than one node"
-- TODO Enforce a maximal depth level?
-- TODO Use SQL builtin recursivity?
copyNodeRecursive :: NodeId -> NodeId -> DBCmd err NodeId
copyNodeRecursive idToCopy newParentId = do
copiedNode <- copyNodeSingle idToCopy newParentId
children <- getChildren' idToCopy
for_ children $ \child -> copyNodeRecursive child copiedNode
return copiedNode
-- TODO delete this and replace calls to it by calls to getChildren
getChildren' :: NodeId -> DBCmd err [NodeId]
getChildren' nodeId = runPGSQuery
[sql|
SELECT id FROM public.nodes WHERE parent_id = ?;
|]
nodeId
-- INSERT INTO public.nodes (typename, user_id, parent_id, name, date, hyperdata)
-- SELECT typename, user_id, 137, name, date, hyperdata FROM public.nodes WHERE id = 165
-- RETURNING id;
--
-- SELECT id FROM public.nodes WHERE parent_id = 137;
-- digest(CONCAT(?, NEW.typename, NEW.name, NEW.id, NEW.hyperdata), 'sha256')
-- copyNode :: (HasNodeError err) => NodeId -> DBCmd err Int64
-- copyNode nodeIdToCopy = mkCmd $ \connection -> proc
-- runSelect
-- TODO
-- [ ] Performer la substitution
-- [ ] Gérer le hash_id
-- nodeToCopy <- getNode nodeIdToCopy constant
-- _ -- return nodeToCopy
-- where
-- valueToHyperdata v = case fromJSON v of
-- Success a -> pure a
-- Error _err -> returnError ConversionFailed field
-- $ DL.unwords [ "cannot parse hyperdata for JSON: "
-- , show v
-- ]
-- nodeExists :: (HasNodeError err) => NodeId -> DBCmd err Bool
-- nodeExists nId = (== [PGS.Only True])
-- <$> runPGSQuery [sql|SELECT true FROM nodes WHERE id = ? |] (PGS.Only nId)
--
-- getNode :: HasNodeError err => NodeId -> DBCmd err (Node Value)
-- getNode nId = do
-- maybeNode <- headMay <$> runOpaQuery (selectNode (pgNodeId nId))
-- case maybeNode of
-- Nothing -> nodeError (DoesNotExist nId)
-- Just r -> pure r
--
-- getNodeWith :: (HasNodeError err, JSONB a)
-- => NodeId -> proxy a -> DBCmd err (Node a)
-- getNodeWith nId _ = do
-- maybeNode <- headMay <$> runOpaQuery (selectNode (pgNodeId nId))
-- case maybeNode of
-- Nothing -> nodeError (DoesNotExist nId)
-- Just r -> pure r
...@@ -11,6 +11,7 @@ Portability : POSIX ...@@ -11,6 +11,7 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-} {-# LANGUAGE Arrows #-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Query.Table.Node.Children module Gargantext.Database.Query.Table.Node.Children
where where
...@@ -21,7 +22,8 @@ import Gargantext.Core.Types ...@@ -21,7 +22,8 @@ import Gargantext.Core.Types
import Gargantext.Core.Types.Query (Limit, Offset) import Gargantext.Core.Types.Query (Limit, Offset)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact ) import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument ) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument )
import Gargantext.Database.Prelude (DBCmd, JSONB, runCountOpaQuery, runOpaQuery) import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.Database.Prelude (DBCmd, JSONB, runCountOpaQuery, runOpaQuery, runPGSQuery)
import Gargantext.Database.Query.Filter ( limit', offset' ) import Gargantext.Database.Query.Filter ( limit', offset' )
import Gargantext.Database.Query.Table.NodeContext ( NodeContextPoly(NodeContext), queryNodeContextTable ) import Gargantext.Database.Query.Table.NodeContext ( NodeContextPoly(NodeContext), queryNodeContextTable )
import Gargantext.Database.Schema.Context import Gargantext.Database.Schema.Context
...@@ -60,6 +62,14 @@ getChildren pId p t@(Just NodeContact ) maybeOffset maybeLimit = getChildrenCont ...@@ -60,6 +62,14 @@ getChildren pId p t@(Just NodeContact ) maybeOffset maybeLimit = getChildrenCont
getChildren a b c d e = getChildrenNode a b c d e getChildren a b c d e = getChildrenNode a b c d e
-- | Get the list of (IDs of) children of a given node (ID)
getChildrenById :: NodeId -- ^ ID of the parent node
-> DBCmd err [NodeId] -- ^ List of IDs of the children nodes
getChildrenById parentId = runPGSQuery
[sql| SELECT id FROM public.nodes WHERE parent_id = ?; |]
parentId
getChildrenNode :: (JSONB a, HasDBid NodeType) getChildrenNode :: (JSONB a, HasDBid NodeType)
=> ParentId => ParentId
-> proxy a -> proxy a
......
module REPL where module REPL where
import Gargantext.Prelude import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError) import Gargantext.API.Dev (runCmdReplEasy)
import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.API.Dev (runCmdReplBackendErr)
import Gargantext.API.Errors.Types (BackendInternalError) import Gargantext.API.Errors.Types (BackendInternalError)
import Gargantext.Core.NLP (HasNLPServer)
import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Corpus.Query qualified as Q
import Gargantext.Core.Types.Individu (User (..)) import Gargantext.Core.Types.Individu (User (..))
import qualified Gargantext.Database.Query.Table.Node.Document.Add as Document (add) import Gargantext.Database.Action.Corpus (makeSubcorpusFromQuery)
import Gargantext.Database.Admin.Types.Node (CorpusId)
import Gargantext.Database.Prelude (DbCmd') import Gargantext.Database.Prelude (DbCmd')
import Gargantext.Database.Admin.Types.Node (CorpusId, nodeId2ContextId, NodeType (..))
import Gargantext.Database.Action.Search (searchInCorpus)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Query.Table.Node (insertDefaultNode)
import Gargantext.Database.Query.Facet.Types (facetDoc_id)
import qualified Gargantext.Core.Text.Corpus.Query as Q
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.NLP (HasNLPServer)
import Gargantext.Database.Query.Tree.Error (HasTreeError) import Gargantext.Database.Query.Tree.Error (HasTreeError)
import Gargantext.Core.Text.List.Group.WithStem (GroupParams (..)) import Gargantext.Prelude
import Gargantext.Database.Action.Flow (getOrMkRootWithCorpus)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser (..))
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Action.Flow.List (flowList_DbRepo)
import Gargantext.Core.Types (HasValidationError)
import Gargantext.Core.Text.List.Social (FlowSocialListWith (..), FlowSocialListPriority (..))
execText :: IO (Maybe CorpusId) execText :: IO (Maybe CorpusId)
execText = runCmdReplBackendErr $ testSubcorpusFunction "user1" 133 "information" False execText = runCmdReplEasy $ testSubcorpusFunction "user1" 133 "information" False
testSubcorpusFunction :: testSubcorpusFunction ::
forall env m. ( DbCmd' env BackendInternalError m forall env m. ( DbCmd' env BackendInternalError m
...@@ -51,63 +38,3 @@ testSubcorpusFunction username parentId queryText reuseParentList = ...@@ -51,63 +38,3 @@ testSubcorpusFunction username parentId queryText reuseParentList =
Left _ -> return Nothing -- putStrLn ("Error parsing query " <> queryText) >> return Nothing -- TODO emit an actual error Left _ -> return Nothing -- putStrLn ("Error parsing query " <> queryText) >> return Nothing -- TODO emit an actual error
Right query -> Just <$> makeSubcorpusFromQuery (UserName username) parentId query reuseParentList Right query -> Just <$> makeSubcorpusFromQuery (UserName username) parentId query reuseParentList
-- | (WIP) Given a "parent" corpus and a query, search for all docs in the parent
-- that match the query, and create a corpus from those. The created corpus
-- is inserted in the tree as a child of the parent corpus.
-- Creation of subcorpus "Docs" and "Terms" nodes is handled. The terms can be
-- either copied from the parent corpus or recomputed based on the subcorpus docs.
makeSubcorpusFromQuery ::
forall env err m. ( DbCmd' env err m
, HasValidationError err
, HasNodeError err
, HasNodeStory env err m
, HasNLPServer env
, HasTreeError err
, HasSettings env
)
=> User -- ^ The corpus owner
-> CorpusId -- ^ ID of the parent corpus
-> Q.Query -- ^ The query to determine the subset of documents that will appear in the subcorpus
-> Bool -- ^ Whether to reuse parent term list (True) or compute a new one based only on the documents in the subcorpus (False)
-> m CorpusId -- ^ The child corpus ID
makeSubcorpusFromQuery user parentId query reuseParentList = do
userId <- getUserId user
subcorpusId <- insertDefaultNode NodeCorpus parentId userId
(_, _, masterCorpusId) <- getOrMkRootWithCorpus MkCorpusUserMaster (Nothing :: Maybe HyperdataCorpus)
-- Get ahold of all documents that match the query
facetDocs <- searchInCorpus parentId False query Nothing Nothing Nothing
-- Create a subcorpus node with all the documents
_ <- Document.add subcorpusId $ nodeId2ContextId . facetDoc_id <$> facetDocs
-- Create nodes for docs and terms as children of the subcorpus
void $ insertDefaultNode NodeTexts subcorpusId userId
listId <- insertDefaultNode NodeList subcorpusId userId
-- Either simply copy parent terms... (TODO)
if reuseParentList then return ()
-- ... or rebuild a term list from scratch
else do
ngrams <- buildNgramsLists
user
subcorpusId
masterCorpusId
(Just (FlowSocialListWithPriority MySelfFirst) :: Maybe FlowSocialListWith)
GroupIdentity
-- Save computed list
_ <- flowList_DbRepo listId ngrams
return ()
-- Reindex
reIndexWith subcorpusId listId NgramsType
-- CorpusId
-- ListId
-- NgramsType
-- Set ListType
-- m ()
return subcorpusId
-- TODO
-- Permettre de (mettre les 2 en option) :
-- [X] 1a. relancer le buildNgramsLists (cf. module Flow)
-- [ ] 1b. ou copier la liste
-- Ajouter l'étape de réindexation reIndexWith
-- [ ] 2. générer un graphe
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment