Commit 67532a54 authored by Grégoire Locqueville's avatar Grégoire Locqueville Committed by Grégoire Locqueville

Add commands for subcorpus and node copy

parent f14a73d6
......@@ -352,6 +352,7 @@ library
Gargantext.Core.Methods.Similarities.Accelerate.Distributional
Gargantext.Core.Methods.Similarities.Accelerate.SpeGen
Gargantext.Core.Statistics
Gargantext.Core.Text.Corpus
Gargantext.Core.Text.Corpus.API.Hal
Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex
......
module Gargantext.Core.Text.Corpus (makeSubcorpusFromQuery, subcorpusEasy) where
import Control.Lens (view)
import Data.Set.Internal qualified as Set (singleton)
import Data.Text qualified as T
import Gargantext.API.Dev (runCmdReplEasy)
import Gargantext.API.Errors.Types (BackendInternalError(InternalNodeError))
import Gargantext.Core (Lang(EN))
import Gargantext.Core.NodeStory.Types (HasNodeStoryEnv)
import Gargantext.Core.NLP (HasNLPServer)
import Gargantext.Core.Text.Corpus.Query qualified as Q
import Gargantext.Core.Text.List.Social (FlowSocialListWith (..), FlowSocialListPriority (..))
import Gargantext.Core.Text.Ngrams (NgramsType (..))
import Gargantext.Core.Types.Individu (User (..))
import Gargantext.Core.Types.Main (ListType (..))
import Gargantext.Database.Action.Flow (buildSocialList, reIndexWith)
import Gargantext.Database.Action.Metrics (updateContextScore, updateNgramsOccurrences)
import Gargantext.Database.Action.Search (searchInCorpus)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus, hc_lang)
import Gargantext.Database.Admin.Types.Node (CorpusId, NodeId(UnsafeMkNodeId), NodeType(..), nodeId2ContextId)
import Gargantext.Database.Prelude (DBCmd')
import Gargantext.Database.Query.Facet.Types (facetDoc_id)
import Gargantext.Database.Query.Table.Node (insertDefaultNode, copyNodeStories, defaultList, getNodeWithType)
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Document (add)
import Gargantext.Database.Query.Table.Node.Error (NodeError(NoCorpusFound))
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
-- | A version of the below function for use in the REPL (so you don't need to
-- manually import tons of constructors etc.)
subcorpusEasy :: Text -- ^ Username
-> Int -- ^ Original corpus ID
-> Text -- ^ Search string
-> Bool -- ^ Whether to reuse the parent term list (True) or recompute one from scratch (False)
-> IO ()
subcorpusEasy username cId rawQuery reuseParentList = do
let eitherQuery = Q.parseQuery $ Q.RawQuery rawQuery
case eitherQuery of
Left msg -> print $ "Error parsing query \"" <> rawQuery <> "\": " <> T.pack msg
Right query -> void $ runCmdReplEasy $ makeSubcorpusFromQuery (UserName username) (UnsafeMkNodeId cId) query reuseParentList
-- | Given a "parent" corpus and a query, search for all docs in the parent
-- that match the query, and create a corpus from those. The created corpus
-- is inserted in the tree as a child of the parent corpus.
-- Creation of subcorpus "Docs" and "Terms" nodes is handled. The terms can be
-- either copied from the parent corpus or recomputed based on the subcorpus docs.
makeSubcorpusFromQuery :: ( HasNodeStoryEnv env
, HasNLPServer env
)
=> User -- ^ The corpus owner
-> CorpusId -- ^ ID of the parent corpus
-> Q.Query -- ^ The query to determine the subset of documents that will appear in the subcorpus
-> Bool -- ^ Whether to reuse parent term list (True) or compute a new one based only on the documents in the subcorpus (False)
-> DBCmd' env BackendInternalError CorpusId -- ^ The child corpus ID
makeSubcorpusFromQuery user supercorpusId query reuseParentList = do
userId <- getUserId user
-- Insert the required nodes:
-- 1. The subcorpus root (under the original corpus root)
subcorpusId <- insertDefaultNode NodeCorpus supercorpusId userId
-- 2. The context (aka "Docs", aka "Terms") node (under the subcorpus root)
_ <- insertDefaultNode NodeTexts subcorpusId userId
-- 3. The terms (aka "List") node
subListId <- insertDefaultNode NodeList subcorpusId userId
-- Get the ID of the original terms node
superListId <- defaultList supercorpusId
-- Get ahold of all contexts that match the query, and add them to the subcorpus
-- (note that contexts are attached to a *corpus* node, not a *docs* node,
-- notwithstanding what you might think from th UI)
facetDocs <- searchInCorpus supercorpusId False query Nothing Nothing Nothing
_ <- Document.add subcorpusId $ nodeId2ContextId . facetDoc_id <$> facetDocs
if reuseParentList
-- Either simply copy parent terms...
then void $ copyNodeStories superListId subListId
-- ... or rebuild a term list from scratch
-- TODO Check whether reusing the parent hyperdata is the right thing to do
else do
-- Get hyperdata from the original corpus
supercorpuses <- getNodeWithType supercorpusId NodeCorpus (Proxy :: Proxy HyperdataCorpus)
superHyperdata <- case supercorpuses of
[supercorpus] -> return $ view node_hyperdata supercorpus
_ -> throwError $ InternalNodeError NoCorpusFound
buildSocialList
(fromMaybe EN $ view hc_lang superHyperdata)
user
subcorpusId
subListId
(Just superHyperdata)
-- TODO Not completely sure what the following parameter is for
-- but I am guessing there should be a dialog to let the user decide
-- what it should be
(Just (FlowSocialListWithPriority MySelfFirst) :: Maybe FlowSocialListWith)
-- In both cases we'll need to reindex our terms list so it matches the contexts
-- in the newly created subcorpus
reIndexWith subcorpusId subListId NgramsTerms (Set.singleton MapTerm)
_ <- updateContextScore subcorpusId subListId
_ <- updateNgramsOccurrences subcorpusId subListId
return subcorpusId
......@@ -63,7 +63,8 @@ goodMapListSize :: Int
goodMapListSize = 350
-- | TODO improve grouping functions of Authors, Sources, Institutes..
-- | Consider using `buildSocialList` instead of this function.
-- TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists :: ( HasNodeStory env err m
, HasNLPServer env
, HasTreeError err
......
......@@ -35,6 +35,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
, flowCorpusUser
, flowAnnuaire
, insertMasterDocs
, buildSocialList
, saveDocNgramsWith
, addDocumentsToHyperCorpus
......
......@@ -13,7 +13,6 @@ Portability : POSIX
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TemplateHaskell #-}
......@@ -39,6 +38,10 @@ import Data.Text (pack, unpack)
import Data.Text qualified as T
import Data.Time (UTCTime)
import Data.TreeDiff
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField)
import Database.PostgreSQL.Simple.ToField (ToField, toField, toJSONField)
import Database.PostgreSQL.Simple.FromRow (FromRow, fromRow, field)
import Database.PostgreSQL.Simple.ToRow (ToRow, toRow)
import Fmt ( Buildable(..) )
import Gargantext.Core (HasDBid(..))
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
......@@ -270,10 +273,13 @@ instance ToField NodeId where
toField (UnsafeMkNodeId n) = toField n
instance ToRow NodeId where
toRow (UnsafeMkNodeId i) = [toField i]
instance FromRow NodeId where
fromRow = UnsafeMkNodeId <$> field
instance FromField NodeId where
fromField field mdata = do
n <- UnsafeMkNodeId <$> fromField field mdata
fromField fld mdata = do
n <- UnsafeMkNodeId <$> fromField fld mdata
if isPositive n
then pure n
else mzero
......
......@@ -59,23 +59,31 @@ module Gargantext.Database.Query.Table.Node
, deleteNode
, deleteNodes
-- * Copying data
, copyNode
, copyNodeStories
) where
import Control.Arrow (returnA)
import Control.Lens (set, view)
import Data.Aeson ( encode, Value )
import Data.Bimap ((!>))
import Database.PostgreSQL.Simple qualified as PGS
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.API.Errors.Types (BackendInternalError (..))
import Gargantext.Core ( HasDBid(toDBid) )
import Gargantext.Core.Types
import Gargantext.Core.Types.Query (Limit, Offset)
import Gargantext.Database.Admin.Config ( nodeTypes )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataAnnuaire, HyperdataCorpus )
import Gargantext.Database.Admin.Types.Hyperdata.Default ( defaultHyperdata, DefaultHyperdata(..) )
import Gargantext.Database.Admin.Types.Hyperdata.Folder (HyperdataFolder)
import Gargantext.Database.Admin.Types.Hyperdata.List ( HyperdataList )
import Gargantext.Database.Admin.Types.Hyperdata.Prelude ( Hyperdata )
import Gargantext.Database.Prelude (DBCmd, JSONB, mkCmd, runPGSQuery, runOpaQuery)
import Gargantext.Database.Prelude (DBCmd, JSONB, mkCmd, execPGSQuery, runPGSQuery, runOpaQuery)
import Gargantext.Database.Query.Filter (limit', offset')
import Gargantext.Database.Query.Table.Node.Children (getChildrenById)
import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Schema.Node
import Gargantext.Prelude hiding (sum, head)
......@@ -468,3 +476,69 @@ isUserNode userNodeId = (== [PGS.Only True])
WHERE n.id = ? AND n.typename = ? AND n.parent_id = NULL
)
|] (userNodeId, toDBid NodeUser)
-- | Copy a node somewhere else in the tree
copyNode :: Bool -- ^ Whether to copy whole subtree (`True`) or just the node (`False`)
-> Bool -- ^ Whether to deal with ngrams and contexts (`True`) or just the data in the `nodes` table (`False`)
-> NodeId -- ^ ID of the node to be copied
-> NodeId -- ^ ID of the node which will become the parent of the copied node
-> DBCmd BackendInternalError NodeId -- ^ ID of the copied node
copyNode copySubtree smart idToCopy newParentId = if copySubtree
-- Recursive copy:
then do
-- Non-recursively copy the node itself, then recursively copy its children:
copiedNode <- copyNode False smart idToCopy newParentId
children <- getChildrenById idToCopy
for_ children $ \child -> copyNode True smart child copiedNode
return copiedNode
-- Single-node (non-recursive) copy:
else do
newNodes <- runPGSQuery
-- Copy node. Should return exactly one ID, that of the new node:
[sql|
INSERT INTO public.nodes (typename, user_id, parent_id, name, date, hyperdata)
SELECT typename, user_id, ?, name, date, hyperdata FROM public.nodes WHERE id = ?
RETURNING id;
|] (newParentId, idToCopy)
case newNodes of -- Check that we got exactly one node back
[copiedNode] -> do
-- Copy node stories/contexts if applicable
when smart $ do
nodeToCopy <- getNode idToCopy
case nodeTypes !> view node_typename nodeToCopy of
NodeList -> copyNodeStories idToCopy copiedNode
-- Contexts are attached to a corpus node, not to the docs node:
NodeCorpus -> copyNodeContexts idToCopy copiedNode
_ -> return ()
return copiedNode
_ -> throwError $ InternalUnexpectedError $ SomeException $ PatternMatchFail $
"SQL insert returned zero or more than one node"
-- | Given two IDs of terms nodes, copies the node stories of the first into
-- node stories of the second. This effectively copies the terms from one terms
-- node to another.
-- TODO add a check that we are looking at the right type of node?
copyNodeStories :: NodeId -- ^ The ID of the node whose stories are to be copied
-> NodeId -- ^ The ID of the node under which to copy the stories
-> DBCmd BackendInternalError ()
copyNodeStories oldNodeId newNodeId = void $ execPGSQuery
[sql|
INSERT INTO node_stories
(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT ?, version, ngrams_type_id, ngrams_id, ngrams_repo_element
FROM node_stories
WHERE node_id = ?;
|] (newNodeId, oldNodeId)
-- | Given two IDs of Docs nodes, add to the second the contexts associated to
-- the first. Functionally, this copies the contexts from the first node to the
-- second, although the contexts are not technically duplicated in the database.
copyNodeContexts :: NodeId -- ^ The ID of the node whose contexts are to be "copied"
-> NodeId -- ^ The ID of the node under which to "copy" the contexts
-> DBCmd BackendInternalError ()
copyNodeContexts oldNodeId newNodeId = void $ execPGSQuery
[sql|
INSERT INTO node_contexts (node_id, context_id, score, category)
SELECT ?, context_id, score, category FROM node_stories WHERE node_id = ?
|] (newNodeId, oldNodeId)
......@@ -8,18 +8,22 @@ Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE Arrows #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Query.Table.Node.Children
where
import Control.Arrow (returnA)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.Core ( HasDBid(toDBid) )
import Gargantext.Core.Types
import Gargantext.Core.Types.Query (Limit, Offset)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument )
import Gargantext.Database.Prelude (DBCmd, JSONB, runCountOpaQuery, runOpaQuery)
import Gargantext.Database.Prelude (DBCmd, JSONB, runCountOpaQuery, runOpaQuery, runPGSQuery)
import Gargantext.Database.Query.Filter ( limit', offset' )
import Gargantext.Database.Query.Table.NodeContext ( NodeContextPoly(NodeContext), queryNodeContextTable )
import Gargantext.Database.Schema.Context
......@@ -58,6 +62,14 @@ getChildren pId p t@(Just NodeContact ) maybeOffset maybeLimit = getChildrenCont
getChildren a b c d e = getChildrenNode a b c d e
-- | Get the list of (IDs of) children of a given node (ID)
getChildrenById :: NodeId -- ^ ID of the parent node
-> DBCmd err [NodeId] -- ^ List of IDs of the children nodes
getChildrenById parentId = runPGSQuery
[sql| SELECT id FROM public.nodes WHERE parent_id = ?; |]
parentId
getChildrenNode :: (JSONB a, HasDBid NodeType)
=> ParentId
-> proxy a
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment