Commit 420bad76 authored by Grégoire Locqueville's avatar Grégoire Locqueville

Merge branch 'tmp-subcorpus' into subcorpus

parents e26da1cb 2b4c4297
Pipeline #6499 failed with stages
...@@ -178,6 +178,7 @@ library ...@@ -178,6 +178,7 @@ library
Gargantext.Core.NodeStory.Types Gargantext.Core.NodeStory.Types
Gargantext.Core.Text Gargantext.Core.Text
Gargantext.Core.Text.Context Gargantext.Core.Text.Context
Gargantext.Core.Text.Corpus
Gargantext.Core.Text.Corpus.API Gargantext.Core.Text.Corpus.API
Gargantext.Core.Text.Corpus.API.Arxiv Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.EPO Gargantext.Core.Text.Corpus.API.EPO
......
module Gargantext.Core.Text.Corpus (makeSubcorpusFromQuery) where
import Data.Set.Internal qualified as Set
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types (BackendInternalError)
import Gargantext.Core.NodeStory.Types (HasNodeStoryEnv)
import Gargantext.Core.NLP (HasNLPServer)
import Gargantext.Core.Text.Corpus.Query qualified as Q
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem (GroupParams (..))
import Gargantext.Core.Text.List.Social (FlowSocialListWith (..), FlowSocialListPriority (..))
import Gargantext.Core.Text.Ngrams (NgramsType (..))
import Gargantext.Core.Types.Individu (User (..))
import Gargantext.Core.Types.Main (ListType (..))
import Gargantext.Database.Action.Flow (getOrMkRootWithCorpus, reIndexWith)
import Gargantext.Database.Action.Flow.List (flowList_DbRepo)
import Gargantext.Database.Action.Search (searchInCorpus)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Node (CorpusId, nodeId2ContextId, NodeType (..))
import Gargantext.Database.Prelude (DBCmd')
import Gargantext.Database.Query.Facet.Types (facetDoc_id)
import Gargantext.Database.Query.Table.Node (insertDefaultNode, copyNode)
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Document (add)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser (..))
import Gargantext.Prelude
-- | Given a "parent" corpus and a query, search for all docs in the parent
-- that match the query, and create a corpus from those. The created corpus
-- is inserted in the tree as a child of the parent corpus.
-- Creation of subcorpus "Docs" and "Terms" nodes is handled. The terms can be
-- either copied from the parent corpus or recomputed based on the subcorpus docs.
makeSubcorpusFromQuery :: ( HasSettings env
, HasNodeStoryEnv env
, HasNLPServer env
)
=> User -- ^ The corpus owner
-> CorpusId -- ^ ID of the parent corpus
-> Q.Query -- ^ The query to determine the subset of documents that will appear in the subcorpus
-> Bool -- ^ Whether to reuse parent term list (True) or compute a new one based only on the documents in the subcorpus (False)
-> DBCmd' env BackendInternalError CorpusId -- ^ The child corpus ID
makeSubcorpusFromQuery user parentId query reuseParentList = do
userId <- getUserId user
subcorpusId <- insertDefaultNode NodeCorpus parentId userId
(_, _, masterCorpusId) <- getOrMkRootWithCorpus MkCorpusUserMaster (Nothing :: Maybe HyperdataCorpus)
-- Get ahold of all documents that match the query
facetDocs <- searchInCorpus parentId False query Nothing Nothing Nothing
-- Create a subcorpus node with all the documents
_ <- Document.add subcorpusId $ nodeId2ContextId . facetDoc_id <$> facetDocs
-- Create nodes for docs and terms as children of the subcorpus
void $ insertDefaultNode NodeTexts subcorpusId userId
listId <- insertDefaultNode NodeList subcorpusId userId
-- Either simply copy parent terms...
-- (TODO this does not keep subterms)
if reuseParentList then void $ copyNode True listId subcorpusId
-- ... or rebuild a term list from scratch
else do
ngrams <- buildNgramsLists
user
subcorpusId
masterCorpusId
(Just (FlowSocialListWithPriority MySelfFirst) :: Maybe FlowSocialListWith)
GroupIdentity
-- Save computed list
_ <- flowList_DbRepo listId ngrams
return ()
-- Reindex
_ <- reIndexWith subcorpusId listId NgramsTerms (Set.singleton MapTerm)
-- The following two lines (like the one just above) are copypasted from
-- the definition of flowCorpusUser, but I'm not sure whether they should be included
-- _ <- updateContextScore userCorpusId listId
-- _ <- updateNgramsOccurrences userCorpusId listId
return subcorpusId
...@@ -37,6 +37,7 @@ import Data.TreeDiff ...@@ -37,6 +37,7 @@ import Data.TreeDiff
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField) import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField)
import Database.PostgreSQL.Simple.ToField (ToField, toField, toJSONField) import Database.PostgreSQL.Simple.ToField (ToField, toField, toJSONField)
import Database.PostgreSQL.Simple.ToRow (ToRow, toRow) import Database.PostgreSQL.Simple.ToRow (ToRow, toRow)
import Database.PostgreSQL.Simple.FromRow (FromRow, fromRow, field)
import Fmt ( Buildable(..) ) import Fmt ( Buildable(..) )
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Schema.Context import Gargantext.Database.Schema.Context
...@@ -267,10 +268,13 @@ instance ToField NodeId where ...@@ -267,10 +268,13 @@ instance ToField NodeId where
toField (UnsafeMkNodeId n) = toField n toField (UnsafeMkNodeId n) = toField n
instance ToRow NodeId where instance ToRow NodeId where
toRow (UnsafeMkNodeId i) = [toField i] toRow (UnsafeMkNodeId i) = [toField i]
instance FromRow NodeId where
fromRow = UnsafeMkNodeId <$> field
instance FromField NodeId where instance FromField NodeId where
fromField field mdata = do fromField fld mdata = do
n <- UnsafeMkNodeId <$> fromField field mdata n <- UnsafeMkNodeId <$> fromField fld mdata
if isPositive n if isPositive n
then pure n then pure n
else mzero else mzero
......
...@@ -25,6 +25,7 @@ import Control.Lens (set, view) ...@@ -25,6 +25,7 @@ import Control.Lens (set, view)
import Data.Aeson ( encode, Value ) import Data.Aeson ( encode, Value )
import Database.PostgreSQL.Simple qualified as PGS import Database.PostgreSQL.Simple qualified as PGS
import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.API.Errors.Types (BackendInternalError (..))
import Gargantext.Core ( HasDBid(toDBid) ) import Gargantext.Core ( HasDBid(toDBid) )
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Core.Types.Query (Limit, Offset) import Gargantext.Core.Types.Query (Limit, Offset)
...@@ -37,6 +38,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Prelude ( Hyperdata ) ...@@ -37,6 +38,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Prelude ( Hyperdata )
import Gargantext.Database.Admin.Types.Hyperdata.Default ( defaultHyperdata, DefaultHyperdata(..) ) import Gargantext.Database.Admin.Types.Hyperdata.Default ( defaultHyperdata, DefaultHyperdata(..) )
import Gargantext.Database.Prelude (DBCmd, JSONB, mkCmd, runPGSQuery, runOpaQuery) import Gargantext.Database.Prelude (DBCmd, JSONB, mkCmd, runPGSQuery, runOpaQuery)
import Gargantext.Database.Query.Filter (limit', offset') import Gargantext.Database.Query.Filter (limit', offset')
import Gargantext.Database.Query.Table.Node.Children (getChildrenById)
import Gargantext.Database.Query.Table.Node.Error import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Schema.Node import Gargantext.Database.Schema.Node
import Gargantext.Prelude hiding (sum, head) import Gargantext.Prelude hiding (sum, head)
...@@ -344,10 +346,10 @@ insertNodes' ns = mkCmd $ \conn -> runInsert_ conn ...@@ -344,10 +346,10 @@ insertNodes' ns = mkCmd $ \conn -> runInsert_ conn
(pgNodeId <$> p) (pgNodeId <$> p)
(sqlStrictText n) (sqlStrictText n)
(pgUTCTime <$> d) (pgUTCTime <$> d)
(pgJSONB $ cs $ encode h)
(pgJSONB $ cs $ encode h)
) ns ) ns
-} -}
insertNodesR :: [NodeWrite] -> DBCmd err [NodeId] insertNodesR :: [NodeWrite] -> DBCmd err [NodeId]
insertNodesR ns = mkCmd $ \conn -> insertNodesR ns = mkCmd $ \conn ->
runInsert_ conn (Insert nodeTable ns (rReturning (\(Node i _ _ _ _ _ _ _) -> i)) Nothing) runInsert_ conn (Insert nodeTable ns (rReturning (\(Node i _ _ _ _ _ _ _) -> i)) Nothing)
...@@ -430,3 +432,28 @@ defaultListMaybe cId = headMay <$> map (view node_id ) <$> getListsWithParentId ...@@ -430,3 +432,28 @@ defaultListMaybe cId = headMay <$> map (view node_id ) <$> getListsWithParentId
getListsWithParentId :: HasDBid NodeType => NodeId -> DBCmd err [Node HyperdataList] getListsWithParentId :: HasDBid NodeType => NodeId -> DBCmd err [Node HyperdataList]
getListsWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeList) getListsWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeList)
-- | Copy a node somewhere else in the tree
copyNode :: Bool -- ^ Whether to copy whole subtree (True) or just the node (False)
-> NodeId -- ^ ID of the node to be copied
-> NodeId -- ^ ID of the node which will become the parent of the copied node
-> DBCmd BackendInternalError NodeId -- ^ ID of the copied node
copyNode copySubtree idToCopy newParentId = if copySubtree
then do
copiedNode <- copyNode False idToCopy newParentId
children <- getChildrenById idToCopy
for_ children $ \child -> copyNode True child copiedNode
return copiedNode
else do
newNodes <- runPGSQuery
-- Copy node. Should return exactly one ID, that of the new node:
[sql|
INSERT INTO public.nodes (typename, user_id, parent_id, name, date, hyperdata)
SELECT typename, user_id, ?, name, date, hyperdata FROM public.nodes WHERE id = ?
RETURNING id;
|] (newParentId, idToCopy)
case newNodes of
[copiedNode] -> return copiedNode
_ -> throwError $ InternalUnexpectedError $ SomeException $ PatternMatchFail $
"SQL insert returned zero or more than one node"
...@@ -9,8 +9,9 @@ Portability : POSIX ...@@ -9,8 +9,9 @@ Portability : POSIX
-} -}
{-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-} {-# LANGUAGE Arrows #-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Query.Table.Node.Children module Gargantext.Database.Query.Table.Node.Children
where where
...@@ -21,7 +22,8 @@ import Gargantext.Core.Types ...@@ -21,7 +22,8 @@ import Gargantext.Core.Types
import Gargantext.Core.Types.Query (Limit, Offset) import Gargantext.Core.Types.Query (Limit, Offset)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact ) import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument ) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument )
import Gargantext.Database.Prelude (DBCmd, JSONB, runCountOpaQuery, runOpaQuery) import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.Database.Prelude (DBCmd, JSONB, runCountOpaQuery, runOpaQuery, runPGSQuery)
import Gargantext.Database.Query.Filter ( limit', offset' ) import Gargantext.Database.Query.Filter ( limit', offset' )
import Gargantext.Database.Query.Table.NodeContext ( NodeContextPoly(NodeContext), queryNodeContextTable ) import Gargantext.Database.Query.Table.NodeContext ( NodeContextPoly(NodeContext), queryNodeContextTable )
import Gargantext.Database.Schema.Context import Gargantext.Database.Schema.Context
...@@ -60,6 +62,14 @@ getChildren pId p t@(Just NodeContact ) maybeOffset maybeLimit = getChildrenCont ...@@ -60,6 +62,14 @@ getChildren pId p t@(Just NodeContact ) maybeOffset maybeLimit = getChildrenCont
getChildren a b c d e = getChildrenNode a b c d e getChildren a b c d e = getChildrenNode a b c d e
-- | Get the list of (IDs of) children of a given node (ID)
getChildrenById :: NodeId -- ^ ID of the parent node
-> DBCmd err [NodeId] -- ^ List of IDs of the children nodes
getChildrenById parentId = runPGSQuery
[sql| SELECT id FROM public.nodes WHERE parent_id = ?; |]
parentId
getChildrenNode :: (JSONB a, HasDBid NodeType) getChildrenNode :: (JSONB a, HasDBid NodeType)
=> ParentId => ParentId
-> proxy a -> proxy a
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment