Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
144
Issues
144
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
67532a54
Commit
67532a54
authored
Aug 06, 2024
by
Grégoire Locqueville
Committed by
Grégoire Locqueville
Nov 20, 2024
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add commands for subcorpus and node copy
parent
f14a73d6
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
203 additions
and
7 deletions
+203
-7
gargantext.cabal
gargantext.cabal
+1
-0
Corpus.hs
src/Gargantext/Core/Text/Corpus.hs
+101
-0
List.hs
src/Gargantext/Core/Text/List.hs
+2
-1
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+1
-0
Node.hs
src/Gargantext/Database/Admin/Types/Node.hs
+9
-3
Node.hs
src/Gargantext/Database/Query/Table/Node.hs
+75
-1
Children.hs
src/Gargantext/Database/Query/Table/Node/Children.hs
+14
-2
No files found.
gargantext.cabal
View file @
67532a54
...
...
@@ -352,6 +352,7 @@ library
Gargantext.Core.Methods.Similarities.Accelerate.Distributional
Gargantext.Core.Methods.Similarities.Accelerate.SpeGen
Gargantext.Core.Statistics
Gargantext.Core.Text.Corpus
Gargantext.Core.Text.Corpus.API.Hal
Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex
...
...
src/Gargantext/Core/Text/Corpus.hs
0 → 100644
View file @
67532a54
module
Gargantext.Core.Text.Corpus
(
makeSubcorpusFromQuery
,
subcorpusEasy
)
where
import
Control.Lens
(
view
)
import
Data.Set.Internal
qualified
as
Set
(
singleton
)
import
Data.Text
qualified
as
T
import
Gargantext.API.Dev
(
runCmdReplEasy
)
import
Gargantext.API.Errors.Types
(
BackendInternalError
(
InternalNodeError
))
import
Gargantext.Core
(
Lang
(
EN
))
import
Gargantext.Core.NodeStory.Types
(
HasNodeStoryEnv
)
import
Gargantext.Core.NLP
(
HasNLPServer
)
import
Gargantext.Core.Text.Corpus.Query
qualified
as
Q
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
(
..
),
FlowSocialListPriority
(
..
))
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Database.Action.Flow
(
buildSocialList
,
reIndexWith
)
import
Gargantext.Database.Action.Metrics
(
updateContextScore
,
updateNgramsOccurrences
)
import
Gargantext.Database.Action.Search
(
searchInCorpus
)
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataCorpus
,
hc_lang
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
NodeId
(
UnsafeMkNodeId
),
NodeType
(
..
),
nodeId2ContextId
)
import
Gargantext.Database.Prelude
(
DBCmd
'
)
import
Gargantext.Database.Query.Facet.Types
(
facetDoc_id
)
import
Gargantext.Database.Query.Table.Node
(
insertDefaultNode
,
copyNodeStories
,
defaultList
,
getNodeWithType
)
import
Gargantext.Database.Query.Table.Node.Document.Add
qualified
as
Document
(
add
)
import
Gargantext.Database.Query.Table.Node.Error
(
NodeError
(
NoCorpusFound
))
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
-- | A version of the below function for use in the REPL (so you don't need to
-- manually import tons of constructors etc.)
subcorpusEasy
::
Text
-- ^ Username
->
Int
-- ^ Original corpus ID
->
Text
-- ^ Search string
->
Bool
-- ^ Whether to reuse the parent term list (True) or recompute one from scratch (False)
->
IO
()
subcorpusEasy
username
cId
rawQuery
reuseParentList
=
do
let
eitherQuery
=
Q
.
parseQuery
$
Q
.
RawQuery
rawQuery
case
eitherQuery
of
Left
msg
->
print
$
"Error parsing query
\"
"
<>
rawQuery
<>
"
\"
: "
<>
T
.
pack
msg
Right
query
->
void
$
runCmdReplEasy
$
makeSubcorpusFromQuery
(
UserName
username
)
(
UnsafeMkNodeId
cId
)
query
reuseParentList
-- | Given a "parent" corpus and a query, search for all docs in the parent
-- that match the query, and create a corpus from those. The created corpus
-- is inserted in the tree as a child of the parent corpus.
-- Creation of subcorpus "Docs" and "Terms" nodes is handled. The terms can be
-- either copied from the parent corpus or recomputed based on the subcorpus docs.
makeSubcorpusFromQuery
::
(
HasNodeStoryEnv
env
,
HasNLPServer
env
)
=>
User
-- ^ The corpus owner
->
CorpusId
-- ^ ID of the parent corpus
->
Q
.
Query
-- ^ The query to determine the subset of documents that will appear in the subcorpus
->
Bool
-- ^ Whether to reuse parent term list (True) or compute a new one based only on the documents in the subcorpus (False)
->
DBCmd'
env
BackendInternalError
CorpusId
-- ^ The child corpus ID
makeSubcorpusFromQuery
user
supercorpusId
query
reuseParentList
=
do
userId
<-
getUserId
user
-- Insert the required nodes:
-- 1. The subcorpus root (under the original corpus root)
subcorpusId
<-
insertDefaultNode
NodeCorpus
supercorpusId
userId
-- 2. The context (aka "Docs", aka "Terms") node (under the subcorpus root)
_
<-
insertDefaultNode
NodeTexts
subcorpusId
userId
-- 3. The terms (aka "List") node
subListId
<-
insertDefaultNode
NodeList
subcorpusId
userId
-- Get the ID of the original terms node
superListId
<-
defaultList
supercorpusId
-- Get ahold of all contexts that match the query, and add them to the subcorpus
-- (note that contexts are attached to a *corpus* node, not a *docs* node,
-- notwithstanding what you might think from th UI)
facetDocs
<-
searchInCorpus
supercorpusId
False
query
Nothing
Nothing
Nothing
_
<-
Document
.
add
subcorpusId
$
nodeId2ContextId
.
facetDoc_id
<$>
facetDocs
if
reuseParentList
-- Either simply copy parent terms...
then
void
$
copyNodeStories
superListId
subListId
-- ... or rebuild a term list from scratch
-- TODO Check whether reusing the parent hyperdata is the right thing to do
else
do
-- Get hyperdata from the original corpus
supercorpuses
<-
getNodeWithType
supercorpusId
NodeCorpus
(
Proxy
::
Proxy
HyperdataCorpus
)
superHyperdata
<-
case
supercorpuses
of
[
supercorpus
]
->
return
$
view
node_hyperdata
supercorpus
_
->
throwError
$
InternalNodeError
NoCorpusFound
buildSocialList
(
fromMaybe
EN
$
view
hc_lang
superHyperdata
)
user
subcorpusId
subListId
(
Just
superHyperdata
)
-- TODO Not completely sure what the following parameter is for
-- but I am guessing there should be a dialog to let the user decide
-- what it should be
(
Just
(
FlowSocialListWithPriority
MySelfFirst
)
::
Maybe
FlowSocialListWith
)
-- In both cases we'll need to reindex our terms list so it matches the contexts
-- in the newly created subcorpus
reIndexWith
subcorpusId
subListId
NgramsTerms
(
Set
.
singleton
MapTerm
)
_
<-
updateContextScore
subcorpusId
subListId
_
<-
updateNgramsOccurrences
subcorpusId
subListId
return
subcorpusId
src/Gargantext/Core/Text/List.hs
View file @
67532a54
...
...
@@ -63,7 +63,8 @@ goodMapListSize :: Int
goodMapListSize
=
350
-- | TODO improve grouping functions of Authors, Sources, Institutes..
-- | Consider using `buildSocialList` instead of this function.
-- TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists
::
(
HasNodeStory
env
err
m
,
HasNLPServer
env
,
HasTreeError
err
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
67532a54
...
...
@@ -35,6 +35,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
,
flowCorpusUser
,
flowAnnuaire
,
insertMasterDocs
,
buildSocialList
,
saveDocNgramsWith
,
addDocumentsToHyperCorpus
...
...
src/Gargantext/Database/Admin/Types/Node.hs
View file @
67532a54
...
...
@@ -13,7 +13,6 @@ Portability : POSIX
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TemplateHaskell #-}
...
...
@@ -39,6 +38,10 @@ import Data.Text (pack, unpack)
import
Data.Text
qualified
as
T
import
Data.Time
(
UTCTime
)
import
Data.TreeDiff
import
Database.PostgreSQL.Simple.FromField
(
FromField
,
fromField
,
fromJSONField
)
import
Database.PostgreSQL.Simple.ToField
(
ToField
,
toField
,
toJSONField
)
import
Database.PostgreSQL.Simple.FromRow
(
FromRow
,
fromRow
,
field
)
import
Database.PostgreSQL.Simple.ToRow
(
ToRow
,
toRow
)
import
Fmt
(
Buildable
(
..
)
)
import
Gargantext.Core
(
HasDBid
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
,
wellNamedSchema
)
...
...
@@ -270,10 +273,13 @@ instance ToField NodeId where
toField
(
UnsafeMkNodeId
n
)
=
toField
n
instance
ToRow
NodeId
where
toRow
(
UnsafeMkNodeId
i
)
=
[
toField
i
]
instance
FromRow
NodeId
where
fromRow
=
UnsafeMkNodeId
<$>
field
instance
FromField
NodeId
where
fromField
f
ie
ld
mdata
=
do
n
<-
UnsafeMkNodeId
<$>
fromField
f
ie
ld
mdata
fromField
fld
mdata
=
do
n
<-
UnsafeMkNodeId
<$>
fromField
fld
mdata
if
isPositive
n
then
pure
n
else
mzero
...
...
src/Gargantext/Database/Query/Table/Node.hs
View file @
67532a54
...
...
@@ -59,23 +59,31 @@ module Gargantext.Database.Query.Table.Node
,
deleteNode
,
deleteNodes
-- * Copying data
,
copyNode
,
copyNodeStories
)
where
import
Control.Arrow
(
returnA
)
import
Control.Lens
(
set
,
view
)
import
Data.Aeson
(
encode
,
Value
)
import
Data.Bimap
((
!>
))
import
Database.PostgreSQL.Simple
qualified
as
PGS
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Gargantext.API.Errors.Types
(
BackendInternalError
(
..
))
import
Gargantext.Core
(
HasDBid
(
toDBid
)
)
import
Gargantext.Core.Types
import
Gargantext.Core.Types.Query
(
Limit
,
Offset
)
import
Gargantext.Database.Admin.Config
(
nodeTypes
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataAnnuaire
,
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Hyperdata.Default
(
defaultHyperdata
,
DefaultHyperdata
(
..
)
)
import
Gargantext.Database.Admin.Types.Hyperdata.Folder
(
HyperdataFolder
)
import
Gargantext.Database.Admin.Types.Hyperdata.List
(
HyperdataList
)
import
Gargantext.Database.Admin.Types.Hyperdata.Prelude
(
Hyperdata
)
import
Gargantext.Database.Prelude
(
DBCmd
,
JSONB
,
mkCmd
,
runPGSQuery
,
runOpaQuery
)
import
Gargantext.Database.Prelude
(
DBCmd
,
JSONB
,
mkCmd
,
execPGSQuery
,
runPGSQuery
,
runOpaQuery
)
import
Gargantext.Database.Query.Filter
(
limit'
,
offset'
)
import
Gargantext.Database.Query.Table.Node.Children
(
getChildrenById
)
import
Gargantext.Database.Query.Table.Node.Error
import
Gargantext.Database.Schema.Node
import
Gargantext.Prelude
hiding
(
sum
,
head
)
...
...
@@ -468,3 +476,69 @@ isUserNode userNodeId = (== [PGS.Only True])
WHERE n.id = ? AND n.typename = ? AND n.parent_id = NULL
)
|]
(
userNodeId
,
toDBid
NodeUser
)
-- | Copy a node somewhere else in the tree
copyNode
::
Bool
-- ^ Whether to copy whole subtree (`True`) or just the node (`False`)
->
Bool
-- ^ Whether to deal with ngrams and contexts (`True`) or just the data in the `nodes` table (`False`)
->
NodeId
-- ^ ID of the node to be copied
->
NodeId
-- ^ ID of the node which will become the parent of the copied node
->
DBCmd
BackendInternalError
NodeId
-- ^ ID of the copied node
copyNode
copySubtree
smart
idToCopy
newParentId
=
if
copySubtree
-- Recursive copy:
then
do
-- Non-recursively copy the node itself, then recursively copy its children:
copiedNode
<-
copyNode
False
smart
idToCopy
newParentId
children
<-
getChildrenById
idToCopy
for_
children
$
\
child
->
copyNode
True
smart
child
copiedNode
return
copiedNode
-- Single-node (non-recursive) copy:
else
do
newNodes
<-
runPGSQuery
-- Copy node. Should return exactly one ID, that of the new node:
[
sql
|
INSERT INTO public.nodes (typename, user_id, parent_id, name, date, hyperdata)
SELECT typename, user_id, ?, name, date, hyperdata FROM public.nodes WHERE id = ?
RETURNING id;
|]
(
newParentId
,
idToCopy
)
case
newNodes
of
-- Check that we got exactly one node back
[
copiedNode
]
->
do
-- Copy node stories/contexts if applicable
when
smart
$
do
nodeToCopy
<-
getNode
idToCopy
case
nodeTypes
!>
view
node_typename
nodeToCopy
of
NodeList
->
copyNodeStories
idToCopy
copiedNode
-- Contexts are attached to a corpus node, not to the docs node:
NodeCorpus
->
copyNodeContexts
idToCopy
copiedNode
_
->
return
()
return
copiedNode
_
->
throwError
$
InternalUnexpectedError
$
SomeException
$
PatternMatchFail
$
"SQL insert returned zero or more than one node"
-- | Given two IDs of terms nodes, copies the node stories of the first into
-- node stories of the second. This effectively copies the terms from one terms
-- node to another.
-- TODO add a check that we are looking at the right type of node?
copyNodeStories
::
NodeId
-- ^ The ID of the node whose stories are to be copied
->
NodeId
-- ^ The ID of the node under which to copy the stories
->
DBCmd
BackendInternalError
()
copyNodeStories
oldNodeId
newNodeId
=
void
$
execPGSQuery
[
sql
|
INSERT INTO node_stories
(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT ?, version, ngrams_type_id, ngrams_id, ngrams_repo_element
FROM node_stories
WHERE node_id = ?;
|]
(
newNodeId
,
oldNodeId
)
-- | Given two IDs of Docs nodes, add to the second the contexts associated to
-- the first. Functionally, this copies the contexts from the first node to the
-- second, although the contexts are not technically duplicated in the database.
copyNodeContexts
::
NodeId
-- ^ The ID of the node whose contexts are to be "copied"
->
NodeId
-- ^ The ID of the node under which to "copy" the contexts
->
DBCmd
BackendInternalError
()
copyNodeContexts
oldNodeId
newNodeId
=
void
$
execPGSQuery
[
sql
|
INSERT INTO node_contexts (node_id, context_id, score, category)
SELECT ?, context_id, score, category FROM node_stories WHERE node_id = ?
|]
(
newNodeId
,
oldNodeId
)
src/Gargantext/Database/Query/Table/Node/Children.hs
View file @
67532a54
...
...
@@ -8,18 +8,22 @@ Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE Arrows #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE QuasiQuotes #-}
module
Gargantext.Database.Query.Table.Node.Children
where
import
Control.Arrow
(
returnA
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Gargantext.Core
(
HasDBid
(
toDBid
)
)
import
Gargantext.Core.Types
import
Gargantext.Core.Types.Query
(
Limit
,
Offset
)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
)
import
Gargantext.Database.Prelude
(
DBCmd
,
JSONB
,
runCountOpaQuery
,
runOpaQuery
)
import
Gargantext.Database.Prelude
(
DBCmd
,
JSONB
,
runCountOpaQuery
,
runOpaQuery
,
runPGSQuery
)
import
Gargantext.Database.Query.Filter
(
limit'
,
offset'
)
import
Gargantext.Database.Query.Table.NodeContext
(
NodeContextPoly
(
NodeContext
),
queryNodeContextTable
)
import
Gargantext.Database.Schema.Context
...
...
@@ -58,6 +62,14 @@ getChildren pId p t@(Just NodeContact ) maybeOffset maybeLimit = getChildrenCont
getChildren
a
b
c
d
e
=
getChildrenNode
a
b
c
d
e
-- | Get the list of (IDs of) children of a given node (ID)
getChildrenById
::
NodeId
-- ^ ID of the parent node
->
DBCmd
err
[
NodeId
]
-- ^ List of IDs of the children nodes
getChildrenById
parentId
=
runPGSQuery
[
sql
|
SELECT id FROM public.nodes WHERE parent_id = ?;
|]
parentId
getChildrenNode
::
(
JSONB
a
,
HasDBid
NodeType
)
=>
ParentId
->
proxy
a
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment