Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Grégoire Locqueville
haskell-gargantext
Commits
420bad76
Commit
420bad76
authored
Aug 06, 2024
by
Grégoire Locqueville
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'tmp-subcorpus' into subcorpus
parents
e26da1cb
2b4c4297
Pipeline
#6499
failed with stages
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
123 additions
and
7 deletions
+123
-7
gargantext.cabal
gargantext.cabal
+1
-0
Corpus.hs
src/Gargantext/Core/Text/Corpus.hs
+74
-0
Node.hs
src/Gargantext/Database/Admin/Types/Node.hs
+6
-2
Node.hs
src/Gargantext/Database/Query/Table/Node.hs
+29
-2
Children.hs
src/Gargantext/Database/Query/Table/Node/Children.hs
+13
-3
No files found.
gargantext.cabal
View file @
420bad76
...
@@ -178,6 +178,7 @@ library
...
@@ -178,6 +178,7 @@ library
Gargantext.Core.NodeStory.Types
Gargantext.Core.NodeStory.Types
Gargantext.Core.Text
Gargantext.Core.Text
Gargantext.Core.Text.Context
Gargantext.Core.Text.Context
Gargantext.Core.Text.Corpus
Gargantext.Core.Text.Corpus.API
Gargantext.Core.Text.Corpus.API
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.EPO
Gargantext.Core.Text.Corpus.API.EPO
...
...
src/Gargantext/Core/Text/Corpus.hs
0 → 100644
View file @
420bad76
module
Gargantext.Core.Text.Corpus
(
makeSubcorpusFromQuery
)
where
import
Data.Set.Internal
qualified
as
Set
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Errors.Types
(
BackendInternalError
)
import
Gargantext.Core.NodeStory.Types
(
HasNodeStoryEnv
)
import
Gargantext.Core.NLP
(
HasNLPServer
)
import
Gargantext.Core.Text.Corpus.Query
qualified
as
Q
import
Gargantext.Core.Text.List
(
buildNgramsLists
)
import
Gargantext.Core.Text.List.Group.WithStem
(
GroupParams
(
..
))
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
(
..
),
FlowSocialListPriority
(
..
))
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Database.Action.Flow
(
getOrMkRootWithCorpus
,
reIndexWith
)
import
Gargantext.Database.Action.Flow.List
(
flowList_DbRepo
)
import
Gargantext.Database.Action.Search
(
searchInCorpus
)
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
nodeId2ContextId
,
NodeType
(
..
))
import
Gargantext.Database.Prelude
(
DBCmd
'
)
import
Gargantext.Database.Query.Facet.Types
(
facetDoc_id
)
import
Gargantext.Database.Query.Table.Node
(
insertDefaultNode
,
copyNode
)
import
Gargantext.Database.Query.Table.Node.Document.Add
qualified
as
Document
(
add
)
import
Gargantext.Database.Query.Tree.Root
(
MkCorpusUser
(
..
))
import
Gargantext.Prelude
-- | Given a "parent" corpus and a query, search for all docs in the parent
-- that match the query, and create a corpus from those. The created corpus
-- is inserted in the tree as a child of the parent corpus.
-- Creation of subcorpus "Docs" and "Terms" nodes is handled. The terms can be
-- either copied from the parent corpus or recomputed based on the subcorpus docs.
makeSubcorpusFromQuery
::
(
HasSettings
env
,
HasNodeStoryEnv
env
,
HasNLPServer
env
)
=>
User
-- ^ The corpus owner
->
CorpusId
-- ^ ID of the parent corpus
->
Q
.
Query
-- ^ The query to determine the subset of documents that will appear in the subcorpus
->
Bool
-- ^ Whether to reuse parent term list (True) or compute a new one based only on the documents in the subcorpus (False)
->
DBCmd'
env
BackendInternalError
CorpusId
-- ^ The child corpus ID
makeSubcorpusFromQuery
user
parentId
query
reuseParentList
=
do
userId
<-
getUserId
user
subcorpusId
<-
insertDefaultNode
NodeCorpus
parentId
userId
(
_
,
_
,
masterCorpusId
)
<-
getOrMkRootWithCorpus
MkCorpusUserMaster
(
Nothing
::
Maybe
HyperdataCorpus
)
-- Get ahold of all documents that match the query
facetDocs
<-
searchInCorpus
parentId
False
query
Nothing
Nothing
Nothing
-- Create a subcorpus node with all the documents
_
<-
Document
.
add
subcorpusId
$
nodeId2ContextId
.
facetDoc_id
<$>
facetDocs
-- Create nodes for docs and terms as children of the subcorpus
void
$
insertDefaultNode
NodeTexts
subcorpusId
userId
listId
<-
insertDefaultNode
NodeList
subcorpusId
userId
-- Either simply copy parent terms...
-- (TODO this does not keep subterms)
if
reuseParentList
then
void
$
copyNode
True
listId
subcorpusId
-- ... or rebuild a term list from scratch
else
do
ngrams
<-
buildNgramsLists
user
subcorpusId
masterCorpusId
(
Just
(
FlowSocialListWithPriority
MySelfFirst
)
::
Maybe
FlowSocialListWith
)
GroupIdentity
-- Save computed list
_
<-
flowList_DbRepo
listId
ngrams
return
()
-- Reindex
_
<-
reIndexWith
subcorpusId
listId
NgramsTerms
(
Set
.
singleton
MapTerm
)
-- The following two lines (like the one just above) are copypasted from
-- the definition of flowCorpusUser, but I'm not sure whether they should be included
-- _ <- updateContextScore userCorpusId listId
-- _ <- updateNgramsOccurrences userCorpusId listId
return
subcorpusId
src/Gargantext/Database/Admin/Types/Node.hs
View file @
420bad76
...
@@ -37,6 +37,7 @@ import Data.TreeDiff
...
@@ -37,6 +37,7 @@ import Data.TreeDiff
import
Database.PostgreSQL.Simple.FromField
(
FromField
,
fromField
,
fromJSONField
)
import
Database.PostgreSQL.Simple.FromField
(
FromField
,
fromField
,
fromJSONField
)
import
Database.PostgreSQL.Simple.ToField
(
ToField
,
toField
,
toJSONField
)
import
Database.PostgreSQL.Simple.ToField
(
ToField
,
toField
,
toJSONField
)
import
Database.PostgreSQL.Simple.ToRow
(
ToRow
,
toRow
)
import
Database.PostgreSQL.Simple.ToRow
(
ToRow
,
toRow
)
import
Database.PostgreSQL.Simple.FromRow
(
FromRow
,
fromRow
,
field
)
import
Fmt
(
Buildable
(
..
)
)
import
Fmt
(
Buildable
(
..
)
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
,
wellNamedSchema
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
,
wellNamedSchema
)
import
Gargantext.Database.Schema.Context
import
Gargantext.Database.Schema.Context
...
@@ -267,10 +268,13 @@ instance ToField NodeId where
...
@@ -267,10 +268,13 @@ instance ToField NodeId where
toField
(
UnsafeMkNodeId
n
)
=
toField
n
toField
(
UnsafeMkNodeId
n
)
=
toField
n
instance
ToRow
NodeId
where
instance
ToRow
NodeId
where
toRow
(
UnsafeMkNodeId
i
)
=
[
toField
i
]
toRow
(
UnsafeMkNodeId
i
)
=
[
toField
i
]
instance
FromRow
NodeId
where
fromRow
=
UnsafeMkNodeId
<$>
field
instance
FromField
NodeId
where
instance
FromField
NodeId
where
fromField
f
ie
ld
mdata
=
do
fromField
fld
mdata
=
do
n
<-
UnsafeMkNodeId
<$>
fromField
f
ie
ld
mdata
n
<-
UnsafeMkNodeId
<$>
fromField
fld
mdata
if
isPositive
n
if
isPositive
n
then
pure
n
then
pure
n
else
mzero
else
mzero
...
...
src/Gargantext/Database/Query/Table/Node.hs
View file @
420bad76
...
@@ -25,6 +25,7 @@ import Control.Lens (set, view)
...
@@ -25,6 +25,7 @@ import Control.Lens (set, view)
import
Data.Aeson
(
encode
,
Value
)
import
Data.Aeson
(
encode
,
Value
)
import
Database.PostgreSQL.Simple
qualified
as
PGS
import
Database.PostgreSQL.Simple
qualified
as
PGS
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Gargantext.API.Errors.Types
(
BackendInternalError
(
..
))
import
Gargantext.Core
(
HasDBid
(
toDBid
)
)
import
Gargantext.Core
(
HasDBid
(
toDBid
)
)
import
Gargantext.Core.Types
import
Gargantext.Core.Types
import
Gargantext.Core.Types.Query
(
Limit
,
Offset
)
import
Gargantext.Core.Types.Query
(
Limit
,
Offset
)
...
@@ -37,6 +38,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Prelude ( Hyperdata )
...
@@ -37,6 +38,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Prelude ( Hyperdata )
import
Gargantext.Database.Admin.Types.Hyperdata.Default
(
defaultHyperdata
,
DefaultHyperdata
(
..
)
)
import
Gargantext.Database.Admin.Types.Hyperdata.Default
(
defaultHyperdata
,
DefaultHyperdata
(
..
)
)
import
Gargantext.Database.Prelude
(
DBCmd
,
JSONB
,
mkCmd
,
runPGSQuery
,
runOpaQuery
)
import
Gargantext.Database.Prelude
(
DBCmd
,
JSONB
,
mkCmd
,
runPGSQuery
,
runOpaQuery
)
import
Gargantext.Database.Query.Filter
(
limit'
,
offset'
)
import
Gargantext.Database.Query.Filter
(
limit'
,
offset'
)
import
Gargantext.Database.Query.Table.Node.Children
(
getChildrenById
)
import
Gargantext.Database.Query.Table.Node.Error
import
Gargantext.Database.Query.Table.Node.Error
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Schema.Node
import
Gargantext.Prelude
hiding
(
sum
,
head
)
import
Gargantext.Prelude
hiding
(
sum
,
head
)
...
@@ -344,10 +346,10 @@ insertNodes' ns = mkCmd $ \conn -> runInsert_ conn
...
@@ -344,10 +346,10 @@ insertNodes' ns = mkCmd $ \conn -> runInsert_ conn
(pgNodeId <$> p)
(pgNodeId <$> p)
(sqlStrictText n)
(sqlStrictText n)
(pgUTCTime <$> d)
(pgUTCTime <$> d)
(pgJSONB $ cs $ encode h)
(pgJSONB $ cs $ encode h)
) ns
) ns
-}
-}
insertNodesR
::
[
NodeWrite
]
->
DBCmd
err
[
NodeId
]
insertNodesR
::
[
NodeWrite
]
->
DBCmd
err
[
NodeId
]
insertNodesR
ns
=
mkCmd
$
\
conn
->
insertNodesR
ns
=
mkCmd
$
\
conn
->
runInsert_
conn
(
Insert
nodeTable
ns
(
rReturning
(
\
(
Node
i
_
_
_
_
_
_
_
)
->
i
))
Nothing
)
runInsert_
conn
(
Insert
nodeTable
ns
(
rReturning
(
\
(
Node
i
_
_
_
_
_
_
_
)
->
i
))
Nothing
)
...
@@ -430,3 +432,28 @@ defaultListMaybe cId = headMay <$> map (view node_id ) <$> getListsWithParentId
...
@@ -430,3 +432,28 @@ defaultListMaybe cId = headMay <$> map (view node_id ) <$> getListsWithParentId
getListsWithParentId
::
HasDBid
NodeType
=>
NodeId
->
DBCmd
err
[
Node
HyperdataList
]
getListsWithParentId
::
HasDBid
NodeType
=>
NodeId
->
DBCmd
err
[
Node
HyperdataList
]
getListsWithParentId
n
=
runOpaQuery
$
selectNodesWith'
n
(
Just
NodeList
)
getListsWithParentId
n
=
runOpaQuery
$
selectNodesWith'
n
(
Just
NodeList
)
-- | Copy a node somewhere else in the tree
copyNode
::
Bool
-- ^ Whether to copy whole subtree (True) or just the node (False)
->
NodeId
-- ^ ID of the node to be copied
->
NodeId
-- ^ ID of the node which will become the parent of the copied node
->
DBCmd
BackendInternalError
NodeId
-- ^ ID of the copied node
copyNode
copySubtree
idToCopy
newParentId
=
if
copySubtree
then
do
copiedNode
<-
copyNode
False
idToCopy
newParentId
children
<-
getChildrenById
idToCopy
for_
children
$
\
child
->
copyNode
True
child
copiedNode
return
copiedNode
else
do
newNodes
<-
runPGSQuery
-- Copy node. Should return exactly one ID, that of the new node:
[
sql
|
INSERT INTO public.nodes (typename, user_id, parent_id, name, date, hyperdata)
SELECT typename, user_id, ?, name, date, hyperdata FROM public.nodes WHERE id = ?
RETURNING id;
|]
(
newParentId
,
idToCopy
)
case
newNodes
of
[
copiedNode
]
->
return
copiedNode
_
->
throwError
$
InternalUnexpectedError
$
SomeException
$
PatternMatchFail
$
"SQL insert returned zero or more than one node"
src/Gargantext/Database/Query/Table/Node/Children.hs
View file @
420bad76
...
@@ -9,8 +9,9 @@ Portability : POSIX
...
@@ -9,8 +9,9 @@ Portability : POSIX
-}
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE QuasiQuotes #-}
module
Gargantext.Database.Query.Table.Node.Children
module
Gargantext.Database.Query.Table.Node.Children
where
where
...
@@ -21,7 +22,8 @@ import Gargantext.Core.Types
...
@@ -21,7 +22,8 @@ import Gargantext.Core.Types
import
Gargantext.Core.Types.Query
(
Limit
,
Offset
)
import
Gargantext.Core.Types.Query
(
Limit
,
Offset
)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
)
import
Gargantext.Database.Prelude
(
DBCmd
,
JSONB
,
runCountOpaQuery
,
runOpaQuery
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Gargantext.Database.Prelude
(
DBCmd
,
JSONB
,
runCountOpaQuery
,
runOpaQuery
,
runPGSQuery
)
import
Gargantext.Database.Query.Filter
(
limit'
,
offset'
)
import
Gargantext.Database.Query.Filter
(
limit'
,
offset'
)
import
Gargantext.Database.Query.Table.NodeContext
(
NodeContextPoly
(
NodeContext
),
queryNodeContextTable
)
import
Gargantext.Database.Query.Table.NodeContext
(
NodeContextPoly
(
NodeContext
),
queryNodeContextTable
)
import
Gargantext.Database.Schema.Context
import
Gargantext.Database.Schema.Context
...
@@ -60,6 +62,14 @@ getChildren pId p t@(Just NodeContact ) maybeOffset maybeLimit = getChildrenCont
...
@@ -60,6 +62,14 @@ getChildren pId p t@(Just NodeContact ) maybeOffset maybeLimit = getChildrenCont
getChildren
a
b
c
d
e
=
getChildrenNode
a
b
c
d
e
getChildren
a
b
c
d
e
=
getChildrenNode
a
b
c
d
e
-- | Get the list of (IDs of) children of a given node (ID)
getChildrenById
::
NodeId
-- ^ ID of the parent node
->
DBCmd
err
[
NodeId
]
-- ^ List of IDs of the children nodes
getChildrenById
parentId
=
runPGSQuery
[
sql
|
SELECT id FROM public.nodes WHERE parent_id = ?;
|]
parentId
getChildrenNode
::
(
JSONB
a
,
HasDBid
NodeType
)
getChildrenNode
::
(
JSONB
a
,
HasDBid
NodeType
)
=>
ParentId
=>
ParentId
->
proxy
a
->
proxy
a
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment