Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Grégoire Locqueville
haskell-gargantext
Commits
df88acde
Commit
df88acde
authored
Aug 02, 2024
by
Grégoire Locqueville
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Cleaned up, reorganized
parent
589c5aa4
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
129 additions
and
164 deletions
+129
-164
Corpus.hs
src/Gargantext/Database/Action/Corpus.hs
+81
-0
Node.hs
src/Gargantext/Database/Query/Table/Node.hs
+25
-78
Children.hs
src/Gargantext/Database/Query/Table/Node/Children.hs
+13
-3
REPL.hs
src/REPL.hs
+10
-83
No files found.
src/Gargantext/Database/Action/Corpus.hs
0 → 100644
View file @
df88acde
module
Gargantext.Database.Action.Corpus
where
import
Data.Set.Internal
qualified
as
Set
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Errors.Types
(
BackendInternalError
)
import
Gargantext.Core.NodeStory.Types
(
HasNodeStoryEnv
)
import
Gargantext.Core.NLP
(
HasNLPServer
)
import
Gargantext.Core.Text.Corpus.Query
qualified
as
Q
import
Gargantext.Core.Text.List
(
buildNgramsLists
)
import
Gargantext.Core.Text.List.Group.WithStem
(
GroupParams
(
..
))
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
(
..
),
FlowSocialListPriority
(
..
))
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Database.Action.Flow
(
getOrMkRootWithCorpus
,
reIndexWith
)
import
Gargantext.Database.Action.Flow.List
(
flowList_DbRepo
)
import
Gargantext.Database.Action.Search
(
searchInCorpus
)
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
nodeId2ContextId
,
NodeType
(
..
))
import
Gargantext.Database.Prelude
(
DBCmd
'
)
import
Gargantext.Database.Query.Facet.Types
(
facetDoc_id
)
import
Gargantext.Database.Query.Table.Node
(
insertDefaultNode
,
copyNode
)
import
Gargantext.Database.Query.Table.Node.Document.Add
qualified
as
Document
(
add
)
import
Gargantext.Database.Query.Tree.Root
(
MkCorpusUser
(
..
))
import
Gargantext.Prelude
-- | Given a "parent" corpus and a query, search for all docs in the parent
-- that match the query, and create a corpus from those. The created corpus
-- is inserted in the tree as a child of the parent corpus.
-- Creation of subcorpus "Docs" and "Terms" nodes is handled. The terms can be
-- either copied from the parent corpus or recomputed based on the subcorpus docs.
makeSubcorpusFromQuery
::
(
HasSettings
env
,
HasNodeStoryEnv
env
,
HasNLPServer
env
)
=>
User
-- ^ The corpus owner
->
CorpusId
-- ^ ID of the parent corpus
->
Q
.
Query
-- ^ The query to determine the subset of documents that will appear in the subcorpus
->
Bool
-- ^ Whether to reuse parent term list (True) or compute a new one based only on the documents in the subcorpus (False)
->
DBCmd'
env
BackendInternalError
CorpusId
-- ^ The child corpus ID
makeSubcorpusFromQuery
user
parentId
query
reuseParentList
=
do
userId
<-
getUserId
user
subcorpusId
<-
insertDefaultNode
NodeCorpus
parentId
userId
(
_
,
_
,
masterCorpusId
)
<-
getOrMkRootWithCorpus
MkCorpusUserMaster
(
Nothing
::
Maybe
HyperdataCorpus
)
-- Get ahold of all documents that match the query
facetDocs
<-
searchInCorpus
parentId
False
query
Nothing
Nothing
Nothing
-- Create a subcorpus node with all the documents
_
<-
Document
.
add
subcorpusId
$
nodeId2ContextId
.
facetDoc_id
<$>
facetDocs
-- Create nodes for docs and terms as children of the subcorpus
void
$
insertDefaultNode
NodeTexts
subcorpusId
userId
listId
<-
insertDefaultNode
NodeList
subcorpusId
userId
-- Either simply copy parent terms... (TODO)
if
reuseParentList
then
void
$
copyNode
True
_parentList
subcorpusId
-- ... or rebuild a term list from scratch
else
do
ngrams
<-
buildNgramsLists
user
subcorpusId
masterCorpusId
(
Just
(
FlowSocialListWithPriority
MySelfFirst
)
::
Maybe
FlowSocialListWith
)
GroupIdentity
-- Save computed list
_
<-
flowList_DbRepo
listId
ngrams
return
()
-- Reindex
_
<-
reIndexWith
subcorpusId
listId
NgramsTerms
(
Set
.
singleton
MapTerm
)
-- The following two lines (like the one just above) are copypasted from
-- the definition of flowCorpusUser, but I'm not sure whether they should be included
-- _ <- updateContextScore userCorpusId listId
-- _ <- updateNgramsOccurrences userCorpusId listId
return
subcorpusId
-- TODO
-- Permettre de (mettre les 2 en option) :
-- [X] 1a. relancer le buildNgramsLists (cf. module Flow)
-- [ ] 1b. ou copier la liste (WIP)
-- [ ] Vérifier l'étape de réindexation reIndexWith
-- [ ] 2. générer un graphe
src/Gargantext/Database/Query/Table/Node.hs
View file @
df88acde
...
...
@@ -25,6 +25,7 @@ import Control.Lens (set, view)
import
Data.Aeson
(
encode
,
Value
)
import
Database.PostgreSQL.Simple
qualified
as
PGS
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Gargantext.API.Errors.Types
(
BackendInternalError
(
..
))
import
Gargantext.Core
(
HasDBid
(
toDBid
)
)
import
Gargantext.Core.Types
import
Gargantext.Core.Types.Query
(
Limit
,
Offset
)
...
...
@@ -37,6 +38,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Prelude ( Hyperdata )
import
Gargantext.Database.Admin.Types.Hyperdata.Default
(
defaultHyperdata
,
DefaultHyperdata
(
..
)
)
import
Gargantext.Database.Prelude
(
DBCmd
,
JSONB
,
mkCmd
,
runPGSQuery
,
runOpaQuery
)
import
Gargantext.Database.Query.Filter
(
limit'
,
offset'
)
import
Gargantext.Database.Query.Table.Node.Children
(
getChildrenById
)
import
Gargantext.Database.Query.Table.Node.Error
import
Gargantext.Database.Schema.Node
import
Gargantext.Prelude
hiding
(
sum
,
head
)
...
...
@@ -431,82 +433,27 @@ defaultListMaybe cId = headMay <$> map (view node_id ) <$> getListsWithParentId
getListsWithParentId
::
HasDBid
NodeType
=>
NodeId
->
DBCmd
err
[
Node
HyperdataList
]
getListsWithParentId
n
=
runOpaQuery
$
selectNodesWith'
n
(
Just
NodeList
)
------------------------------------------------------------------------
-- INSERT INTO public.nodes (hash_id, typename, user_id, parent_id, name, date, hyperdata)
-- SELECT 'tutu', typename, user_id, 97, name, date, hyperdata FROM public.nodes WHERE id = 165;
copyNodeSingle
::
NodeId
->
NodeId
->
DBCmd
err
NodeId
copyNodeSingle
idToCopy
newParentId
=
do
newNodes
<-
runPGSQuery
[
sql
|
INSERT INTO public.nodes (typename, user_id, parent_id, name, date, hyperdata)
SELECT typename, user_id, ?, name, date, hyperdata FROM public.nodes WHERE id = ?
RETURNING id;
|]
(
newParentId
,
idToCopy
)
case
newNodes
of
[
newNode
]
->
return
newNode
_
->
panicTrace
"Error"
-- TODO specify error
-- TODO Enforce a maximal depth level?
-- TODO Use SQL builtin recursivity?
copyNodeRecursive
::
NodeId
->
NodeId
->
DBCmd
err
NodeId
copyNodeRecursive
idToCopy
newParentId
=
do
copiedNode
<-
copyNodeSingle
idToCopy
newParentId
children
<-
getChildren'
idToCopy
for_
children
$
\
child
->
copyNodeRecursive
child
copiedNode
return
copiedNode
-- TODO delete this and replace calls to it by calls to getChildren
getChildren'
::
NodeId
->
DBCmd
err
[
NodeId
]
getChildren'
nodeId
=
runPGSQuery
[
sql
|
SELECT id FROM public.nodes WHERE parent_id = ?;
|]
nodeId
-- INSERT INTO public.nodes (typename, user_id, parent_id, name, date, hyperdata)
-- SELECT typename, user_id, 137, name, date, hyperdata FROM public.nodes WHERE id = 165
-- RETURNING id;
--
-- SELECT id FROM public.nodes WHERE parent_id = 137;
-- digest(CONCAT(?, NEW.typename, NEW.name, NEW.id, NEW.hyperdata), 'sha256')
-- copyNode :: (HasNodeError err) => NodeId -> DBCmd err Int64
-- copyNode nodeIdToCopy = mkCmd $ \connection -> proc
-- runSelect
-- TODO
-- [ ] Performer la substitution
-- [ ] Gérer le hash_id
-- nodeToCopy <- getNode nodeIdToCopy constant
-- _ -- return nodeToCopy
-- where
-- valueToHyperdata v = case fromJSON v of
-- Success a -> pure a
-- Error _err -> returnError ConversionFailed field
-- $ DL.unwords [ "cannot parse hyperdata for JSON: "
-- , show v
-- ]
-- nodeExists :: (HasNodeError err) => NodeId -> DBCmd err Bool
-- nodeExists nId = (== [PGS.Only True])
-- <$> runPGSQuery [sql|SELECT true FROM nodes WHERE id = ? |] (PGS.Only nId)
--
-- getNode :: HasNodeError err => NodeId -> DBCmd err (Node Value)
-- getNode nId = do
-- maybeNode <- headMay <$> runOpaQuery (selectNode (pgNodeId nId))
-- case maybeNode of
-- Nothing -> nodeError (DoesNotExist nId)
-- Just r -> pure r
--
-- getNodeWith :: (HasNodeError err, JSONB a)
-- => NodeId -> proxy a -> DBCmd err (Node a)
-- getNodeWith nId _ = do
-- maybeNode <- headMay <$> runOpaQuery (selectNode (pgNodeId nId))
-- case maybeNode of
-- Nothing -> nodeError (DoesNotExist nId)
-- Just r -> pure r
-- | Copy a node somewhere else in the tree
copyNode
::
Bool
-- ^ Whether to copy whole subtree (True) or just the node (False)
->
NodeId
-- ^ ID of the node to be copied
->
NodeId
-- ^ ID of the node which will become the parent of the copied node
->
DBCmd
BackendInternalError
NodeId
-- ^ ID of the copied node
copyNode
copySubtree
idToCopy
newParentId
=
if
copySubtree
then
do
copiedNode
<-
copyNode
False
idToCopy
newParentId
children
<-
getChildrenById
idToCopy
for_
children
$
\
child
->
copyNode
True
child
copiedNode
return
copiedNode
else
do
newNodes
<-
runPGSQuery
-- Copy node. Should return exactly one ID, that of the new node:
[
sql
|
INSERT INTO public.nodes (typename, user_id, parent_id, name, date, hyperdata)
SELECT typename, user_id, ?, name, date, hyperdata FROM public.nodes WHERE id = ?
RETURNING id;
|]
(
newParentId
,
idToCopy
)
case
newNodes
of
[
copiedNode
]
->
return
copiedNode
_
->
throwError
$
InternalUnexpectedError
$
SomeException
$
PatternMatchFail
$
"SQL insert returned zero or more than one node"
src/Gargantext/Database/Query/Table/Node/Children.hs
View file @
df88acde
...
...
@@ -9,8 +9,9 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE QuasiQuotes #-}
module
Gargantext.Database.Query.Table.Node.Children
where
...
...
@@ -21,7 +22,8 @@ import Gargantext.Core.Types
import
Gargantext.Core.Types.Query
(
Limit
,
Offset
)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
)
import
Gargantext.Database.Prelude
(
DBCmd
,
JSONB
,
runCountOpaQuery
,
runOpaQuery
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Gargantext.Database.Prelude
(
DBCmd
,
JSONB
,
runCountOpaQuery
,
runOpaQuery
,
runPGSQuery
)
import
Gargantext.Database.Query.Filter
(
limit'
,
offset'
)
import
Gargantext.Database.Query.Table.NodeContext
(
NodeContextPoly
(
NodeContext
),
queryNodeContextTable
)
import
Gargantext.Database.Schema.Context
...
...
@@ -60,6 +62,14 @@ getChildren pId p t@(Just NodeContact ) maybeOffset maybeLimit = getChildrenCont
getChildren
a
b
c
d
e
=
getChildrenNode
a
b
c
d
e
-- | Get the list of (IDs of) children of a given node (ID)
getChildrenById
::
NodeId
-- ^ ID of the parent node
->
DBCmd
err
[
NodeId
]
-- ^ List of IDs of the children nodes
getChildrenById
parentId
=
runPGSQuery
[
sql
|
SELECT id FROM public.nodes WHERE parent_id = ?;
|]
parentId
getChildrenNode
::
(
JSONB
a
,
HasDBid
NodeType
)
=>
ParentId
->
proxy
a
...
...
src/REPL.hs
View file @
df88acde
module
REPL
where
import
Gargantext.Prelude
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
import
Gargantext.Core.NodeStory.Types
(
HasNodeStory
)
import
Gargantext.API.Dev
(
runCmdReplBackendErr
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Dev
(
runCmdReplEasy
)
import
Gargantext.API.Errors.Types
(
BackendInternalError
)
import
Gargantext.Core.NLP
(
HasNLPServer
)
import
Gargantext.Core.NodeStory.Types
(
HasNodeStory
)
import
Gargantext.Core.Text.Corpus.Query
qualified
as
Q
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
qualified
Gargantext.Database.Query.Table.Node.Document.Add
as
Document
(
add
)
import
Gargantext.Database.Action.Corpus
(
makeSubcorpusFromQuery
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
)
import
Gargantext.Database.Prelude
(
DbCmd
'
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
nodeId2ContextId
,
NodeType
(
..
))
import
Gargantext.Database.Action.Search
(
searchInCorpus
)
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Query.Table.Node
(
insertDefaultNode
)
import
Gargantext.Database.Query.Facet.Types
(
facetDoc_id
)
import
qualified
Gargantext.Core.Text.Corpus.Query
as
Q
import
Gargantext.Core.Text.List
(
buildNgramsLists
)
import
Gargantext.Core.NLP
(
HasNLPServer
)
import
Gargantext.Database.Query.Tree.Error
(
HasTreeError
)
import
Gargantext.Core.Text.List.Group.WithStem
(
GroupParams
(
..
))
import
Gargantext.Database.Action.Flow
(
getOrMkRootWithCorpus
)
import
Gargantext.Database.Query.Tree.Root
(
MkCorpusUser
(
..
))
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataCorpus
)
import
Gargantext.Database.Action.Flow.List
(
flowList_DbRepo
)
import
Gargantext.Core.Types
(
HasValidationError
)
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
(
..
),
FlowSocialListPriority
(
..
))
import
Gargantext.Prelude
execText
::
IO
(
Maybe
CorpusId
)
execText
=
runCmdRepl
BackendErr
$
testSubcorpusFunction
"user1"
133
"information"
False
execText
=
runCmdRepl
Easy
$
testSubcorpusFunction
"user1"
133
"information"
False
testSubcorpusFunction
::
forall
env
m
.
(
DbCmd'
env
BackendInternalError
m
...
...
@@ -51,63 +38,3 @@ testSubcorpusFunction username parentId queryText reuseParentList =
Left
_
->
return
Nothing
-- putStrLn ("Error parsing query " <> queryText) >> return Nothing -- TODO emit an actual error
Right
query
->
Just
<$>
makeSubcorpusFromQuery
(
UserName
username
)
parentId
query
reuseParentList
-- | (WIP) Given a "parent" corpus and a query, search for all docs in the parent
-- that match the query, and create a corpus from those. The created corpus
-- is inserted in the tree as a child of the parent corpus.
-- Creation of subcorpus "Docs" and "Terms" nodes is handled. The terms can be
-- either copied from the parent corpus or recomputed based on the subcorpus docs.
makeSubcorpusFromQuery
::
forall
env
err
m
.
(
DbCmd'
env
err
m
,
HasValidationError
err
,
HasNodeError
err
,
HasNodeStory
env
err
m
,
HasNLPServer
env
,
HasTreeError
err
,
HasSettings
env
)
=>
User
-- ^ The corpus owner
->
CorpusId
-- ^ ID of the parent corpus
->
Q
.
Query
-- ^ The query to determine the subset of documents that will appear in the subcorpus
->
Bool
-- ^ Whether to reuse parent term list (True) or compute a new one based only on the documents in the subcorpus (False)
->
m
CorpusId
-- ^ The child corpus ID
makeSubcorpusFromQuery
user
parentId
query
reuseParentList
=
do
userId
<-
getUserId
user
subcorpusId
<-
insertDefaultNode
NodeCorpus
parentId
userId
(
_
,
_
,
masterCorpusId
)
<-
getOrMkRootWithCorpus
MkCorpusUserMaster
(
Nothing
::
Maybe
HyperdataCorpus
)
-- Get ahold of all documents that match the query
facetDocs
<-
searchInCorpus
parentId
False
query
Nothing
Nothing
Nothing
-- Create a subcorpus node with all the documents
_
<-
Document
.
add
subcorpusId
$
nodeId2ContextId
.
facetDoc_id
<$>
facetDocs
-- Create nodes for docs and terms as children of the subcorpus
void
$
insertDefaultNode
NodeTexts
subcorpusId
userId
listId
<-
insertDefaultNode
NodeList
subcorpusId
userId
-- Either simply copy parent terms... (TODO)
if
reuseParentList
then
return
()
-- ... or rebuild a term list from scratch
else
do
ngrams
<-
buildNgramsLists
user
subcorpusId
masterCorpusId
(
Just
(
FlowSocialListWithPriority
MySelfFirst
)
::
Maybe
FlowSocialListWith
)
GroupIdentity
-- Save computed list
_
<-
flowList_DbRepo
listId
ngrams
return
()
-- Reindex
reIndexWith
subcorpusId
listId
NgramsType
-- CorpusId
-- ListId
-- NgramsType
-- Set ListType
-- m ()
return
subcorpusId
-- TODO
-- Permettre de (mettre les 2 en option) :
-- [X] 1a. relancer le buildNgramsLists (cf. module Flow)
-- [ ] 1b. ou copier la liste
-- Ajouter l'étape de réindexation reIndexWith
-- [ ] 2. générer un graphe
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment