Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Grégoire Locqueville
haskell-gargantext
Commits
cbae6ae4
Commit
cbae6ae4
authored
Jul 22, 2024
by
Grégoire Locqueville
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Trying to recompute terms for the subcorpus
parent
13359943
Pipeline
#6439
failed with stages
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
110 additions
and
4 deletions
+110
-4
gargantext.cabal
gargantext.cabal
+1
-0
Dev.hs
src/Gargantext/API/Dev.hs
+3
-0
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+8
-4
REPL.hs
src/REPL.hs
+98
-0
No files found.
gargantext.cabal
View file @
cbae6ae4
...
...
@@ -101,6 +101,7 @@ library
import:
defaults
exposed-modules:
REPL
Gargantext
Gargantext.API
Gargantext.API.Admin.Auth.Types
...
...
src/Gargantext/API/Dev.hs
View file @
cbae6ae4
...
...
@@ -69,6 +69,9 @@ runCmdRepl f = withDevEnv defaultIniFile defaultSettingsFile $ \env -> runCmdDev
runCmdReplServantErr
::
Cmd''
DevEnv
ServerError
a
->
IO
a
runCmdReplServantErr
=
runCmdRepl
runCmdReplBackendErr
::
Cmd''
DevEnv
BackendInternalError
a
->
IO
a
runCmdReplBackendErr
=
runCmdRepl
-- In particular this writes the repo file after running
-- the command.
-- This function is constrained to the DevEnv rather than
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
cbae6ae4
...
...
@@ -7,6 +7,9 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
See the whole flow graphically at:
https://dl.gargantext.org/workflow.svg
-- TODO-ACCESS:
-- check userId CanFillUserCorpus userCorpusId
-- check masterUserId CanFillMasterCorpus masterCorpusId
...
...
@@ -37,6 +40,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
,
insertMasterDocs
,
saveDocNgramsWith
,
addDocumentsToHyperCorpus
,
configureNodes
,
reIndexWith
...
...
@@ -177,7 +181,7 @@ flowDataText :: forall env err m.
->
m
CorpusId
flowDataText
u
(
DataOld
ids
)
tt
cid
mfslw
_
=
do
$
(
logLocM
)
DEBUG
$
T
.
pack
$
"Found "
<>
show
(
length
ids
)
<>
" old node IDs"
(
_userId
,
userCorpusId
,
listId
)
<-
c
reat
eNodes
(
MkCorpusUserNormalCorpusIds
u
[
cid
])
corpusType
(
_userId
,
userCorpusId
,
listId
)
<-
c
onfigur
eNodes
(
MkCorpusUserNormalCorpusIds
u
[
cid
])
corpusType
_
<-
Doc
.
add
userCorpusId
(
map
nodeId2ContextId
ids
)
flowCorpusUser
(
_tt_lang
tt
)
u
userCorpusId
listId
corpusType
mfslw
where
...
...
@@ -278,7 +282,7 @@ flow :: forall env err m a c.
->
JobHandle
m
->
m
CorpusId
flow
c
mkCorpusUser
la
mfslw
(
count
,
docsC
)
jobHandle
=
do
(
_userId
,
userCorpusId
,
listId
)
<-
c
reat
eNodes
mkCorpusUser
c
(
_userId
,
userCorpusId
,
listId
)
<-
c
onfigur
eNodes
mkCorpusUser
c
-- TODO if public insertMasterDocs else insertUserDocs
nlpServer
<-
view
$
nlpServerGet
(
_tt_lang
la
)
runConduit
$
zipSources
(
yieldMany
([
1
..
]
::
[
Int
]))
docsC
...
...
@@ -319,13 +323,13 @@ addDocumentsToHyperCorpus ncs mb_hyper la corpusId docs = do
pure
ids
------------------------------------------------------------------------
c
reat
eNodes
::
(
DbCmd'
env
err
m
,
HasNodeError
err
,
HasSettings
env
c
onfigur
eNodes
::
(
DbCmd'
env
err
m
,
HasNodeError
err
,
HasSettings
env
,
MkCorpus
c
)
=>
MkCorpusUser
->
Maybe
c
->
m
(
UserId
,
CorpusId
,
ListId
)
c
reat
eNodes
mkCorpusUser
ctype
=
do
c
onfigur
eNodes
mkCorpusUser
ctype
=
do
-- User Flow
(
userId
,
_rootId
,
userCorpusId
)
<-
getOrMkRootWithCorpus
mkCorpusUser
ctype
-- NodeTexts is first
...
...
src/REPL.hs
0 → 100644
View file @
cbae6ae4
{-# LANGUAGE AllowAmbiguousTypes #-}
module
REPL
where
import
Gargantext.Prelude
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
import
Gargantext.Core.NodeStory.Types
(
HasNodeStory
)
import
Gargantext.API.Dev
(
runCmdReplBackendErr
)
import
Gargantext.API.Errors.Types
(
BackendInternalError
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
qualified
Gargantext.Database.Query.Table.Node.Document.Add
as
Document
(
add
)
import
Gargantext.Database.Prelude
(
DbCmd
'
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
nodeId2ContextId
,
NodeType
(
..
))
import
Gargantext.Database.Action.Search
(
searchInCorpus
)
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Query.Table.Node
(
insertDefaultNode
)
import
Gargantext.Database.Query.Facet.Types
(
facetDoc_id
)
import
qualified
Gargantext.Core.Text.Corpus.Query
as
Q
import
Gargantext.Core.Text.List
(
buildNgramsLists
)
import
Gargantext.Core.NLP
(
HasNLPServer
)
import
Gargantext.Database.Query.Tree.Error
(
HasTreeError
)
import
Gargantext.Core.Text.List.Group.WithStem
(
GroupParams
(
..
))
import
Gargantext.Database.Action.Flow
(
getOrMkRootWithCorpus
)
import
Gargantext.Database.Query.Tree.Root
(
MkCorpusUser
(
..
))
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataCorpus
)
execText
::
IO
(
Maybe
CorpusId
)
execText
=
runCmdReplBackendErr
$
testSubcorpusFunction
"user1"
133
"information"
False
testSubcorpusFunction
::
forall
env
m
.
(
DbCmd'
env
BackendInternalError
m
,
HasNodeStory
env
BackendInternalError
m
,
HasNLPServer
env
,
HasNLPServer
env
,
HasTreeError
BackendInternalError
,
HasSettings
env
)
=>
Text
-- ^ Username
->
CorpusId
-- ^ Parent corpus ID
->
Text
-- ^ The query, in text form
->
Bool
-- ^ Whether to reuse parent term list (True) or compute a new one
-- based only on the documents in the subcorpus (False)
->
m
(
Maybe
CorpusId
)
testSubcorpusFunction
username
parentId
queryText
reuseParentList
=
let
eitherQuery
=
Q
.
parseQuery
.
Q
.
RawQuery
$
queryText
in
case
eitherQuery
of
Left
_
->
return
Nothing
-- putStrLn ("Error parsing query " <> queryText) >> return Nothing -- TODO emit an actual error
Right
query
->
Just
<$>
makeSubcorpusFromQuery
(
UserName
username
)
parentId
query
reuseParentList
-- | (WIP) Given a "parent" corpus and a query, search for all docs in the parent
-- that match the query, and create a corpus from those. The created corpus
-- is inserted in the tree as a child of the parent corpus.
-- Creation of subcorpus "Docs" and "Terms" nodes is handled. The terms can be
-- either copied from the parent corpus or recomputed based on the subcorpus docs.
makeSubcorpusFromQuery
::
forall
env
err
m
.
(
DbCmd'
env
err
m
,
HasNodeError
err
,
HasNodeStory
env
err
m
,
HasNLPServer
env
,
HasTreeError
err
,
HasSettings
env
)
=>
User
-- ^ The corpus owner
->
CorpusId
-- ^ ID of the parent corpus
->
Q
.
Query
-- ^ The query to determine the subset of documents that will appear in the subcorpus
->
Bool
-- ^ Whether to reuse parent term list (True) or compute a new one
-- based only on the documents in the subcorpus (False)
->
m
CorpusId
-- ^ The child corpus ID
makeSubcorpusFromQuery
user
parentId
query
reuseParentList
=
do
userId
<-
getUserId
user
subcorpusId
<-
insertDefaultNode
NodeCorpus
parentId
userId
(
_
,
_
,
masterCorpusId
)
<-
getOrMkRootWithCorpus
MkCorpusUserMaster
(
Nothing
::
Maybe
HyperdataCorpus
)
-- Get ahold of all documents that match the query
facetDocs
<-
searchInCorpus
parentId
False
query
Nothing
Nothing
Nothing
-- Create subcorpus node with all the documents
_
<-
Document
.
add
subcorpusId
$
nodeId2ContextId
.
facetDoc_id
<$>
facetDocs
-- Create nodes for docs and terms as children of the subcorpus
void
$
insertDefaultNode
NodeTexts
subcorpusId
userId
void
$
insertDefaultNode
NodeList
subcorpusId
userId
-- Either simply copy parent terms... (TODO)
if
reuseParentList
then
return
()
-- ... or rebuild a term list from scratch
else
void
$
buildNgramsLists
user
subcorpusId
masterCorpusId
Nothing
GroupIdentity
return
subcorpusId
-- TODO
-- Permettre de (mettre les 2 en option) :
-- [X] 1a. relancer le buildNgramsLists (cf. module Flow)
-- [ ] 1b. ou copier la liste
-- [ ] 2. générer un graphe
-- Problèmes :
-- * Permission error quand j'essaie d'avoir l'ID du master corpus.
-- Je pense que getOrMkRootWithCorpus n'est pas la bonne fonction à utiliser
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment