Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
191
Issues
191
List
Board
Labels
Milestones
Merge Requests
8
Merge Requests
8
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
e7735135
Verified
Commit
e7735135
authored
Jun 20, 2025
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev' into 477-dev-flow-zip-file-upload-2
parents
25123b89
d362b468
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
56 additions
and
43 deletions
+56
-43
gargantext.cabal
gargantext.cabal
+0
-1
GraphQL.hs
src/Gargantext/API/GraphQL.hs
+0
-1
Context.hs
src/Gargantext/API/GraphQL/Context.hs
+5
-5
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+2
-2
Multi.hs
src/Gargantext/Core/Text/Terms/Multi.hs
+21
-2
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+16
-18
Extract.hs
src/Gargantext/Database/Action/Flow/Extract.hs
+6
-6
Types.hs
src/Gargantext/Database/Action/Flow/Types.hs
+4
-6
NodeContext.hs
src/Gargantext/Database/Query/Table/NodeContext.hs
+2
-2
No files found.
gargantext.cabal
View file @
e7735135
...
...
@@ -644,7 +644,6 @@ library
, transformers-base ^>= 0.4.6
, tree-diff
, tuple ^>= 0.3.0.2
, unbounded-delays >= 0.1.1 && < 0.2
, unicode-collation >= 0.1.3.5
, unordered-containers ^>= 0.2.16.0
-- needed for Worker / System.Posix.Signals
...
...
src/Gargantext/API/GraphQL.hs
View file @
e7735135
...
...
@@ -15,7 +15,6 @@ Portability : POSIX
{-# LANGUAGE KindSignatures #-}
-- for use of Endpoint (name :: Symbol)
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE PartialTypeSignatures #-}
-- to automatically use suggested type hole signatures during compilation
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module
Gargantext.API.GraphQL
where
...
...
src/Gargantext/API/GraphQL/Context.hs
View file @
e7735135
...
...
@@ -22,8 +22,7 @@ import Data.Morpheus.Types
,
ResolverM
,
QUERY
)
import
Data.Text
(
pack
,
unpack
)
import
Data.Text
qualified
as
Text
import
Data.Text
(
pack
)
import
Data.Time.Format.ISO8601
(
iso8601Show
)
import
Gargantext.API.Admin.Auth.Types
(
AuthenticatedUser
)
import
Gargantext.API.Auth.PolicyCheck
(
nodeWriteChecks
,
AccessPolicyManager
)
...
...
@@ -97,7 +96,7 @@ data ContextsForNgramsArgs
=
ContextsForNgramsArgs
{
corpus_id
::
Int
,
ngrams_terms
::
[
Text
]
,
and_logic
::
Text
,
and_logic
::
Bool
}
deriving
(
Generic
,
GQLType
)
data
NodeContextCategoryMArgs
=
NodeContextCategoryMArgs
...
...
@@ -153,9 +152,10 @@ dbNodeContext context_id node_id = do
-- | Returns list of `ContextGQL` for given ngrams in given corpus id.
dbContextForNgrams
::
(
IsDBEnvExtra
env
)
=>
Int
->
[
Text
]
->
Text
->
GqlM
e
env
[
ContextGQL
]
=>
Int
->
[
Text
]
->
Bool
->
GqlM
e
env
[
ContextGQL
]
dbContextForNgrams
node_id
ngrams_terms
and_logic
=
do
contextsForNgramsTerms
<-
lift
$
runDBQuery
$
getContextsForNgramsTerms
(
UnsafeMkNodeId
node_id
)
ngrams_terms
(
readMaybe
$
unpack
$
Text
.
toTitle
and_logic
)
contextsForNgramsTerms
<-
lift
$
runDBQuery
$
getContextsForNgramsTerms
(
UnsafeMkNodeId
node_id
)
ngrams_terms
and_logic
--lift $ printDebug "[dbContextForNgrams] contextsForNgramsTerms" contextsForNgramsTerms
pure
$
toContextGQL
<$>
contextsForNgramsTerms
...
...
src/Gargantext/Core/Text/Terms.hs
View file @
e7735135
...
...
@@ -116,11 +116,11 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
instance
Hashable
ExtractedNgrams
-- | A typeclass that represents extracting ngrams from an entity.
class
ExtractNgrams
h
where
class
Monad
m
=>
ExtractNgrams
m
h
where
extractNgrams
::
NLPServerConfig
->
TermType
Lang
->
h
->
DBCmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
->
m
(
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
------------------------------------------------------------------------
enrichedTerms
::
Lang
->
PosTagAlgo
->
POS
->
Terms
->
NgramsPostag
...
...
src/Gargantext/Core/Text/Terms/Multi.hs
View file @
e7735135
...
...
@@ -11,9 +11,19 @@ Multi-terms are ngrams where n > 1.
-}
module
Gargantext.Core.Text.Terms.Multi
(
multiterms
,
Terms
(
..
),
tokenTag2terms
,
multiterms_rake
,
tokenTagsWith
,
tokenTags
,
cleanTextForNLP
)
module
Gargantext.Core.Text.Terms.Multi
(
multiterms
,
Terms
(
..
)
,
MultitermsExtractionException
(
..
)
,
tokenTag2terms
,
multiterms_rake
,
tokenTagsWith
,
tokenTags
,
cleanTextForNLP
)
where
import
Control.Exception.Safe
qualified
as
Safe
import
Data.Attoparsec.Text
as
DAT
(
space
,
notChar
,
string
)
import
Gargantext.Core
(
Lang
(
..
),
NLPServerConfig
(
..
),
PosTagAlgo
(
..
))
import
Gargantext.Core.Text.Terms.Multi.Lang.En
qualified
as
En
...
...
@@ -25,14 +35,23 @@ import Gargantext.Core.Types ( POS(NP), Terms(Terms), TermsWithCount, TokenTag(T
import
Gargantext.Core.Utils
(
groupWithCounts
)
import
Gargantext.Prelude
import
Gargantext.Utils.SpacyNLP
qualified
as
SpacyNLP
import
Network.HTTP.Client
import
Replace.Attoparsec.Text
as
RAT
(
streamEdit
)
-------------------------------------------------------------------
type
NLP_API
=
Lang
->
Text
->
IO
PosSentences
data
MultitermsExtractionException
=
MEE_nlp_server_http_exception
!
NLPServerConfig
!
HttpException
deriving
Show
instance
Exception
MultitermsExtractionException
-------------------------------------------------------------------
-- | Extracts the terms from the input 'txt'. Throws a
-- 'MultitermExtractionException' in case we fail.
multiterms
::
NLPServerConfig
->
Lang
->
Text
->
IO
[
TermsWithCount
]
multiterms
nsc
l
txt
=
do
multiterms
nsc
l
txt
=
handle
(
\
ex
->
Safe
.
throwIO
$
MEE_nlp_server_http_exception
nsc
ex
)
$
do
let
txt'
=
cleanTextForNLP
txt
if
txt'
==
""
then
do
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
e7735135
...
...
@@ -54,11 +54,10 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
)
where
import
Async.Worker
qualified
as
W
import
Conduit
import
Control.Concurrent.Timeout
qualified
as
Timeout
import
Control.Exception.Safe
qualified
as
CES
import
Control.Lens
(
to
,
view
)
import
Control.Exception.Safe
(
catch
,
MonadCatch
)
import
Data.Conduit
qualified
as
C
import
Data.Conduit.Internal
(
zipSources
)
import
Data.Conduit.List
qualified
as
CL
...
...
@@ -118,6 +117,7 @@ import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) )
------------------------------------------------------------------------
-- Imports for upgrade function
import
Gargantext.Database.Query.Tree.Error
(
HasTreeError
)
import
Gargantext.Core.Text.Terms.Multi
(
MultitermsExtractionException
)
------------------------------------------------------------------------
...
...
@@ -258,9 +258,11 @@ flowCorpus :: ( IsDBCmd env err m
,
HasTreeError
err
,
HasValidationError
err
,
FlowCorpus
a
,
ExtractNgrams
m
a
,
MonadJobStatus
m
,
CES
.
MonadCatch
m
,
HasCentralExchangeNotification
env
,
Show
a
)
,
MonadCatch
m
,
HasCentralExchangeNotification
env
)
=>
MkCorpusUser
->
TermType
Lang
->
Maybe
FlowSocialListWith
...
...
@@ -278,11 +280,11 @@ flow :: forall env err m a c.
,
HasTreeError
err
,
HasValidationError
err
,
FlowCorpus
a
,
ExtractNgrams
m
a
,
MkCorpus
c
,
MonadJobStatus
m
,
HasCentralExchangeNotification
env
,
CES
.
MonadCatch
m
,
Show
a
,
MonadCatch
m
)
=>
Maybe
c
->
MkCorpusUser
...
...
@@ -320,10 +322,10 @@ addDocumentsToHyperCorpus :: ( IsDBCmd env err m
,
HasNodeError
err
,
HasNLPServer
env
,
FlowCorpus
document
,
ExtractNgrams
m
document
,
MkCorpus
corpus
,
MonadLogger
m
,
CES
.
MonadCatch
m
,
Show
document
,
MonadCatch
m
)
=>
Maybe
corpus
->
TermType
Lang
...
...
@@ -475,7 +477,7 @@ data InsertDocError
extractNgramsFromDocument
::
(
UniqParameters
doc
,
HasText
doc
,
ExtractNgrams
doc
,
ExtractNgrams
m
doc
,
IsDBCmd
err
env
m
,
MonadLogger
m
,
CES
.
MonadCatch
m
...
...
@@ -491,13 +493,9 @@ extractNgramsFromDocument nlpServer lang doc =
-- will still be added to the corpus and we can try to regen the ngrams at a later stage.
UncommittedNgrams
.
Map
.
singleton
docId
<$>
(
documentIdWithNgrams
(
extractNgrams
nlpServer
$
withLang
lang
[
doc
])
(
Indexed
docId
doc
)
`
CES
.
catches
`
[
CES
.
Handler
$
\
(
e
::
Timeout
.
Timeout
)
->
CES
.
throw
e
,
CES
.
Handler
$
\
(
e
::
W
.
KillWorkerSafely
)
->
CES
.
throw
e
,
CES
.
Handler
$
\
(
e
::
CES
.
SomeException
)
->
do
$
(
logLocM
)
ERROR
$
T
.
pack
$
"Document with hash "
<>
show
docId
<>
" failed ngrams extraction due to an exception: "
<>
displayException
e
pure
$
DocumentIdWithNgrams
(
Indexed
docId
doc
)
mempty
]
`
catch
`
\
(
e
::
MultitermsExtractionException
)
->
do
$
(
logLocM
)
ERROR
$
T
.
pack
$
"Document with hash "
<>
show
docId
<>
" failed ngrams extraction due to an exception: "
<>
displayException
e
pure
$
DocumentIdWithNgrams
(
Indexed
docId
doc
)
mempty
)
where
docId
=
DocumentHashId
$
newUniqIdHash
doc
...
...
@@ -525,7 +523,7 @@ commitNgramsForDocument (UncommittedNgrams ng) (Indexed oldIx node) = do
extractNgramsFromDocuments
::
forall
doc
env
err
m
.
(
HasText
doc
,
UniqParameters
doc
,
ExtractNgrams
doc
,
ExtractNgrams
m
doc
,
IsDBCmd
env
err
m
,
MonadLogger
m
,
CES
.
MonadCatch
m
...
...
@@ -553,7 +551,7 @@ commitNgramsForDocuments ng nodes =
insertMasterDocs
::
(
HasNodeError
err
,
UniqParameters
doc
,
FlowCorpus
doc
,
MkCorpus
c
,
Show
do
c
,
MkCorpus
c
)
=>
GargConfig
->
UncommittedNgrams
doc
...
...
src/Gargantext/Database/Action/Flow/Extract.hs
View file @
e7735135
...
...
@@ -13,6 +13,7 @@ Portability : POSIX
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE UndecidableInstances #-}
module
Gargantext.Database.Action.Flow.Extract
...
...
@@ -30,7 +31,6 @@ import Gargantext.Core.Types (POS(NP), TermsCount, TermsWeight)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
,
cw_lastName
,
hc_who
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
,
hd_authors
,
hd_bdd
,
hd_institutes
,
hd_source
)
import
Gargantext.Database.Admin.Types.Node
(
Node
)
import
Gargantext.Database.Prelude
(
DBCmd
)
import
Gargantext.Database.Query.Table.NgramsPostag
(
NgramsPostag
)
import
Gargantext.Database.Schema.Ngrams
(
text2ngrams
)
import
Gargantext.Database.Schema.Node
(
NodePoly
(
..
))
...
...
@@ -39,7 +39,7 @@ import Gargantext.Prelude
------------------------------------------------------------------------
instance
ExtractNgrams
HyperdataContact
where
instance
Monad
m
=>
ExtractNgrams
m
HyperdataContact
where
extractNgrams
_ncs
_l
=
pure
.
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
.
extract
where
extract
::
HyperdataContact
...
...
@@ -55,15 +55,15 @@ instance ExtractNgrams HyperdataContact where
-- | Main ngrams extraction functionality.
-- For NgramsTerms, this calls NLP server under the hood.
-- For Sources, Institutes, Authors, this uses simple split on " ".
instance
ExtractNgrams
HyperdataDocument
where
instance
(
Monad
m
,
MonadBase
IO
m
)
=>
ExtractNgrams
m
HyperdataDocument
where
extractNgrams
::
NLPServerConfig
->
TermType
Lang
->
HyperdataDocument
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
->
m
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
extractNgrams
ncs
lang
hd
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extractNgramsT'
hd
where
extractNgramsT'
::
HyperdataDocument
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
->
m
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
extractNgramsT'
doc
=
do
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
...
...
@@ -87,7 +87,7 @@ instance ExtractNgrams HyperdataDocument where
<>
[(
SimpleNgrams
a'
,
(
DM
.
singleton
Authors
1
,
1
))
|
a'
<-
authors
]
<>
[(
EnrichedNgrams
t'
,
(
DM
.
singleton
NgramsTerms
1
,
cnt'
))
|
(
t'
,
cnt'
)
<-
termsWithCounts'
]
instance
(
ExtractNgrams
a
,
HasText
a
)
=>
ExtractNgrams
(
Node
a
)
instance
(
ExtractNgrams
m
a
,
HasText
a
)
=>
ExtractNgrams
m
(
Node
a
)
where
extractNgrams
ncs
l
(
Node
{
_node_hyperdata
=
h
})
=
extractNgrams
ncs
l
h
...
...
src/Gargantext/Database/Action/Flow/Types.hs
View file @
e7735135
...
...
@@ -25,7 +25,6 @@ import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import
Gargantext.Core.Text
(
HasText
)
import
Gargantext.API.Admin.Orchestrator.Types
qualified
as
API
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms
(
ExtractNgrams
)
import
Gargantext.Core.Types
(
HasValidationError
,
TermsCount
,
TermsWeight
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
)
...
...
@@ -51,11 +50,10 @@ type FlowCmdM env err m =
)
type
FlowCorpus
a
=
(
UniqParameters
a
,
InsertDb
a
,
ExtractNgrams
a
,
HasText
a
,
ToNode
a
,
ToJSON
a
,
InsertDb
a
,
HasText
a
,
ToNode
a
,
ToJSON
a
)
type
FlowInsertDB
a
=
(
AddUniqId
a
...
...
src/Gargantext/Database/Query/Table/NodeContext.hs
View file @
e7735135
...
...
@@ -151,9 +151,9 @@ data ContextForNgramsTerms =
getContextsForNgramsTerms
::
HasNodeError
err
=>
NodeId
->
[
Text
]
->
Maybe
Bool
->
Bool
->
DBQuery
err
x
[
ContextForNgramsTerms
]
getContextsForNgramsTerms
cId
ngramsTerms
(
Just
True
)
=
do
getContextsForNgramsTerms
cId
ngramsTerms
True
=
do
let
terms_length
=
length
ngramsTerms
res
<-
mkPGQuery
query
(
cId
,
PGS
.
In
ngramsTerms
,
terms_length
)
pure
$
(
\
(
_cfnt_nodeId
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment