Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
199
Issues
199
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
e7735135
Verified
Commit
e7735135
authored
Jun 20, 2025
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev' into 477-dev-flow-zip-file-upload-2
parents
25123b89
d362b468
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
56 additions
and
43 deletions
+56
-43
gargantext.cabal
gargantext.cabal
+0
-1
GraphQL.hs
src/Gargantext/API/GraphQL.hs
+0
-1
Context.hs
src/Gargantext/API/GraphQL/Context.hs
+5
-5
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+2
-2
Multi.hs
src/Gargantext/Core/Text/Terms/Multi.hs
+21
-2
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+16
-18
Extract.hs
src/Gargantext/Database/Action/Flow/Extract.hs
+6
-6
Types.hs
src/Gargantext/Database/Action/Flow/Types.hs
+4
-6
NodeContext.hs
src/Gargantext/Database/Query/Table/NodeContext.hs
+2
-2
No files found.
gargantext.cabal
View file @
e7735135
...
...
@@ -644,7 +644,6 @@ library
, transformers-base ^>= 0.4.6
, tree-diff
, tuple ^>= 0.3.0.2
, unbounded-delays >= 0.1.1 && < 0.2
, unicode-collation >= 0.1.3.5
, unordered-containers ^>= 0.2.16.0
-- needed for Worker / System.Posix.Signals
...
...
src/Gargantext/API/GraphQL.hs
View file @
e7735135
...
...
@@ -15,7 +15,6 @@ Portability : POSIX
{-# LANGUAGE KindSignatures #-}
-- for use of Endpoint (name :: Symbol)
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE PartialTypeSignatures #-}
-- to automatically use suggested type hole signatures during compilation
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module
Gargantext.API.GraphQL
where
...
...
src/Gargantext/API/GraphQL/Context.hs
View file @
e7735135
...
...
@@ -22,8 +22,7 @@ import Data.Morpheus.Types
,
ResolverM
,
QUERY
)
import
Data.Text
(
pack
,
unpack
)
import
Data.Text
qualified
as
Text
import
Data.Text
(
pack
)
import
Data.Time.Format.ISO8601
(
iso8601Show
)
import
Gargantext.API.Admin.Auth.Types
(
AuthenticatedUser
)
import
Gargantext.API.Auth.PolicyCheck
(
nodeWriteChecks
,
AccessPolicyManager
)
...
...
@@ -97,7 +96,7 @@ data ContextsForNgramsArgs
=
ContextsForNgramsArgs
{
corpus_id
::
Int
,
ngrams_terms
::
[
Text
]
,
and_logic
::
Text
,
and_logic
::
Bool
}
deriving
(
Generic
,
GQLType
)
data
NodeContextCategoryMArgs
=
NodeContextCategoryMArgs
...
...
@@ -153,9 +152,10 @@ dbNodeContext context_id node_id = do
-- | Returns list of `ContextGQL` for given ngrams in given corpus id.
dbContextForNgrams
::
(
IsDBEnvExtra
env
)
=>
Int
->
[
Text
]
->
Text
->
GqlM
e
env
[
ContextGQL
]
=>
Int
->
[
Text
]
->
Bool
->
GqlM
e
env
[
ContextGQL
]
dbContextForNgrams
node_id
ngrams_terms
and_logic
=
do
contextsForNgramsTerms
<-
lift
$
runDBQuery
$
getContextsForNgramsTerms
(
UnsafeMkNodeId
node_id
)
ngrams_terms
(
readMaybe
$
unpack
$
Text
.
toTitle
and_logic
)
contextsForNgramsTerms
<-
lift
$
runDBQuery
$
getContextsForNgramsTerms
(
UnsafeMkNodeId
node_id
)
ngrams_terms
and_logic
--lift $ printDebug "[dbContextForNgrams] contextsForNgramsTerms" contextsForNgramsTerms
pure
$
toContextGQL
<$>
contextsForNgramsTerms
...
...
src/Gargantext/Core/Text/Terms.hs
View file @
e7735135
...
...
@@ -116,11 +116,11 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
instance
Hashable
ExtractedNgrams
-- | A typeclass that represents extracting ngrams from an entity.
class
ExtractNgrams
h
where
class
Monad
m
=>
ExtractNgrams
m
h
where
extractNgrams
::
NLPServerConfig
->
TermType
Lang
->
h
->
DBCmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
->
m
(
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
------------------------------------------------------------------------
enrichedTerms
::
Lang
->
PosTagAlgo
->
POS
->
Terms
->
NgramsPostag
...
...
src/Gargantext/Core/Text/Terms/Multi.hs
View file @
e7735135
...
...
@@ -11,9 +11,19 @@ Multi-terms are ngrams where n > 1.
-}
module
Gargantext.Core.Text.Terms.Multi
(
multiterms
,
Terms
(
..
),
tokenTag2terms
,
multiterms_rake
,
tokenTagsWith
,
tokenTags
,
cleanTextForNLP
)
module
Gargantext.Core.Text.Terms.Multi
(
multiterms
,
Terms
(
..
)
,
MultitermsExtractionException
(
..
)
,
tokenTag2terms
,
multiterms_rake
,
tokenTagsWith
,
tokenTags
,
cleanTextForNLP
)
where
import
Control.Exception.Safe
qualified
as
Safe
import
Data.Attoparsec.Text
as
DAT
(
space
,
notChar
,
string
)
import
Gargantext.Core
(
Lang
(
..
),
NLPServerConfig
(
..
),
PosTagAlgo
(
..
))
import
Gargantext.Core.Text.Terms.Multi.Lang.En
qualified
as
En
...
...
@@ -25,14 +35,23 @@ import Gargantext.Core.Types ( POS(NP), Terms(Terms), TermsWithCount, TokenTag(T
import
Gargantext.Core.Utils
(
groupWithCounts
)
import
Gargantext.Prelude
import
Gargantext.Utils.SpacyNLP
qualified
as
SpacyNLP
import
Network.HTTP.Client
import
Replace.Attoparsec.Text
as
RAT
(
streamEdit
)
-------------------------------------------------------------------
type
NLP_API
=
Lang
->
Text
->
IO
PosSentences
data
MultitermsExtractionException
=
MEE_nlp_server_http_exception
!
NLPServerConfig
!
HttpException
deriving
Show
instance
Exception
MultitermsExtractionException
-------------------------------------------------------------------
-- | Extracts the terms from the input 'txt'. Throws a
-- 'MultitermExtractionException' in case we fail.
multiterms
::
NLPServerConfig
->
Lang
->
Text
->
IO
[
TermsWithCount
]
multiterms
nsc
l
txt
=
do
multiterms
nsc
l
txt
=
handle
(
\
ex
->
Safe
.
throwIO
$
MEE_nlp_server_http_exception
nsc
ex
)
$
do
let
txt'
=
cleanTextForNLP
txt
if
txt'
==
""
then
do
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
e7735135
...
...
@@ -54,11 +54,10 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
)
where
import
Async.Worker
qualified
as
W
import
Conduit
import
Control.Concurrent.Timeout
qualified
as
Timeout
import
Control.Exception.Safe
qualified
as
CES
import
Control.Lens
(
to
,
view
)
import
Control.Exception.Safe
(
catch
,
MonadCatch
)
import
Data.Conduit
qualified
as
C
import
Data.Conduit.Internal
(
zipSources
)
import
Data.Conduit.List
qualified
as
CL
...
...
@@ -118,6 +117,7 @@ import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) )
------------------------------------------------------------------------
-- Imports for upgrade function
import
Gargantext.Database.Query.Tree.Error
(
HasTreeError
)
import
Gargantext.Core.Text.Terms.Multi
(
MultitermsExtractionException
)
------------------------------------------------------------------------
...
...
@@ -258,9 +258,11 @@ flowCorpus :: ( IsDBCmd env err m
,
HasTreeError
err
,
HasValidationError
err
,
FlowCorpus
a
,
ExtractNgrams
m
a
,
MonadJobStatus
m
,
CES
.
MonadCatch
m
,
HasCentralExchangeNotification
env
,
Show
a
)
,
MonadCatch
m
,
HasCentralExchangeNotification
env
)
=>
MkCorpusUser
->
TermType
Lang
->
Maybe
FlowSocialListWith
...
...
@@ -278,11 +280,11 @@ flow :: forall env err m a c.
,
HasTreeError
err
,
HasValidationError
err
,
FlowCorpus
a
,
ExtractNgrams
m
a
,
MkCorpus
c
,
MonadJobStatus
m
,
HasCentralExchangeNotification
env
,
CES
.
MonadCatch
m
,
Show
a
,
MonadCatch
m
)
=>
Maybe
c
->
MkCorpusUser
...
...
@@ -320,10 +322,10 @@ addDocumentsToHyperCorpus :: ( IsDBCmd env err m
,
HasNodeError
err
,
HasNLPServer
env
,
FlowCorpus
document
,
ExtractNgrams
m
document
,
MkCorpus
corpus
,
MonadLogger
m
,
CES
.
MonadCatch
m
,
Show
document
,
MonadCatch
m
)
=>
Maybe
corpus
->
TermType
Lang
...
...
@@ -475,7 +477,7 @@ data InsertDocError
extractNgramsFromDocument
::
(
UniqParameters
doc
,
HasText
doc
,
ExtractNgrams
doc
,
ExtractNgrams
m
doc
,
IsDBCmd
err
env
m
,
MonadLogger
m
,
CES
.
MonadCatch
m
...
...
@@ -491,13 +493,9 @@ extractNgramsFromDocument nlpServer lang doc =
-- will still be added to the corpus and we can try to regen the ngrams at a later stage.
UncommittedNgrams
.
Map
.
singleton
docId
<$>
(
documentIdWithNgrams
(
extractNgrams
nlpServer
$
withLang
lang
[
doc
])
(
Indexed
docId
doc
)
`
CES
.
catches
`
[
CES
.
Handler
$
\
(
e
::
Timeout
.
Timeout
)
->
CES
.
throw
e
,
CES
.
Handler
$
\
(
e
::
W
.
KillWorkerSafely
)
->
CES
.
throw
e
,
CES
.
Handler
$
\
(
e
::
CES
.
SomeException
)
->
do
$
(
logLocM
)
ERROR
$
T
.
pack
$
"Document with hash "
<>
show
docId
<>
" failed ngrams extraction due to an exception: "
<>
displayException
e
pure
$
DocumentIdWithNgrams
(
Indexed
docId
doc
)
mempty
]
`
catch
`
\
(
e
::
MultitermsExtractionException
)
->
do
$
(
logLocM
)
ERROR
$
T
.
pack
$
"Document with hash "
<>
show
docId
<>
" failed ngrams extraction due to an exception: "
<>
displayException
e
pure
$
DocumentIdWithNgrams
(
Indexed
docId
doc
)
mempty
)
where
docId
=
DocumentHashId
$
newUniqIdHash
doc
...
...
@@ -525,7 +523,7 @@ commitNgramsForDocument (UncommittedNgrams ng) (Indexed oldIx node) = do
extractNgramsFromDocuments
::
forall
doc
env
err
m
.
(
HasText
doc
,
UniqParameters
doc
,
ExtractNgrams
doc
,
ExtractNgrams
m
doc
,
IsDBCmd
env
err
m
,
MonadLogger
m
,
CES
.
MonadCatch
m
...
...
@@ -553,7 +551,7 @@ commitNgramsForDocuments ng nodes =
insertMasterDocs
::
(
HasNodeError
err
,
UniqParameters
doc
,
FlowCorpus
doc
,
MkCorpus
c
,
Show
do
c
,
MkCorpus
c
)
=>
GargConfig
->
UncommittedNgrams
doc
...
...
src/Gargantext/Database/Action/Flow/Extract.hs
View file @
e7735135
...
...
@@ -13,6 +13,7 @@ Portability : POSIX
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE UndecidableInstances #-}
module
Gargantext.Database.Action.Flow.Extract
...
...
@@ -30,7 +31,6 @@ import Gargantext.Core.Types (POS(NP), TermsCount, TermsWeight)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
,
cw_lastName
,
hc_who
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
,
hd_authors
,
hd_bdd
,
hd_institutes
,
hd_source
)
import
Gargantext.Database.Admin.Types.Node
(
Node
)
import
Gargantext.Database.Prelude
(
DBCmd
)
import
Gargantext.Database.Query.Table.NgramsPostag
(
NgramsPostag
)
import
Gargantext.Database.Schema.Ngrams
(
text2ngrams
)
import
Gargantext.Database.Schema.Node
(
NodePoly
(
..
))
...
...
@@ -39,7 +39,7 @@ import Gargantext.Prelude
------------------------------------------------------------------------
instance
ExtractNgrams
HyperdataContact
where
instance
Monad
m
=>
ExtractNgrams
m
HyperdataContact
where
extractNgrams
_ncs
_l
=
pure
.
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
.
extract
where
extract
::
HyperdataContact
...
...
@@ -55,15 +55,15 @@ instance ExtractNgrams HyperdataContact where
-- | Main ngrams extraction functionality.
-- For NgramsTerms, this calls NLP server under the hood.
-- For Sources, Institutes, Authors, this uses simple split on " ".
instance
ExtractNgrams
HyperdataDocument
where
instance
(
Monad
m
,
MonadBase
IO
m
)
=>
ExtractNgrams
m
HyperdataDocument
where
extractNgrams
::
NLPServerConfig
->
TermType
Lang
->
HyperdataDocument
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
->
m
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
extractNgrams
ncs
lang
hd
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extractNgramsT'
hd
where
extractNgramsT'
::
HyperdataDocument
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
->
m
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
extractNgramsT'
doc
=
do
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
...
...
@@ -87,7 +87,7 @@ instance ExtractNgrams HyperdataDocument where
<>
[(
SimpleNgrams
a'
,
(
DM
.
singleton
Authors
1
,
1
))
|
a'
<-
authors
]
<>
[(
EnrichedNgrams
t'
,
(
DM
.
singleton
NgramsTerms
1
,
cnt'
))
|
(
t'
,
cnt'
)
<-
termsWithCounts'
]
instance
(
ExtractNgrams
a
,
HasText
a
)
=>
ExtractNgrams
(
Node
a
)
instance
(
ExtractNgrams
m
a
,
HasText
a
)
=>
ExtractNgrams
m
(
Node
a
)
where
extractNgrams
ncs
l
(
Node
{
_node_hyperdata
=
h
})
=
extractNgrams
ncs
l
h
...
...
src/Gargantext/Database/Action/Flow/Types.hs
View file @
e7735135
...
...
@@ -25,7 +25,6 @@ import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import
Gargantext.Core.Text
(
HasText
)
import
Gargantext.API.Admin.Orchestrator.Types
qualified
as
API
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms
(
ExtractNgrams
)
import
Gargantext.Core.Types
(
HasValidationError
,
TermsCount
,
TermsWeight
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
)
...
...
@@ -51,11 +50,10 @@ type FlowCmdM env err m =
)
type
FlowCorpus
a
=
(
UniqParameters
a
,
InsertDb
a
,
ExtractNgrams
a
,
HasText
a
,
ToNode
a
,
ToJSON
a
,
InsertDb
a
,
HasText
a
,
ToNode
a
,
ToJSON
a
)
type
FlowInsertDB
a
=
(
AddUniqId
a
...
...
src/Gargantext/Database/Query/Table/NodeContext.hs
View file @
e7735135
...
...
@@ -151,9 +151,9 @@ data ContextForNgramsTerms =
getContextsForNgramsTerms
::
HasNodeError
err
=>
NodeId
->
[
Text
]
->
Maybe
Bool
->
Bool
->
DBQuery
err
x
[
ContextForNgramsTerms
]
getContextsForNgramsTerms
cId
ngramsTerms
(
Just
True
)
=
do
getContextsForNgramsTerms
cId
ngramsTerms
True
=
do
let
terms_length
=
length
ngramsTerms
res
<-
mkPGQuery
query
(
cId
,
PGS
.
In
ngramsTerms
,
terms_length
)
pure
$
(
\
(
_cfnt_nodeId
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment