Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
191
Issues
191
List
Board
Labels
Milestones
Merge Requests
8
Merge Requests
8
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
4eee890f
Commit
4eee890f
authored
Jun 16, 2025
by
Alfredo Di Napoli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ExtractNgrams is now polymorphic over a monad 'm'
parent
d19839d8
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
43 additions
and
22 deletions
+43
-22
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+2
-2
Multi.hs
src/Gargantext/Core/Text/Terms/Multi.hs
+21
-2
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+10
-6
Extract.hs
src/Gargantext/Database/Action/Flow/Extract.hs
+6
-6
Types.hs
src/Gargantext/Database/Action/Flow/Types.hs
+4
-6
No files found.
src/Gargantext/Core/Text/Terms.hs
View file @
4eee890f
...
@@ -116,11 +116,11 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
...
@@ -116,11 +116,11 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
instance
Hashable
ExtractedNgrams
instance
Hashable
ExtractedNgrams
-- | A typeclass that represents extracting ngrams from an entity.
-- | A typeclass that represents extracting ngrams from an entity.
class
ExtractNgrams
h
where
class
Monad
m
=>
ExtractNgrams
m
h
where
extractNgrams
::
NLPServerConfig
extractNgrams
::
NLPServerConfig
->
TermType
Lang
->
TermType
Lang
->
h
->
h
->
DBCmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
->
m
(
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
------------------------------------------------------------------------
------------------------------------------------------------------------
enrichedTerms
::
Lang
->
PosTagAlgo
->
POS
->
Terms
->
NgramsPostag
enrichedTerms
::
Lang
->
PosTagAlgo
->
POS
->
Terms
->
NgramsPostag
...
...
src/Gargantext/Core/Text/Terms/Multi.hs
View file @
4eee890f
...
@@ -11,9 +11,19 @@ Multi-terms are ngrams where n > 1.
...
@@ -11,9 +11,19 @@ Multi-terms are ngrams where n > 1.
-}
-}
module
Gargantext.Core.Text.Terms.Multi
(
multiterms
,
Terms
(
..
),
tokenTag2terms
,
multiterms_rake
,
tokenTagsWith
,
tokenTags
,
cleanTextForNLP
)
module
Gargantext.Core.Text.Terms.Multi
(
multiterms
,
Terms
(
..
)
,
MultitermsExtractionException
(
..
)
,
tokenTag2terms
,
multiterms_rake
,
tokenTagsWith
,
tokenTags
,
cleanTextForNLP
)
where
where
import
Control.Exception.Safe
qualified
as
Safe
import
Data.Attoparsec.Text
as
DAT
(
space
,
notChar
,
string
)
import
Data.Attoparsec.Text
as
DAT
(
space
,
notChar
,
string
)
import
Gargantext.Core
(
Lang
(
..
),
NLPServerConfig
(
..
),
PosTagAlgo
(
..
))
import
Gargantext.Core
(
Lang
(
..
),
NLPServerConfig
(
..
),
PosTagAlgo
(
..
))
import
Gargantext.Core.Text.Terms.Multi.Lang.En
qualified
as
En
import
Gargantext.Core.Text.Terms.Multi.Lang.En
qualified
as
En
...
@@ -25,14 +35,23 @@ import Gargantext.Core.Types ( POS(NP), Terms(Terms), TermsWithCount, TokenTag(T
...
@@ -25,14 +35,23 @@ import Gargantext.Core.Types ( POS(NP), Terms(Terms), TermsWithCount, TokenTag(T
import
Gargantext.Core.Utils
(
groupWithCounts
)
import
Gargantext.Core.Utils
(
groupWithCounts
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Utils.SpacyNLP
qualified
as
SpacyNLP
import
Gargantext.Utils.SpacyNLP
qualified
as
SpacyNLP
import
Network.HTTP.Client
import
Replace.Attoparsec.Text
as
RAT
(
streamEdit
)
import
Replace.Attoparsec.Text
as
RAT
(
streamEdit
)
-------------------------------------------------------------------
-------------------------------------------------------------------
type
NLP_API
=
Lang
->
Text
->
IO
PosSentences
type
NLP_API
=
Lang
->
Text
->
IO
PosSentences
data
MultitermsExtractionException
=
MEE_nlp_server_http_exception
!
NLPServerConfig
!
HttpException
deriving
Show
instance
Exception
MultitermsExtractionException
-------------------------------------------------------------------
-------------------------------------------------------------------
-- | Extracts the terms from the input 'txt'. Throws a
-- 'MultitermExtractionException' in case we fail.
multiterms
::
NLPServerConfig
->
Lang
->
Text
->
IO
[
TermsWithCount
]
multiterms
::
NLPServerConfig
->
Lang
->
Text
->
IO
[
TermsWithCount
]
multiterms
nsc
l
txt
=
do
multiterms
nsc
l
txt
=
handle
(
\
ex
->
Safe
.
throwIO
$
MEE_nlp_server_http_exception
nsc
ex
)
$
do
let
txt'
=
cleanTextForNLP
txt
let
txt'
=
cleanTextForNLP
txt
if
txt'
==
""
if
txt'
==
""
then
do
then
do
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
4eee890f
...
@@ -256,9 +256,11 @@ flowCorpus :: ( IsDBCmd env err m
...
@@ -256,9 +256,11 @@ flowCorpus :: ( IsDBCmd env err m
,
HasTreeError
err
,
HasTreeError
err
,
HasValidationError
err
,
HasValidationError
err
,
FlowCorpus
a
,
FlowCorpus
a
,
ExtractNgrams
m
a
,
MonadJobStatus
m
,
MonadJobStatus
m
,
MonadCatch
m
,
MonadCatch
m
,
HasCentralExchangeNotification
env
,
Show
a
)
,
HasCentralExchangeNotification
env
)
=>
MkCorpusUser
=>
MkCorpusUser
->
TermType
Lang
->
TermType
Lang
->
Maybe
FlowSocialListWith
->
Maybe
FlowSocialListWith
...
@@ -276,10 +278,11 @@ flow :: forall env err m a c.
...
@@ -276,10 +278,11 @@ flow :: forall env err m a c.
,
HasTreeError
err
,
HasTreeError
err
,
HasValidationError
err
,
HasValidationError
err
,
FlowCorpus
a
,
FlowCorpus
a
,
ExtractNgrams
m
a
,
MkCorpus
c
,
MkCorpus
c
,
MonadJobStatus
m
,
MonadJobStatus
m
,
HasCentralExchangeNotification
env
,
HasCentralExchangeNotification
env
,
MonadCatch
m
,
Show
a
,
MonadCatch
m
)
)
=>
Maybe
c
=>
Maybe
c
->
MkCorpusUser
->
MkCorpusUser
...
@@ -317,9 +320,10 @@ addDocumentsToHyperCorpus :: ( IsDBCmd env err m
...
@@ -317,9 +320,10 @@ addDocumentsToHyperCorpus :: ( IsDBCmd env err m
,
HasNodeError
err
,
HasNodeError
err
,
HasNLPServer
env
,
HasNLPServer
env
,
FlowCorpus
document
,
FlowCorpus
document
,
ExtractNgrams
m
document
,
MkCorpus
corpus
,
MkCorpus
corpus
,
MonadLogger
m
,
MonadLogger
m
,
MonadCatch
m
,
Show
document
,
MonadCatch
m
)
)
=>
Maybe
corpus
=>
Maybe
corpus
->
TermType
Lang
->
TermType
Lang
...
@@ -471,7 +475,7 @@ data InsertDocError
...
@@ -471,7 +475,7 @@ data InsertDocError
extractNgramsFromDocument
::
(
UniqParameters
doc
extractNgramsFromDocument
::
(
UniqParameters
doc
,
HasText
doc
,
HasText
doc
,
ExtractNgrams
doc
,
ExtractNgrams
m
doc
,
IsDBCmd
err
env
m
,
IsDBCmd
err
env
m
,
MonadLogger
m
,
MonadLogger
m
,
MonadCatch
m
,
MonadCatch
m
...
@@ -517,7 +521,7 @@ commitNgramsForDocument (UncommittedNgrams ng) (Indexed oldIx node) = do
...
@@ -517,7 +521,7 @@ commitNgramsForDocument (UncommittedNgrams ng) (Indexed oldIx node) = do
extractNgramsFromDocuments
::
forall
doc
env
err
m
.
extractNgramsFromDocuments
::
forall
doc
env
err
m
.
(
HasText
doc
(
HasText
doc
,
UniqParameters
doc
,
UniqParameters
doc
,
ExtractNgrams
doc
,
ExtractNgrams
m
doc
,
IsDBCmd
env
err
m
,
IsDBCmd
env
err
m
,
MonadLogger
m
,
MonadLogger
m
,
MonadCatch
m
,
MonadCatch
m
...
@@ -545,7 +549,7 @@ commitNgramsForDocuments ng nodes =
...
@@ -545,7 +549,7 @@ commitNgramsForDocuments ng nodes =
insertMasterDocs
::
(
HasNodeError
err
insertMasterDocs
::
(
HasNodeError
err
,
UniqParameters
doc
,
UniqParameters
doc
,
FlowCorpus
doc
,
FlowCorpus
doc
,
MkCorpus
c
,
Show
do
c
,
MkCorpus
c
)
)
=>
GargConfig
=>
GargConfig
->
UncommittedNgrams
doc
->
UncommittedNgrams
doc
...
...
src/Gargantext/Database/Action/Flow/Extract.hs
View file @
4eee890f
...
@@ -13,6 +13,7 @@ Portability : POSIX
...
@@ -13,6 +13,7 @@ Portability : POSIX
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE UndecidableInstances #-}
module
Gargantext.Database.Action.Flow.Extract
module
Gargantext.Database.Action.Flow.Extract
...
@@ -30,7 +31,6 @@ import Gargantext.Core.Types (POS(NP), TermsCount, TermsWeight)
...
@@ -30,7 +31,6 @@ import Gargantext.Core.Types (POS(NP), TermsCount, TermsWeight)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
,
cw_lastName
,
hc_who
)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
,
cw_lastName
,
hc_who
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
,
hd_authors
,
hd_bdd
,
hd_institutes
,
hd_source
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
,
hd_authors
,
hd_bdd
,
hd_institutes
,
hd_source
)
import
Gargantext.Database.Admin.Types.Node
(
Node
)
import
Gargantext.Database.Admin.Types.Node
(
Node
)
import
Gargantext.Database.Prelude
(
DBCmd
)
import
Gargantext.Database.Query.Table.NgramsPostag
(
NgramsPostag
)
import
Gargantext.Database.Query.Table.NgramsPostag
(
NgramsPostag
)
import
Gargantext.Database.Schema.Ngrams
(
text2ngrams
)
import
Gargantext.Database.Schema.Ngrams
(
text2ngrams
)
import
Gargantext.Database.Schema.Node
(
NodePoly
(
..
))
import
Gargantext.Database.Schema.Node
(
NodePoly
(
..
))
...
@@ -39,7 +39,7 @@ import Gargantext.Prelude
...
@@ -39,7 +39,7 @@ import Gargantext.Prelude
------------------------------------------------------------------------
------------------------------------------------------------------------
instance
ExtractNgrams
HyperdataContact
where
instance
Monad
m
=>
ExtractNgrams
m
HyperdataContact
where
extractNgrams
_ncs
_l
=
pure
.
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
.
extract
extractNgrams
_ncs
_l
=
pure
.
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
.
extract
where
where
extract
::
HyperdataContact
extract
::
HyperdataContact
...
@@ -55,15 +55,15 @@ instance ExtractNgrams HyperdataContact where
...
@@ -55,15 +55,15 @@ instance ExtractNgrams HyperdataContact where
-- | Main ngrams extraction functionality.
-- | Main ngrams extraction functionality.
-- For NgramsTerms, this calls NLP server under the hood.
-- For NgramsTerms, this calls NLP server under the hood.
-- For Sources, Institutes, Authors, this uses simple split on " ".
-- For Sources, Institutes, Authors, this uses simple split on " ".
instance
ExtractNgrams
HyperdataDocument
where
instance
(
Monad
m
,
MonadBase
IO
m
)
=>
ExtractNgrams
m
HyperdataDocument
where
extractNgrams
::
NLPServerConfig
extractNgrams
::
NLPServerConfig
->
TermType
Lang
->
TermType
Lang
->
HyperdataDocument
->
HyperdataDocument
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
->
m
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
extractNgrams
ncs
lang
hd
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extractNgramsT'
hd
extractNgrams
ncs
lang
hd
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extractNgramsT'
hd
where
where
extractNgramsT'
::
HyperdataDocument
extractNgramsT'
::
HyperdataDocument
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
->
m
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
extractNgramsT'
doc
=
do
extractNgramsT'
doc
=
do
let
source
=
text2ngrams
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
$
maybe
"Nothing"
identity
...
@@ -87,7 +87,7 @@ instance ExtractNgrams HyperdataDocument where
...
@@ -87,7 +87,7 @@ instance ExtractNgrams HyperdataDocument where
<>
[(
SimpleNgrams
a'
,
(
DM
.
singleton
Authors
1
,
1
))
|
a'
<-
authors
]
<>
[(
SimpleNgrams
a'
,
(
DM
.
singleton
Authors
1
,
1
))
|
a'
<-
authors
]
<>
[(
EnrichedNgrams
t'
,
(
DM
.
singleton
NgramsTerms
1
,
cnt'
))
|
(
t'
,
cnt'
)
<-
termsWithCounts'
]
<>
[(
EnrichedNgrams
t'
,
(
DM
.
singleton
NgramsTerms
1
,
cnt'
))
|
(
t'
,
cnt'
)
<-
termsWithCounts'
]
instance
(
ExtractNgrams
a
,
HasText
a
)
=>
ExtractNgrams
(
Node
a
)
instance
(
ExtractNgrams
m
a
,
HasText
a
)
=>
ExtractNgrams
m
(
Node
a
)
where
where
extractNgrams
ncs
l
(
Node
{
_node_hyperdata
=
h
})
=
extractNgrams
ncs
l
h
extractNgrams
ncs
l
(
Node
{
_node_hyperdata
=
h
})
=
extractNgrams
ncs
l
h
...
...
src/Gargantext/Database/Action/Flow/Types.hs
View file @
4eee890f
...
@@ -25,7 +25,6 @@ import Gargantext.Core.NodeStory.Types ( HasNodeStory )
...
@@ -25,7 +25,6 @@ import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import
Gargantext.Core.Text
(
HasText
)
import
Gargantext.Core.Text
(
HasText
)
import
Gargantext.API.Admin.Orchestrator.Types
qualified
as
API
import
Gargantext.API.Admin.Orchestrator.Types
qualified
as
API
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms
(
ExtractNgrams
)
import
Gargantext.Core.Types
(
HasValidationError
,
TermsCount
,
TermsWeight
)
import
Gargantext.Core.Types
(
HasValidationError
,
TermsCount
,
TermsWeight
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
)
...
@@ -51,11 +50,10 @@ type FlowCmdM env err m =
...
@@ -51,11 +50,10 @@ type FlowCmdM env err m =
)
)
type
FlowCorpus
a
=
(
UniqParameters
a
type
FlowCorpus
a
=
(
UniqParameters
a
,
InsertDb
a
,
InsertDb
a
,
ExtractNgrams
a
,
HasText
a
,
HasText
a
,
ToNode
a
,
ToNode
a
,
ToJSON
a
,
ToJSON
a
)
)
type
FlowInsertDB
a
=
(
AddUniqId
a
type
FlowInsertDB
a
=
(
AddUniqId
a
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment