Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
199
Issues
199
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
4eee890f
Commit
4eee890f
authored
Jun 16, 2025
by
Alfredo Di Napoli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ExtractNgrams is now polymorphic over a monad 'm'
parent
d19839d8
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
43 additions
and
22 deletions
+43
-22
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+2
-2
Multi.hs
src/Gargantext/Core/Text/Terms/Multi.hs
+21
-2
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+10
-6
Extract.hs
src/Gargantext/Database/Action/Flow/Extract.hs
+6
-6
Types.hs
src/Gargantext/Database/Action/Flow/Types.hs
+4
-6
No files found.
src/Gargantext/Core/Text/Terms.hs
View file @
4eee890f
...
@@ -116,11 +116,11 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
...
@@ -116,11 +116,11 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
instance
Hashable
ExtractedNgrams
instance
Hashable
ExtractedNgrams
-- | A typeclass that represents extracting ngrams from an entity.
-- | A typeclass that represents extracting ngrams from an entity.
class
ExtractNgrams
h
where
class
Monad
m
=>
ExtractNgrams
m
h
where
extractNgrams
::
NLPServerConfig
extractNgrams
::
NLPServerConfig
->
TermType
Lang
->
TermType
Lang
->
h
->
h
->
DBCmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
->
m
(
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
------------------------------------------------------------------------
------------------------------------------------------------------------
enrichedTerms
::
Lang
->
PosTagAlgo
->
POS
->
Terms
->
NgramsPostag
enrichedTerms
::
Lang
->
PosTagAlgo
->
POS
->
Terms
->
NgramsPostag
...
...
src/Gargantext/Core/Text/Terms/Multi.hs
View file @
4eee890f
...
@@ -11,9 +11,19 @@ Multi-terms are ngrams where n > 1.
...
@@ -11,9 +11,19 @@ Multi-terms are ngrams where n > 1.
-}
-}
module
Gargantext.Core.Text.Terms.Multi
(
multiterms
,
Terms
(
..
),
tokenTag2terms
,
multiterms_rake
,
tokenTagsWith
,
tokenTags
,
cleanTextForNLP
)
module
Gargantext.Core.Text.Terms.Multi
(
multiterms
,
Terms
(
..
)
,
MultitermsExtractionException
(
..
)
,
tokenTag2terms
,
multiterms_rake
,
tokenTagsWith
,
tokenTags
,
cleanTextForNLP
)
where
where
import
Control.Exception.Safe
qualified
as
Safe
import
Data.Attoparsec.Text
as
DAT
(
space
,
notChar
,
string
)
import
Data.Attoparsec.Text
as
DAT
(
space
,
notChar
,
string
)
import
Gargantext.Core
(
Lang
(
..
),
NLPServerConfig
(
..
),
PosTagAlgo
(
..
))
import
Gargantext.Core
(
Lang
(
..
),
NLPServerConfig
(
..
),
PosTagAlgo
(
..
))
import
Gargantext.Core.Text.Terms.Multi.Lang.En
qualified
as
En
import
Gargantext.Core.Text.Terms.Multi.Lang.En
qualified
as
En
...
@@ -25,14 +35,23 @@ import Gargantext.Core.Types ( POS(NP), Terms(Terms), TermsWithCount, TokenTag(T
...
@@ -25,14 +35,23 @@ import Gargantext.Core.Types ( POS(NP), Terms(Terms), TermsWithCount, TokenTag(T
import
Gargantext.Core.Utils
(
groupWithCounts
)
import
Gargantext.Core.Utils
(
groupWithCounts
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Utils.SpacyNLP
qualified
as
SpacyNLP
import
Gargantext.Utils.SpacyNLP
qualified
as
SpacyNLP
import
Network.HTTP.Client
import
Replace.Attoparsec.Text
as
RAT
(
streamEdit
)
import
Replace.Attoparsec.Text
as
RAT
(
streamEdit
)
-------------------------------------------------------------------
-------------------------------------------------------------------
type
NLP_API
=
Lang
->
Text
->
IO
PosSentences
type
NLP_API
=
Lang
->
Text
->
IO
PosSentences
data
MultitermsExtractionException
=
MEE_nlp_server_http_exception
!
NLPServerConfig
!
HttpException
deriving
Show
instance
Exception
MultitermsExtractionException
-------------------------------------------------------------------
-------------------------------------------------------------------
-- | Extracts the terms from the input 'txt'. Throws a
-- 'MultitermExtractionException' in case we fail.
multiterms
::
NLPServerConfig
->
Lang
->
Text
->
IO
[
TermsWithCount
]
multiterms
::
NLPServerConfig
->
Lang
->
Text
->
IO
[
TermsWithCount
]
multiterms
nsc
l
txt
=
do
multiterms
nsc
l
txt
=
handle
(
\
ex
->
Safe
.
throwIO
$
MEE_nlp_server_http_exception
nsc
ex
)
$
do
let
txt'
=
cleanTextForNLP
txt
let
txt'
=
cleanTextForNLP
txt
if
txt'
==
""
if
txt'
==
""
then
do
then
do
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
4eee890f
...
@@ -256,9 +256,11 @@ flowCorpus :: ( IsDBCmd env err m
...
@@ -256,9 +256,11 @@ flowCorpus :: ( IsDBCmd env err m
,
HasTreeError
err
,
HasTreeError
err
,
HasValidationError
err
,
HasValidationError
err
,
FlowCorpus
a
,
FlowCorpus
a
,
ExtractNgrams
m
a
,
MonadJobStatus
m
,
MonadJobStatus
m
,
MonadCatch
m
,
MonadCatch
m
,
HasCentralExchangeNotification
env
,
Show
a
)
,
HasCentralExchangeNotification
env
)
=>
MkCorpusUser
=>
MkCorpusUser
->
TermType
Lang
->
TermType
Lang
->
Maybe
FlowSocialListWith
->
Maybe
FlowSocialListWith
...
@@ -276,10 +278,11 @@ flow :: forall env err m a c.
...
@@ -276,10 +278,11 @@ flow :: forall env err m a c.
,
HasTreeError
err
,
HasTreeError
err
,
HasValidationError
err
,
HasValidationError
err
,
FlowCorpus
a
,
FlowCorpus
a
,
ExtractNgrams
m
a
,
MkCorpus
c
,
MkCorpus
c
,
MonadJobStatus
m
,
MonadJobStatus
m
,
HasCentralExchangeNotification
env
,
HasCentralExchangeNotification
env
,
MonadCatch
m
,
Show
a
,
MonadCatch
m
)
)
=>
Maybe
c
=>
Maybe
c
->
MkCorpusUser
->
MkCorpusUser
...
@@ -317,9 +320,10 @@ addDocumentsToHyperCorpus :: ( IsDBCmd env err m
...
@@ -317,9 +320,10 @@ addDocumentsToHyperCorpus :: ( IsDBCmd env err m
,
HasNodeError
err
,
HasNodeError
err
,
HasNLPServer
env
,
HasNLPServer
env
,
FlowCorpus
document
,
FlowCorpus
document
,
ExtractNgrams
m
document
,
MkCorpus
corpus
,
MkCorpus
corpus
,
MonadLogger
m
,
MonadLogger
m
,
MonadCatch
m
,
Show
document
,
MonadCatch
m
)
)
=>
Maybe
corpus
=>
Maybe
corpus
->
TermType
Lang
->
TermType
Lang
...
@@ -471,7 +475,7 @@ data InsertDocError
...
@@ -471,7 +475,7 @@ data InsertDocError
extractNgramsFromDocument
::
(
UniqParameters
doc
extractNgramsFromDocument
::
(
UniqParameters
doc
,
HasText
doc
,
HasText
doc
,
ExtractNgrams
doc
,
ExtractNgrams
m
doc
,
IsDBCmd
err
env
m
,
IsDBCmd
err
env
m
,
MonadLogger
m
,
MonadLogger
m
,
MonadCatch
m
,
MonadCatch
m
...
@@ -517,7 +521,7 @@ commitNgramsForDocument (UncommittedNgrams ng) (Indexed oldIx node) = do
...
@@ -517,7 +521,7 @@ commitNgramsForDocument (UncommittedNgrams ng) (Indexed oldIx node) = do
extractNgramsFromDocuments
::
forall
doc
env
err
m
.
extractNgramsFromDocuments
::
forall
doc
env
err
m
.
(
HasText
doc
(
HasText
doc
,
UniqParameters
doc
,
UniqParameters
doc
,
ExtractNgrams
doc
,
ExtractNgrams
m
doc
,
IsDBCmd
env
err
m
,
IsDBCmd
env
err
m
,
MonadLogger
m
,
MonadLogger
m
,
MonadCatch
m
,
MonadCatch
m
...
@@ -545,7 +549,7 @@ commitNgramsForDocuments ng nodes =
...
@@ -545,7 +549,7 @@ commitNgramsForDocuments ng nodes =
insertMasterDocs
::
(
HasNodeError
err
insertMasterDocs
::
(
HasNodeError
err
,
UniqParameters
doc
,
UniqParameters
doc
,
FlowCorpus
doc
,
FlowCorpus
doc
,
MkCorpus
c
,
Show
do
c
,
MkCorpus
c
)
)
=>
GargConfig
=>
GargConfig
->
UncommittedNgrams
doc
->
UncommittedNgrams
doc
...
...
src/Gargantext/Database/Action/Flow/Extract.hs
View file @
4eee890f
...
@@ -13,6 +13,7 @@ Portability : POSIX
...
@@ -13,6 +13,7 @@ Portability : POSIX
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE UndecidableInstances #-}
module
Gargantext.Database.Action.Flow.Extract
module
Gargantext.Database.Action.Flow.Extract
...
@@ -30,7 +31,6 @@ import Gargantext.Core.Types (POS(NP), TermsCount, TermsWeight)
...
@@ -30,7 +31,6 @@ import Gargantext.Core.Types (POS(NP), TermsCount, TermsWeight)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
,
cw_lastName
,
hc_who
)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
,
cw_lastName
,
hc_who
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
,
hd_authors
,
hd_bdd
,
hd_institutes
,
hd_source
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
,
hd_authors
,
hd_bdd
,
hd_institutes
,
hd_source
)
import
Gargantext.Database.Admin.Types.Node
(
Node
)
import
Gargantext.Database.Admin.Types.Node
(
Node
)
import
Gargantext.Database.Prelude
(
DBCmd
)
import
Gargantext.Database.Query.Table.NgramsPostag
(
NgramsPostag
)
import
Gargantext.Database.Query.Table.NgramsPostag
(
NgramsPostag
)
import
Gargantext.Database.Schema.Ngrams
(
text2ngrams
)
import
Gargantext.Database.Schema.Ngrams
(
text2ngrams
)
import
Gargantext.Database.Schema.Node
(
NodePoly
(
..
))
import
Gargantext.Database.Schema.Node
(
NodePoly
(
..
))
...
@@ -39,7 +39,7 @@ import Gargantext.Prelude
...
@@ -39,7 +39,7 @@ import Gargantext.Prelude
------------------------------------------------------------------------
------------------------------------------------------------------------
instance
ExtractNgrams
HyperdataContact
where
instance
Monad
m
=>
ExtractNgrams
m
HyperdataContact
where
extractNgrams
_ncs
_l
=
pure
.
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
.
extract
extractNgrams
_ncs
_l
=
pure
.
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
.
extract
where
where
extract
::
HyperdataContact
extract
::
HyperdataContact
...
@@ -55,15 +55,15 @@ instance ExtractNgrams HyperdataContact where
...
@@ -55,15 +55,15 @@ instance ExtractNgrams HyperdataContact where
-- | Main ngrams extraction functionality.
-- | Main ngrams extraction functionality.
-- For NgramsTerms, this calls NLP server under the hood.
-- For NgramsTerms, this calls NLP server under the hood.
-- For Sources, Institutes, Authors, this uses simple split on " ".
-- For Sources, Institutes, Authors, this uses simple split on " ".
instance
ExtractNgrams
HyperdataDocument
where
instance
(
Monad
m
,
MonadBase
IO
m
)
=>
ExtractNgrams
m
HyperdataDocument
where
extractNgrams
::
NLPServerConfig
extractNgrams
::
NLPServerConfig
->
TermType
Lang
->
TermType
Lang
->
HyperdataDocument
->
HyperdataDocument
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
->
m
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
extractNgrams
ncs
lang
hd
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extractNgramsT'
hd
extractNgrams
ncs
lang
hd
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extractNgramsT'
hd
where
where
extractNgramsT'
::
HyperdataDocument
extractNgramsT'
::
HyperdataDocument
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
->
m
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
extractNgramsT'
doc
=
do
extractNgramsT'
doc
=
do
let
source
=
text2ngrams
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
$
maybe
"Nothing"
identity
...
@@ -87,7 +87,7 @@ instance ExtractNgrams HyperdataDocument where
...
@@ -87,7 +87,7 @@ instance ExtractNgrams HyperdataDocument where
<>
[(
SimpleNgrams
a'
,
(
DM
.
singleton
Authors
1
,
1
))
|
a'
<-
authors
]
<>
[(
SimpleNgrams
a'
,
(
DM
.
singleton
Authors
1
,
1
))
|
a'
<-
authors
]
<>
[(
EnrichedNgrams
t'
,
(
DM
.
singleton
NgramsTerms
1
,
cnt'
))
|
(
t'
,
cnt'
)
<-
termsWithCounts'
]
<>
[(
EnrichedNgrams
t'
,
(
DM
.
singleton
NgramsTerms
1
,
cnt'
))
|
(
t'
,
cnt'
)
<-
termsWithCounts'
]
instance
(
ExtractNgrams
a
,
HasText
a
)
=>
ExtractNgrams
(
Node
a
)
instance
(
ExtractNgrams
m
a
,
HasText
a
)
=>
ExtractNgrams
m
(
Node
a
)
where
where
extractNgrams
ncs
l
(
Node
{
_node_hyperdata
=
h
})
=
extractNgrams
ncs
l
h
extractNgrams
ncs
l
(
Node
{
_node_hyperdata
=
h
})
=
extractNgrams
ncs
l
h
...
...
src/Gargantext/Database/Action/Flow/Types.hs
View file @
4eee890f
...
@@ -25,7 +25,6 @@ import Gargantext.Core.NodeStory.Types ( HasNodeStory )
...
@@ -25,7 +25,6 @@ import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import
Gargantext.Core.Text
(
HasText
)
import
Gargantext.Core.Text
(
HasText
)
import
Gargantext.API.Admin.Orchestrator.Types
qualified
as
API
import
Gargantext.API.Admin.Orchestrator.Types
qualified
as
API
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms
(
ExtractNgrams
)
import
Gargantext.Core.Types
(
HasValidationError
,
TermsCount
,
TermsWeight
)
import
Gargantext.Core.Types
(
HasValidationError
,
TermsCount
,
TermsWeight
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
)
...
@@ -51,11 +50,10 @@ type FlowCmdM env err m =
...
@@ -51,11 +50,10 @@ type FlowCmdM env err m =
)
)
type
FlowCorpus
a
=
(
UniqParameters
a
type
FlowCorpus
a
=
(
UniqParameters
a
,
InsertDb
a
,
InsertDb
a
,
ExtractNgrams
a
,
HasText
a
,
HasText
a
,
ToNode
a
,
ToNode
a
,
ToJSON
a
,
ToJSON
a
)
)
type
FlowInsertDB
a
=
(
AddUniqId
a
type
FlowInsertDB
a
=
(
AddUniqId
a
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment