Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
d3ca8202
Commit
d3ca8202
authored
Sep 05, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] NLP using Spacy Server
parent
3a8af37d
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
150 additions
and
18 deletions
+150
-18
gargantext.cabal
gargantext.cabal
+1
-0
package.yaml
package.yaml
+1
-0
Multi.hs
src/Gargantext/Core/Text/Terms/Multi.hs
+3
-2
Types.hs
src/Gargantext/Core/Text/Terms/Multi/PosTagging/Types.hs
+15
-14
Types.hs
src/Gargantext/Core/Types.hs
+4
-2
SpacyNLP.hs
src/Gargantext/Utils/SpacyNLP.hs
+126
-0
No files found.
gargantext.cabal
View file @
d3ca8202
...
...
@@ -319,6 +319,7 @@ library
Gargantext.Utils.Aeson
Gargantext.Utils.JohnSnowNLP
Gargantext.Utils.Servant
Gargantext.Utils.SpacyNLP
Gargantext.Utils.UTCTime
Paths_gargantext
hs-source-dirs:
...
...
package.yaml
View file @
d3ca8202
...
...
@@ -75,6 +75,7 @@ library:
-
Gargantext.Core.Types.Individu
-
Gargantext.Core.Types.Main
-
Gargantext.Core.Utils.Prefix
-
Gargantext.Utils.SpacyNLP
-
Gargantext.Database.Action.Flow
-
Gargantext.Database.Action.Flow.Types
-
Gargantext.Database.Action.User.New
...
...
src/Gargantext/Core/Text/Terms/Multi.hs
View file @
d3ca8202
...
...
@@ -28,7 +28,8 @@ import qualified Gargantext.Core.Text.Terms.Multi.Lang.En as En
import
qualified
Gargantext.Core.Text.Terms.Multi.Lang.Fr
as
Fr
import
Gargantext.Core.Text.Terms.Multi.RAKE
(
multiterms_rake
)
import
qualified
Gargantext.Utils.JohnSnowNLP
as
JohnSnow
-- import qualified Gargantext.Utils.JohnSnowNLP as JohnSnow
import
qualified
Gargantext.Utils.SpacyNLP
as
SpacyNLP
-------------------------------------------------------------------
...
...
@@ -51,7 +52,7 @@ tokenTag2terms (TokenTag ws t _ _) = Terms ws t
tokenTags
::
Lang
->
Text
->
IO
[[
TokenTag
]]
tokenTags
EN
txt
=
tokenTagsWith
EN
txt
corenlp
tokenTags
FR
txt
=
tokenTagsWith
FR
txt
JohnSnow
.
nlp
tokenTags
FR
txt
=
tokenTagsWith
FR
txt
SpacyNLP
.
nlp
tokenTags
_
_
=
panic
"[G.C.T.T.Multi] NLP API not implemented yet"
tokenTagsWith
::
Lang
->
Text
->
NLP_API
->
IO
[[
TokenTag
]]
...
...
src/Gargantext/Core/Text/Terms/Multi/PosTagging/Types.hs
View file @
d3ca8202
...
...
@@ -9,6 +9,7 @@ Portability : POSIX
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
...
...
@@ -22,27 +23,27 @@ import Gargantext.Prelude
import
GHC.Generics
data
Token
=
Token
{
_tokenIndex
::
Int
,
_tokenWord
::
Text
,
_tokenOriginalText
::
Text
,
_tokenLemma
::
Text
,
_tokenCharacterOffsetBegin
::
Int
,
_tokenCharacterOffsetEnd
::
Int
,
_tokenPos
::
Maybe
POS
,
_tokenNer
::
Maybe
NER
,
_tokenBefore
::
Maybe
Text
,
_tokenAfter
::
Maybe
Text
data
Token
=
Token
{
_tokenIndex
::
!
Int
,
_tokenWord
::
!
Text
,
_tokenOriginalText
::
!
Text
,
_tokenLemma
::
!
Text
,
_tokenCharacterOffsetBegin
::
!
Int
,
_tokenCharacterOffsetEnd
::
!
Int
,
_tokenPos
::
!
(
Maybe
POS
)
,
_tokenNer
::
!
(
Maybe
NER
)
,
_tokenBefore
::
!
(
Maybe
Text
)
,
_tokenAfter
::
!
(
Maybe
Text
)
}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_token"
)
''
T
oken
)
data
Sentence
=
Sentence
{
_sentenceIndex
::
Int
,
_sentenceTokens
::
[
Token
]
data
Sentence
=
Sentence
{
_sentenceIndex
::
!
Int
,
_sentenceTokens
::
!
[
Token
]
}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_sentence"
)
''
S
entence
)
data
Properties
=
Properties
{
_propertiesAnnotators
::
Text
,
_propertiesOutputFormat
::
Text
data
Properties
=
Properties
{
_propertiesAnnotators
::
!
Text
,
_propertiesOutputFormat
::
!
Text
}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_properties"
)
''
P
roperties
)
...
...
src/Gargantext/Core/Types.hs
View file @
d3ca8202
...
...
@@ -126,7 +126,7 @@ instance FromJSON POS where
instance
ToJSON
POS
instance
Hashable
POS
------------------------------------------------------------------------
data
NER
=
PERSON
|
ORGANIZATION
|
LOCATION
|
NoNER
data
NER
=
PERSON
|
ORGANIZATION
|
LOCATION
|
NoNER
{
noNer
::
!
Text
}
deriving
(
Show
,
Generic
)
------------------------------------------------------------------------
instance
FromJSON
NER
where
...
...
@@ -134,9 +134,11 @@ instance FromJSON NER where
where
ner
::
[
Char
]
->
NER
ner
"PERSON"
=
PERSON
ner
"PER"
=
PERSON
ner
"ORGANIZATION"
=
ORGANIZATION
ner
"LOCATION"
=
LOCATION
ner
_
=
NoNER
ner
"LOC"
=
LOCATION
ner
x
=
NoNER
(
cs
x
)
instance
ToJSON
NER
...
...
src/Gargantext/Utils/SpacyNLP.hs
0 → 100644
View file @
d3ca8202
{-|
Module : Gargantext.Utils.SpacyNLP
Description : John Snow NLP API connexion
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Spacy ecosystem: https://github.com/explosion/spaCy
Server to be used: https://gitlab.iscpif.fr/gargantext/spacy-server
-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Utils.SpacyNLP
where
import
Control.Lens
import
Data.Aeson
(
encode
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Text
hiding
(
map
,
group
,
filter
,
concat
,
zip
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Terms.Multi.PosTagging.Types
import
Gargantext.Core.Types
(
POS
(
..
),
NER
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Prelude
import
Network.HTTP.Simple
(
parseRequest
,
httpJSON
,
setRequestBodyLBS
,
getResponseBody
,
Response
)
data
SpacyData
=
SpacyData
{
_spacy_data
::
!
[
SpacyText
]}
deriving
(
Show
)
data
SpacyText
=
SpacyText
{
_spacy_text
::
!
Text
,
_spacy_tags
::
!
[
SpacyTags
]
}
deriving
(
Show
)
data
SpacyTags
=
SpacyTags
{
_spacyTags_text
::
!
Text
,
_spacyTags_text_with_ws
::
!
Text
,
_spacyTags_whitespace
::
!
Text
,
_spacyTags_head
::
!
Text
,
_spacyTags_head_index
::
!
Int
,
_spacyTags_left_edge
::
!
Text
,
_spacyTags_right_edge
::
!
Text
,
_spacyTags_index
::
Int
,
_spacyTags_ent_type
::
!
NER
,
_spacyTags_ent_iob
::
!
Text
,
_spacyTags_lemma
::
!
Text
,
_spacyTags_normalized
::
!
Text
,
_spacyTags_shape
::
!
Text
,
_spacyTags_prefix
::
!
Text
,
_spacyTags_suffix
::
!
Text
,
_spacyTags_is_alpha
::
Bool
,
_spacyTags_is_ascii
::
Bool
,
_spacyTags_is_digit
::
Bool
,
_spacyTags_is_title
::
Bool
,
_spacyTags_is_punct
::
Bool
,
_spacyTags_is_left_punct
::
Bool
,
_spacyTags_is_right_punct
::
Bool
,
_spacyTags_is_space
::
Bool
,
_spacyTags_is_bracket
::
Bool
,
_spacyTags_is_quote
::
Bool
,
_spacyTags_is_currency
::
Bool
,
_spacyTags_like_url
::
Bool
,
_spacyTags_like_num
::
Bool
,
_spacyTags_like_email
::
Bool
,
_spacyTags_is_oov
::
Bool
,
_spacyTags_is_stop
::
Bool
,
_spacyTags_pos
::
POS
,
_spacyTags_tag
::
POS
,
_spacyTags_dep
::
!
Text
,
_spacyTags_lang
::
!
Text
,
_spacyTags_prob
::
!
Int
,
_spacyTags_char_offset
::
!
Int
}
deriving
(
Show
)
data
SpacyRequest
=
SpacyRequest
{
_spacyRequest_text
::
!
Text
}
deriving
(
Show
)
spacyRequest
::
Text
->
IO
SpacyData
spacyRequest
txt
=
do
url
<-
parseRequest
$
unpack
"POST http://localhost:8001/pos"
let
request
=
setRequestBodyLBS
(
encode
$
SpacyRequest
txt
)
url
result
<-
httpJSON
request
::
IO
(
Response
SpacyData
)
pure
$
getResponseBody
result
-- Instances
deriveJSON
(
unPrefix
"_spacy_"
)
''
S
pacyData
deriveJSON
(
unPrefix
"_spacy_"
)
''
S
pacyText
deriveJSON
(
unPrefix
"_spacyTags_"
)
''
S
pacyTags
deriveJSON
(
unPrefix
"_spacyRequest_"
)
''
S
pacyRequest
makeLenses
''
S
pacyData
makeLenses
''
S
pacyText
makeLenses
''
S
pacyTags
makeLenses
''
S
pacyRequest
----------------------------------------------------------------
spacyTagsToToken
::
SpacyTags
->
Token
spacyTagsToToken
st
=
Token
(
st
^.
spacyTags_index
)
(
st
^.
spacyTags_normalized
)
(
st
^.
spacyTags_text
)
(
st
^.
spacyTags_lemma
)
(
st
^.
spacyTags_head_index
)
(
st
^.
spacyTags_char_offset
)
(
Just
$
st
^.
spacyTags_pos
)
(
Just
$
st
^.
spacyTags_ent_type
)
(
Just
$
st
^.
spacyTags_prefix
)
(
Just
$
st
^.
spacyTags_suffix
)
spacyDataToPosSentences
::
SpacyData
->
PosSentences
spacyDataToPosSentences
(
SpacyData
ds
)
=
PosSentences
$
map
(
\
(
i
,
ts
)
->
Sentence
i
ts
)
$
zip
[
1
..
]
$
map
(
\
(
SpacyText
_
tags
)
->
map
spacyTagsToToken
tags
)
ds
-----------------------------------------------------------------
nlp
::
Lang
->
Text
->
IO
PosSentences
nlp
FR
txt
=
spacyDataToPosSentences
<$>
spacyRequest
txt
nlp
_
_
=
panic
"Make sure you have the right model for your lang for spacy Server"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment