Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
175
Issues
175
List
Board
Labels
Milestones
Merge Requests
10
Merge Requests
10
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
d3ca8202
Commit
d3ca8202
authored
Sep 05, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] NLP using Spacy Server
parent
3a8af37d
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
150 additions
and
18 deletions
+150
-18
gargantext.cabal
gargantext.cabal
+1
-0
package.yaml
package.yaml
+1
-0
Multi.hs
src/Gargantext/Core/Text/Terms/Multi.hs
+3
-2
Types.hs
src/Gargantext/Core/Text/Terms/Multi/PosTagging/Types.hs
+15
-14
Types.hs
src/Gargantext/Core/Types.hs
+4
-2
SpacyNLP.hs
src/Gargantext/Utils/SpacyNLP.hs
+126
-0
No files found.
gargantext.cabal
View file @
d3ca8202
...
@@ -319,6 +319,7 @@ library
...
@@ -319,6 +319,7 @@ library
Gargantext.Utils.Aeson
Gargantext.Utils.Aeson
Gargantext.Utils.JohnSnowNLP
Gargantext.Utils.JohnSnowNLP
Gargantext.Utils.Servant
Gargantext.Utils.Servant
Gargantext.Utils.SpacyNLP
Gargantext.Utils.UTCTime
Gargantext.Utils.UTCTime
Paths_gargantext
Paths_gargantext
hs-source-dirs:
hs-source-dirs:
...
...
package.yaml
View file @
d3ca8202
...
@@ -75,6 +75,7 @@ library:
...
@@ -75,6 +75,7 @@ library:
-
Gargantext.Core.Types.Individu
-
Gargantext.Core.Types.Individu
-
Gargantext.Core.Types.Main
-
Gargantext.Core.Types.Main
-
Gargantext.Core.Utils.Prefix
-
Gargantext.Core.Utils.Prefix
-
Gargantext.Utils.SpacyNLP
-
Gargantext.Database.Action.Flow
-
Gargantext.Database.Action.Flow
-
Gargantext.Database.Action.Flow.Types
-
Gargantext.Database.Action.Flow.Types
-
Gargantext.Database.Action.User.New
-
Gargantext.Database.Action.User.New
...
...
src/Gargantext/Core/Text/Terms/Multi.hs
View file @
d3ca8202
...
@@ -28,7 +28,8 @@ import qualified Gargantext.Core.Text.Terms.Multi.Lang.En as En
...
@@ -28,7 +28,8 @@ import qualified Gargantext.Core.Text.Terms.Multi.Lang.En as En
import
qualified
Gargantext.Core.Text.Terms.Multi.Lang.Fr
as
Fr
import
qualified
Gargantext.Core.Text.Terms.Multi.Lang.Fr
as
Fr
import
Gargantext.Core.Text.Terms.Multi.RAKE
(
multiterms_rake
)
import
Gargantext.Core.Text.Terms.Multi.RAKE
(
multiterms_rake
)
import
qualified
Gargantext.Utils.JohnSnowNLP
as
JohnSnow
-- import qualified Gargantext.Utils.JohnSnowNLP as JohnSnow
import
qualified
Gargantext.Utils.SpacyNLP
as
SpacyNLP
-------------------------------------------------------------------
-------------------------------------------------------------------
...
@@ -51,7 +52,7 @@ tokenTag2terms (TokenTag ws t _ _) = Terms ws t
...
@@ -51,7 +52,7 @@ tokenTag2terms (TokenTag ws t _ _) = Terms ws t
tokenTags
::
Lang
->
Text
->
IO
[[
TokenTag
]]
tokenTags
::
Lang
->
Text
->
IO
[[
TokenTag
]]
tokenTags
EN
txt
=
tokenTagsWith
EN
txt
corenlp
tokenTags
EN
txt
=
tokenTagsWith
EN
txt
corenlp
tokenTags
FR
txt
=
tokenTagsWith
FR
txt
JohnSnow
.
nlp
tokenTags
FR
txt
=
tokenTagsWith
FR
txt
SpacyNLP
.
nlp
tokenTags
_
_
=
panic
"[G.C.T.T.Multi] NLP API not implemented yet"
tokenTags
_
_
=
panic
"[G.C.T.T.Multi] NLP API not implemented yet"
tokenTagsWith
::
Lang
->
Text
->
NLP_API
->
IO
[[
TokenTag
]]
tokenTagsWith
::
Lang
->
Text
->
NLP_API
->
IO
[[
TokenTag
]]
...
...
src/Gargantext/Core/Text/Terms/Multi/PosTagging/Types.hs
View file @
d3ca8202
...
@@ -9,6 +9,7 @@ Portability : POSIX
...
@@ -9,6 +9,7 @@ Portability : POSIX
-}
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeOperators #-}
...
@@ -22,27 +23,27 @@ import Gargantext.Prelude
...
@@ -22,27 +23,27 @@ import Gargantext.Prelude
import
GHC.Generics
import
GHC.Generics
data
Token
=
Token
{
_tokenIndex
::
Int
data
Token
=
Token
{
_tokenIndex
::
!
Int
,
_tokenWord
::
Text
,
_tokenWord
::
!
Text
,
_tokenOriginalText
::
Text
,
_tokenOriginalText
::
!
Text
,
_tokenLemma
::
Text
,
_tokenLemma
::
!
Text
,
_tokenCharacterOffsetBegin
::
Int
,
_tokenCharacterOffsetBegin
::
!
Int
,
_tokenCharacterOffsetEnd
::
Int
,
_tokenCharacterOffsetEnd
::
!
Int
,
_tokenPos
::
Maybe
POS
,
_tokenPos
::
!
(
Maybe
POS
)
,
_tokenNer
::
Maybe
NER
,
_tokenNer
::
!
(
Maybe
NER
)
,
_tokenBefore
::
Maybe
Text
,
_tokenBefore
::
!
(
Maybe
Text
)
,
_tokenAfter
::
Maybe
Text
,
_tokenAfter
::
!
(
Maybe
Text
)
}
deriving
(
Show
,
Generic
)
}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_token"
)
''
T
oken
)
$
(
deriveJSON
(
unPrefix
"_token"
)
''
T
oken
)
data
Sentence
=
Sentence
{
_sentenceIndex
::
Int
data
Sentence
=
Sentence
{
_sentenceIndex
::
!
Int
,
_sentenceTokens
::
[
Token
]
,
_sentenceTokens
::
!
[
Token
]
}
deriving
(
Show
,
Generic
)
}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_sentence"
)
''
S
entence
)
$
(
deriveJSON
(
unPrefix
"_sentence"
)
''
S
entence
)
data
Properties
=
Properties
{
_propertiesAnnotators
::
Text
data
Properties
=
Properties
{
_propertiesAnnotators
::
!
Text
,
_propertiesOutputFormat
::
Text
,
_propertiesOutputFormat
::
!
Text
}
deriving
(
Show
,
Generic
)
}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_properties"
)
''
P
roperties
)
$
(
deriveJSON
(
unPrefix
"_properties"
)
''
P
roperties
)
...
...
src/Gargantext/Core/Types.hs
View file @
d3ca8202
...
@@ -126,7 +126,7 @@ instance FromJSON POS where
...
@@ -126,7 +126,7 @@ instance FromJSON POS where
instance
ToJSON
POS
instance
ToJSON
POS
instance
Hashable
POS
instance
Hashable
POS
------------------------------------------------------------------------
------------------------------------------------------------------------
data
NER
=
PERSON
|
ORGANIZATION
|
LOCATION
|
NoNER
data
NER
=
PERSON
|
ORGANIZATION
|
LOCATION
|
NoNER
{
noNer
::
!
Text
}
deriving
(
Show
,
Generic
)
deriving
(
Show
,
Generic
)
------------------------------------------------------------------------
------------------------------------------------------------------------
instance
FromJSON
NER
where
instance
FromJSON
NER
where
...
@@ -134,9 +134,11 @@ instance FromJSON NER where
...
@@ -134,9 +134,11 @@ instance FromJSON NER where
where
where
ner
::
[
Char
]
->
NER
ner
::
[
Char
]
->
NER
ner
"PERSON"
=
PERSON
ner
"PERSON"
=
PERSON
ner
"PER"
=
PERSON
ner
"ORGANIZATION"
=
ORGANIZATION
ner
"ORGANIZATION"
=
ORGANIZATION
ner
"LOCATION"
=
LOCATION
ner
"LOCATION"
=
LOCATION
ner
_
=
NoNER
ner
"LOC"
=
LOCATION
ner
x
=
NoNER
(
cs
x
)
instance
ToJSON
NER
instance
ToJSON
NER
...
...
src/Gargantext/Utils/SpacyNLP.hs
0 → 100644
View file @
d3ca8202
{-|
Module : Gargantext.Utils.SpacyNLP
Description : John Snow NLP API connexion
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Spacy ecosystem: https://github.com/explosion/spaCy
Server to be used: https://gitlab.iscpif.fr/gargantext/spacy-server
-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Utils.SpacyNLP
where
import
Control.Lens
import
Data.Aeson
(
encode
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Text
hiding
(
map
,
group
,
filter
,
concat
,
zip
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Terms.Multi.PosTagging.Types
import
Gargantext.Core.Types
(
POS
(
..
),
NER
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Prelude
import
Network.HTTP.Simple
(
parseRequest
,
httpJSON
,
setRequestBodyLBS
,
getResponseBody
,
Response
)
data
SpacyData
=
SpacyData
{
_spacy_data
::
!
[
SpacyText
]}
deriving
(
Show
)
data
SpacyText
=
SpacyText
{
_spacy_text
::
!
Text
,
_spacy_tags
::
!
[
SpacyTags
]
}
deriving
(
Show
)
data
SpacyTags
=
SpacyTags
{
_spacyTags_text
::
!
Text
,
_spacyTags_text_with_ws
::
!
Text
,
_spacyTags_whitespace
::
!
Text
,
_spacyTags_head
::
!
Text
,
_spacyTags_head_index
::
!
Int
,
_spacyTags_left_edge
::
!
Text
,
_spacyTags_right_edge
::
!
Text
,
_spacyTags_index
::
Int
,
_spacyTags_ent_type
::
!
NER
,
_spacyTags_ent_iob
::
!
Text
,
_spacyTags_lemma
::
!
Text
,
_spacyTags_normalized
::
!
Text
,
_spacyTags_shape
::
!
Text
,
_spacyTags_prefix
::
!
Text
,
_spacyTags_suffix
::
!
Text
,
_spacyTags_is_alpha
::
Bool
,
_spacyTags_is_ascii
::
Bool
,
_spacyTags_is_digit
::
Bool
,
_spacyTags_is_title
::
Bool
,
_spacyTags_is_punct
::
Bool
,
_spacyTags_is_left_punct
::
Bool
,
_spacyTags_is_right_punct
::
Bool
,
_spacyTags_is_space
::
Bool
,
_spacyTags_is_bracket
::
Bool
,
_spacyTags_is_quote
::
Bool
,
_spacyTags_is_currency
::
Bool
,
_spacyTags_like_url
::
Bool
,
_spacyTags_like_num
::
Bool
,
_spacyTags_like_email
::
Bool
,
_spacyTags_is_oov
::
Bool
,
_spacyTags_is_stop
::
Bool
,
_spacyTags_pos
::
POS
,
_spacyTags_tag
::
POS
,
_spacyTags_dep
::
!
Text
,
_spacyTags_lang
::
!
Text
,
_spacyTags_prob
::
!
Int
,
_spacyTags_char_offset
::
!
Int
}
deriving
(
Show
)
data
SpacyRequest
=
SpacyRequest
{
_spacyRequest_text
::
!
Text
}
deriving
(
Show
)
spacyRequest
::
Text
->
IO
SpacyData
spacyRequest
txt
=
do
url
<-
parseRequest
$
unpack
"POST http://localhost:8001/pos"
let
request
=
setRequestBodyLBS
(
encode
$
SpacyRequest
txt
)
url
result
<-
httpJSON
request
::
IO
(
Response
SpacyData
)
pure
$
getResponseBody
result
-- Instances
deriveJSON
(
unPrefix
"_spacy_"
)
''
S
pacyData
deriveJSON
(
unPrefix
"_spacy_"
)
''
S
pacyText
deriveJSON
(
unPrefix
"_spacyTags_"
)
''
S
pacyTags
deriveJSON
(
unPrefix
"_spacyRequest_"
)
''
S
pacyRequest
makeLenses
''
S
pacyData
makeLenses
''
S
pacyText
makeLenses
''
S
pacyTags
makeLenses
''
S
pacyRequest
----------------------------------------------------------------
spacyTagsToToken
::
SpacyTags
->
Token
spacyTagsToToken
st
=
Token
(
st
^.
spacyTags_index
)
(
st
^.
spacyTags_normalized
)
(
st
^.
spacyTags_text
)
(
st
^.
spacyTags_lemma
)
(
st
^.
spacyTags_head_index
)
(
st
^.
spacyTags_char_offset
)
(
Just
$
st
^.
spacyTags_pos
)
(
Just
$
st
^.
spacyTags_ent_type
)
(
Just
$
st
^.
spacyTags_prefix
)
(
Just
$
st
^.
spacyTags_suffix
)
spacyDataToPosSentences
::
SpacyData
->
PosSentences
spacyDataToPosSentences
(
SpacyData
ds
)
=
PosSentences
$
map
(
\
(
i
,
ts
)
->
Sentence
i
ts
)
$
zip
[
1
..
]
$
map
(
\
(
SpacyText
_
tags
)
->
map
spacyTagsToToken
tags
)
ds
-----------------------------------------------------------------
nlp
::
Lang
->
Text
->
IO
PosSentences
nlp
FR
txt
=
spacyDataToPosSentences
<$>
spacyRequest
txt
nlp
_
_
=
panic
"Make sure you have the right model for your lang for spacy Server"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment