Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
157
Issues
157
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
dc3cd704
Commit
dc3cd704
authored
Sep 26, 2017
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] NLP functions. Servant ClientM need to be fixed.
parent
1c3d5cfd
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
110 additions
and
0 deletions
+110
-0
gargantext.cabal
gargantext.cabal
+4
-0
CoreNLP.hs
src/Data/Gargantext/NLP/CoreNLP.hs
+106
-0
No files found.
gargantext.cabal
View file @
dc3cd704
...
...
@@ -39,6 +39,8 @@ library
, semigroups
, servant-multipart
, servant-server
, servant
, servant-client
, split
-- , stemmer
, tagsoup
...
...
@@ -55,6 +57,8 @@ library
exposed-modules: Data.Gargantext
, Data.Gargantext.Analysis
, Data.Gargantext.DSL
, Data.Gargantext.NLP
, Data.Gargantext.NLP.CoreNLP
, Data.Gargantext.Database
, Data.Gargantext.Database.Instances
, Data.Gargantext.Database.Ngram
...
...
src/Data/Gargantext/NLP/CoreNLP.hs
0 → 100644
View file @
dc3cd704
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TemplateHaskell #-}
module
Data.Gargantext.NLP.CoreNLP
where
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Proxy
import
GHC.Generics
import
Network.HTTP.Client
(
newManager
,
defaultManagerSettings
)
import
Servant.API
import
Servant.Client
import
Data.Gargantext.Prelude
import
Data.Gargantext.Utils.Prefix
(
unPrefix
)
import
Data.Text
(
Text
)
data
Token
=
Token
{
_tokenIndex
::
Int
,
_tokenWord
::
Text
,
_tokenOriginalText
::
Text
,
_tokenLemma
::
Text
,
_tokenCharacterOffsetBegin
::
Int
,
_tokenCharacterOffsetEnd
::
Int
,
_tokenPos
::
Text
,
_tokenNer
::
Text
,
_tokenBefore
::
Text
,
_tokenAfter
::
Text
}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_token"
)
''
T
oken
)
data
Sentence
=
Sentence
{
_sentenceIndex
::
Int
,
_sentenceToken
::
[
Token
]
}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_sentence"
)
''
S
entence
)
data
Properties
=
Properties
{
_propertiesAnnotators
::
Text
,
_propertiesOutputFormat
::
Text
}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_properties"
)
''
P
roperties
)
data
Sentences
=
Sentences
{
sentences
::
[
Sentence
]}
deriving
(
Show
,
Generic
)
instance
ToJSON
Sentences
-- API Client configuration
-- Example of Client Request :
-- wget --post-data 'Alexandre Grothendieck is a mathematician who lived in France which is a european country. There is another sentence here.' 'localhost:9000/?properties={"annotators": "tokenize,ssplit,pos,ner", "outputFormat": "json"}' -O
-- the result is Sentence as a JSON
-- {"sentences":[{"index":0,"tokens":[{"index":1,"word":"Alexandre","originalText":"Alexandre","lemma":"Alexandre","characterOffsetBegin":0,"characterOffsetEnd":9,"pos":"NNP","ner":"PERSON","before":"","after":" "},{"index":2,"word":"Grothendieck","originalText":"Grothendieck","lemma":"Grothendieck","characterOffsetBegin":10,"characterOffsetEnd":22,"pos":"NNP","ner":"PERSON","before":" ","after":" "},{"index":3,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":23,"characterOffsetEnd":25,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":4,"word":"a","originalText":"a","lemma":"a","characterOffsetBegin":26,"characterOffsetEnd":27,"pos":"DT","ner":"O","before":" ","after":" "},{"index":5,"word":"mathematician","originalText":"mathematician","lemma":"mathematician","characterOffsetBegin":28,"characterOffsetEnd":41,"pos":"NN","ner":"O","before":" ","after":" "},{"index":6,"word":"who","originalText":"who","lemma":"who","characterOffsetBegin":42,"characterOffsetEnd":45,"pos":"WP","ner":"O","before":" ","after":" "},{"index":7,"word":"lived","originalText":"lived","lemma":"live","characterOffsetBegin":46,"characterOffsetEnd":51,"pos":"VBD","ner":"O","before":" ","after":" "},{"index":8,"word":"in","originalText":"in","lemma":"in","characterOffsetBegin":52,"characterOffsetEnd":54,"pos":"IN","ner":"O","before":" ","after":" "},{"index":9,"word":"France","originalText":"France","lemma":"France","characterOffsetBegin":55,"characterOffsetEnd":61,"pos":"NNP","ner":"LOCATION","before":" ","after":" "},{"index":10,"word":"which","originalText":"which","lemma":"which","characterOffsetBegin":62,"characterOffsetEnd":67,"pos":"WDT","ner":"O","before":" ","after":" "},{"index":11,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":68,"characterOffsetEnd":70,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":12,"word":"a","originalText":"a","lemma":"a","characterOffsetBegin":71,"characterOffsetEnd":72,"pos":"DT","ner":"O","before":" ","after":" "},{"index":13,"word":"european","originalText":"european","lemma":"european","characterOffsetBegin":73,"characterOffsetEnd":81,"pos":"JJ","ner":"O","before":" ","after":" "},{"index":14,"word":"country","originalText":"country","lemma":"country","characterOffsetBegin":82,"characterOffsetEnd":89,"pos":"NN","ner":"O","before":" ","after":""},{"index":15,"word":".","originalText":".","lemma":".","characterOffsetBegin":89,"characterOffsetEnd":90,"pos":".","ner":"O","before":"","after":" "}]},{"index":1,"tokens":[{"index":1,"word":"There","originalText":"There","lemma":"there","characterOffsetBegin":91,"characterOffsetEnd":96,"pos":"EX","ner":"O","before":" ","after":" "},{"index":2,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":97,"characterOffsetEnd":99,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":3,"word":"another","originalText":"another","lemma":"another","characterOffsetBegin":100,"characterOffsetEnd":107,"pos":"DT","ner":"O","before":" ","after":" "},{"index":4,"word":"sentence","originalText":"sentence","lemma":"sentence","characterOffsetBegin":108,"characterOffsetEnd":116,"pos":"NN","ner":"O","before":" ","after":" "},{"index":5,"wo
type
API
=
""
:>
QueryParam
"properties"
Properties
:>
ReqBody
'[
J
SON
]
String
:>
Post
'[
J
SON
]
String
corenlp
::
Maybe
Properties
->
Text
->
ClientM
Sentence
corenlp
p
t
=
client
api
-- text2nlp :: Text -> ClientM
api
::
Proxy
API
api
=
Proxy
-- corenlp t = client api
-- | URI scheme to use
--data Scheme =
-- Http -- ^ http://
-- | Https -- ^ https://
--
---- | Simple data type to represent the target of HTTP requests
---- for servant's automatically-generated clients.
--data BaseUrl = BaseUrl
-- { baseUrlScheme :: Scheme -- ^ URI scheme to use
-- , baseUrlHost :: String -- ^ host (eg "haskell.org")
-- , baseUrlPort :: Int -- ^ port (eg 80)
-- , baseUrlPath :: String -- ^ path (eg "/a/b/c")
-- }
--
queries
::
ClientM
(
Text
,
Properties
)
queries
=
do
let
text
=
"Alexandre Grothendieck is free even in a sentence."
let
prop
=
Properties
"tokenize,ssplit,pos,ner"
"json"
return
(
text
,
prop
)
run
::
IO
()
run
=
do
manager
<-
newManager
defaultManagerSettings
res
<-
runClientM
queries
(
ClientEnv
manager
(
BaseUrl
Http
"localhost"
9000
""
))
case
res
of
Left
err
->
putStrLn
$
"Error: "
++
show
err
Right
x
->
do
print
x
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment