Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
157
Issues
157
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
67aecef7
Commit
67aecef7
authored
Oct 03, 2017
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[NLP] parseWith function and improving types clarity.
parent
034ed3de
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
51 additions
and
76 deletions
+51
-76
gargantext.cabal
gargantext.cabal
+6
-5
CoreNLP.hs
src/Data/Gargantext/NLP/CoreNLP.hs
+38
-61
Main.hs
src/Data/Gargantext/Types/Main.hs
+7
-10
No files found.
gargantext.cabal
View file @
67aecef7
...
...
@@ -26,7 +26,7 @@ library
, directory
, extra
, filepath
, http-client
, http-conduit
, lens
, opaleye
, postgresql-simple
...
...
@@ -37,23 +37,24 @@ library
, pureMD5
, regex-compat
, semigroups
, servant-multipart
, servant-server
, servant
, servant-client
, servant-multipart
, servant-server
, split
-- , stemmer
, tagsoup
, text
, time
, time-locale-compat
, transformers
--, utc
, uuid
, vector
, wai
, warp
, yaml
, zlib
-- , stemmer
--, utc
exposed-modules: Data.Gargantext
, Data.Gargantext.Analysis
, Data.Gargantext.DSL
...
...
src/Data/Gargantext/NLP/CoreNLP.hs
View file @
67aecef7
...
...
@@ -7,16 +7,17 @@ module Data.Gargantext.NLP.CoreNLP where
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Proxy
import
GHC.Generics
import
Network.HTTP.Client
(
newManager
,
defaultManagerSettings
)
import
Servant.API
import
Servant.Client
import
Data.Gargantext.Prelude
import
Data.Gargantext.Utils.Prefix
(
unPrefix
)
import
Data.Text
(
Text
)
import
qualified
Data.ByteString.Char8
as
S8
import
qualified
Data.Yaml
as
Yaml
import
Network.HTTP.Simple
data
Token
=
Token
{
_tokenIndex
::
Int
,
_tokenWord
::
Text
,
_tokenOriginalText
::
Text
...
...
@@ -31,7 +32,7 @@ data Token = Token { _tokenIndex :: Int
$
(
deriveJSON
(
unPrefix
"_token"
)
''
T
oken
)
data
Sentence
=
Sentence
{
_sentenceIndex
::
Int
,
_sentenceToken
::
[
Token
]
,
_sentenceToken
s
::
[
Token
]
}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_sentence"
)
''
S
entence
)
...
...
@@ -45,62 +46,38 @@ $(deriveJSON (unPrefix "_properties") ''Properties)
data
Sentences
=
Sentences
{
sentences
::
[
Sentence
]}
deriving
(
Show
,
Generic
)
instance
ToJSON
Sentences
-- API Client configuration
-- Example of a client request:
-- wget --post-data 'Alexandre Grothendieck is a mathematician who lived in France which is a european country. There is another sentence here.' 'localhost:9000/?properties={"annotators": "tokenize,ssplit,pos,ner", "outputFormat": "json"}' -O
-- The result is a Sentences value serialized as JSON:
-- {"sentences":[{"index":0,"tokens":[{"index":1,"word":"Alexandre","originalText":"Alexandre","lemma":"Alexandre","characterOffsetBegin":0,"characterOffsetEnd":9,"pos":"NNP","ner":"PERSON","before":"","after":" "},{"index":2,"word":"Grothendieck","originalText":"Grothendieck","lemma":"Grothendieck","characterOffsetBegin":10,"characterOffsetEnd":22,"pos":"NNP","ner":"PERSON","before":" ","after":" "},{"index":3,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":23,"characterOffsetEnd":25,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":4,"word":"a","originalText":"a","lemma":"a","characterOffsetBegin":26,"characterOffsetEnd":27,"pos":"DT","ner":"O","before":" ","after":" "},{"index":5,"word":"mathematician","originalText":"mathematician","lemma":"mathematician","characterOffsetBegin":28,"characterOffsetEnd":41,"pos":"NN","ner":"O","before":" ","after":" "},{"index":6,"word":"who","originalText":"who","lemma":"who","characterOffsetBegin":42,"characterOffsetEnd":45,"pos":"WP","ner":"O","before":" ","after":" "},{"index":7,"word":"lived","originalText":"lived","lemma":"live","characterOffsetBegin":46,"characterOffsetEnd":51,"pos":"VBD","ner":"O","before":" ","after":" "},{"index":8,"word":"in","originalText":"in","lemma":"in","characterOffsetBegin":52,"characterOffsetEnd":54,"pos":"IN","ner":"O","before":" ","after":" "},{"index":9,"word":"France","originalText":"France","lemma":"France","characterOffsetBegin":55,"characterOffsetEnd":61,"pos":"NNP","ner":"LOCATION","before":" ","after":" "},{"index":10,"word":"which","originalText":"which","lemma":"which","characterOffsetBegin":62,"characterOffsetEnd":67,"pos":"WDT","ner":"O","before":" ","after":" "},{"index":11,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":68,"characterOffsetEnd":70,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":12,"word":"a","originalText":"a","lemma":"a","characterOffsetBegin":71,"characterOffsetEnd":72,"pos":"DT","ner":"O","before":" 
","after":" "},{"index":13,"word":"european","originalText":"european","lemma":"european","characterOffsetBegin":73,"characterOffsetEnd":81,"pos":"JJ","ner":"O","before":" ","after":" "},{"index":14,"word":"country","originalText":"country","lemma":"country","characterOffsetBegin":82,"characterOffsetEnd":89,"pos":"NN","ner":"O","before":" ","after":""},{"index":15,"word":".","originalText":".","lemma":".","characterOffsetBegin":89,"characterOffsetEnd":90,"pos":".","ner":"O","before":"","after":" "}]},{"index":1,"tokens":[{"index":1,"word":"There","originalText":"There","lemma":"there","characterOffsetBegin":91,"characterOffsetEnd":96,"pos":"EX","ner":"O","before":" ","after":" "},{"index":2,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":97,"characterOffsetEnd":99,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":3,"word":"another","originalText":"another","lemma":"another","characterOffsetBegin":100,"characterOffsetEnd":107,"pos":"DT","ner":"O","before":" ","after":" "},{"index":4,"word":"sentence","originalText":"sentence","lemma":"sentence","characterOffsetBegin":108,"characterOffsetEnd":116,"pos":"NN","ner":"O","before":" ","after":" "},{"index":5,"wo
type
API
=
""
:>
QueryParam
"properties"
Properties
:>
ReqBody
'[
J
SON
]
String
:>
Post
'[
J
SON
]
String
corenlp
::
Maybe
Properties
->
Text
->
ClientM
Sentence
corenlp
p
t
=
client
api
-- text2nlp :: Text -> ClientM
api
::
Proxy
API
api
=
Proxy
-- corenlp t = client api
-- | URI scheme to use
--data Scheme =
-- Http -- ^ http://
-- | Https -- ^ https://
--
---- | Simple data type to represent the target of HTTP requests
---- for servant's automatically-generated clients.
--data BaseUrl = BaseUrl
-- { baseUrlScheme :: Scheme -- ^ URI scheme to use
-- , baseUrlHost :: String -- ^ host (eg "haskell.org")
-- , baseUrlPort :: Int -- ^ port (eg 80)
-- , baseUrlPath :: String -- ^ path (eg "/a/b/c")
-- }
--
queries
::
ClientM
(
Text
,
Properties
)
queries
=
do
let
text
=
"Alexandre Grothendieck is free even in a sentence."
let
prop
=
Properties
"tokenize,ssplit,pos,ner"
"json"
return
(
text
,
prop
)
run
::
IO
()
run
=
do
manager
<-
newManager
defaultManagerSettings
res
<-
runClientM
queries
(
ClientEnv
manager
(
BaseUrl
Http
"localhost"
9000
""
))
case
res
of
Left
err
->
putStrLn
$
"Error: "
++
show
err
Right
x
->
do
print
x
instance
FromJSON
Sentences
corenlpPretty
::
String
->
IO
()
corenlpPretty
txt
=
do
let
url
=
"POST http://localhost:9000/?properties={
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
let
request
=
setRequestBodyJSON
txt
url
response
<-
httpJSON
request
-- putStrLn $ "The status code was: " ++
-- show (getResponseStatusCode response)
-- print $ getResponseHeader "Content-Type" response
S8
.
putStrLn
$
Yaml
.
encode
(
getResponseBody
response
::
Sentences
)
corenlp
::
String
->
IO
Sentences
corenlp
txt
=
do
let
url
=
"POST http://localhost:9000/?properties={
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
let
request
=
setRequestBodyJSON
txt
url
response
<-
httpJSON
request
pure
(
getResponseBody
response
::
Sentences
)
-- | parseWith
-- Part Of Speech example
-- parseWith _tokenPos "Hello world."
-- == [[("``","``"),("Hello","UH"),("world","NN"),(".","."),("''","''")]]
-- Named Entity Recognition example
-- parseWith _tokenNer "Hello world of Peter."
-- == [[("``","O"),("Hello","O"),("world","O"),("of","O"),("Peter","PERSON"),(".","O"),("''","O")]]
parseWith
::
(
Token
->
t
)
->
String
->
IO
[[(
Text
,
t
)]]
parseWith
f
s
=
pm
(
pm
(
\
t
->
(
_tokenWord
t
,
f
t
)))
<$>
pm
_sentenceTokens
<$>
sentences
<$>
corenlp
s
src/Data/Gargantext/Types/Main.hs
View file @
67aecef7
...
...
@@ -21,16 +21,9 @@ import Data.Gargantext.Types.Node ( NodePoly
-- The whole database is structured like a hierarchical tree,
-- where a is a NodeType:
-- TODO: enforce the logic of the architecture
data
Tree
a
=
Empty
|
Node'
a
(
Tree
a
)
(
Tree
a
)
deriving
(
Show
)
--gargTree :: Tree NodeType
--gargTree = Node' NodeUser Empty
-- (Node' Empty
-- (Project Empty Empty)
-- )
--
data
NodeType
=
NodeUser
|
Folder
|
Project
|
Corpus
|
Document
|
Favorites
...
...
@@ -44,9 +37,13 @@ data NodeType = NodeUser
-- | NodePoly indicates that Node has a polymorphic type
type
Node
json
=
NodePoly
Integer
NodeTypeId
Integer
Integer
Text
UTCTime
json
type
Node
json
=
NodePoly
NodeId
NodeTypeId
NodeUserId
NodeParentId
NodeName
UTCTime
json
-- type Node json = NodePoly NodeId NodeTypeId UserId ParentId NodeName UTCTime json
type
NodeTypeId
=
Int
type
NodeTypeId
=
Int
type
NodeId
=
Int
type
NodeParentId
=
Int
type
NodeUserId
=
Int
type
NodeName
=
Text
--type NodeUser = Node HyperdataUser
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment