Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
200
Issues
200
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
67aecef7
Commit
67aecef7
authored
Oct 03, 2017
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[NLP] parseWith function and improving types clarity.
parent
034ed3de
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
51 additions
and
76 deletions
+51
-76
gargantext.cabal
gargantext.cabal
+6
-5
CoreNLP.hs
src/Data/Gargantext/NLP/CoreNLP.hs
+38
-61
Main.hs
src/Data/Gargantext/Types/Main.hs
+7
-10
No files found.
gargantext.cabal
View file @
67aecef7
...
...
@@ -26,7 +26,7 @@ library
, directory
, extra
, filepath
, http-c
lien
t
, http-c
ondui
t
, lens
, opaleye
, postgresql-simple
...
...
@@ -37,23 +37,24 @@ library
, pureMD5
, regex-compat
, semigroups
, servant-multipart
, servant-server
, servant
, servant-client
, servant-multipart
, servant-server
, split
-- , stemmer
, tagsoup
, text
, time
, time-locale-compat
, transformers
--, utc
, uuid
, vector
, wai
, warp
, yaml
, zlib
-- , stemmer
--, utc
exposed-modules: Data.Gargantext
, Data.Gargantext.Analysis
, Data.Gargantext.DSL
...
...
src/Data/Gargantext/NLP/CoreNLP.hs
View file @
67aecef7
...
...
@@ -7,16 +7,17 @@ module Data.Gargantext.NLP.CoreNLP where
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Proxy
import
GHC.Generics
import
Network.HTTP.Client
(
newManager
,
defaultManagerSettings
)
import
Servant.API
import
Servant.Client
import
Data.Gargantext.Prelude
import
Data.Gargantext.Utils.Prefix
(
unPrefix
)
import
Data.Text
(
Text
)
import
qualified
Data.ByteString.Char8
as
S8
import
qualified
Data.Yaml
as
Yaml
import
Network.HTTP.Simple
data
Token
=
Token
{
_tokenIndex
::
Int
,
_tokenWord
::
Text
,
_tokenOriginalText
::
Text
...
...
@@ -31,7 +32,7 @@ data Token = Token { _tokenIndex :: Int
$
(
deriveJSON
(
unPrefix
"_token"
)
''
T
oken
)
data
Sentence
=
Sentence
{
_sentenceIndex
::
Int
,
_sentenceToken
::
[
Token
]
,
_sentenceToken
s
::
[
Token
]
}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_sentence"
)
''
S
entence
)
...
...
@@ -45,62 +46,38 @@ $(deriveJSON (unPrefix "_properties") ''Properties)
data
Sentences
=
Sentences
{
sentences
::
[
Sentence
]}
deriving
(
Show
,
Generic
)
instance
ToJSON
Sentences
-- API Client configuration
-- Example of Client Request :
-- wget --post-data 'Alexandre Grothendieck is a mathematician who lived in France which is a european country. There is another sentence here.' 'localhost:9000/?properties={"annotators": "tokenize,ssplit,pos,ner", "outputFormat": "json"}' -O
-- the result is Sentence as a JSON
-- {"sentences":[{"index":0,"tokens":[{"index":1,"word":"Alexandre","originalText":"Alexandre","lemma":"Alexandre","characterOffsetBegin":0,"characterOffsetEnd":9,"pos":"NNP","ner":"PERSON","before":"","after":" "},{"index":2,"word":"Grothendieck","originalText":"Grothendieck","lemma":"Grothendieck","characterOffsetBegin":10,"characterOffsetEnd":22,"pos":"NNP","ner":"PERSON","before":" ","after":" "},{"index":3,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":23,"characterOffsetEnd":25,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":4,"word":"a","originalText":"a","lemma":"a","characterOffsetBegin":26,"characterOffsetEnd":27,"pos":"DT","ner":"O","before":" ","after":" "},{"index":5,"word":"mathematician","originalText":"mathematician","lemma":"mathematician","characterOffsetBegin":28,"characterOffsetEnd":41,"pos":"NN","ner":"O","before":" ","after":" "},{"index":6,"word":"who","originalText":"who","lemma":"who","characterOffsetBegin":42,"characterOffsetEnd":45,"pos":"WP","ner":"O","before":" ","after":" "},{"index":7,"word":"lived","originalText":"lived","lemma":"live","characterOffsetBegin":46,"characterOffsetEnd":51,"pos":"VBD","ner":"O","before":" ","after":" "},{"index":8,"word":"in","originalText":"in","lemma":"in","characterOffsetBegin":52,"characterOffsetEnd":54,"pos":"IN","ner":"O","before":" ","after":" "},{"index":9,"word":"France","originalText":"France","lemma":"France","characterOffsetBegin":55,"characterOffsetEnd":61,"pos":"NNP","ner":"LOCATION","before":" ","after":" "},{"index":10,"word":"which","originalText":"which","lemma":"which","characterOffsetBegin":62,"characterOffsetEnd":67,"pos":"WDT","ner":"O","before":" ","after":" "},{"index":11,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":68,"characterOffsetEnd":70,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":12,"word":"a","originalText":"a","lemma":"a","characterOffsetBegin":71,"characterOffsetEnd":72,"pos":"DT","ner":"O","before":" ","after":" "},{"index":13,"word":"european","originalText":"european","lemma":"european","characterOffsetBegin":73,"characterOffsetEnd":81,"pos":"JJ","ner":"O","before":" ","after":" "},{"index":14,"word":"country","originalText":"country","lemma":"country","characterOffsetBegin":82,"characterOffsetEnd":89,"pos":"NN","ner":"O","before":" ","after":""},{"index":15,"word":".","originalText":".","lemma":".","characterOffsetBegin":89,"characterOffsetEnd":90,"pos":".","ner":"O","before":"","after":" "}]},{"index":1,"tokens":[{"index":1,"word":"There","originalText":"There","lemma":"there","characterOffsetBegin":91,"characterOffsetEnd":96,"pos":"EX","ner":"O","before":" ","after":" "},{"index":2,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":97,"characterOffsetEnd":99,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":3,"word":"another","originalText":"another","lemma":"another","characterOffsetBegin":100,"characterOffsetEnd":107,"pos":"DT","ner":"O","before":" ","after":" "},{"index":4,"word":"sentence","originalText":"sentence","lemma":"sentence","characterOffsetBegin":108,"characterOffsetEnd":116,"pos":"NN","ner":"O","before":" ","after":" "},{"index":5,"wo
type
API
=
""
:>
QueryParam
"properties"
Properties
:>
ReqBody
'[
J
SON
]
String
:>
Post
'[
J
SON
]
String
corenlp
::
Maybe
Properties
->
Text
->
ClientM
Sentence
corenlp
p
t
=
client
api
-- text2nlp :: Text -> ClientM
api
::
Proxy
API
api
=
Proxy
-- corenlp t = client api
-- | URI scheme to use
--data Scheme =
-- Http -- ^ http://
-- | Https -- ^ https://
--
---- | Simple data type to represent the target of HTTP requests
---- for servant's automatically-generated clients.
--data BaseUrl = BaseUrl
-- { baseUrlScheme :: Scheme -- ^ URI scheme to use
-- , baseUrlHost :: String -- ^ host (eg "haskell.org")
-- , baseUrlPort :: Int -- ^ port (eg 80)
-- , baseUrlPath :: String -- ^ path (eg "/a/b/c")
-- }
--
queries
::
ClientM
(
Text
,
Properties
)
queries
=
do
let
text
=
"Alexandre Grothendieck is free even in a sentence."
let
prop
=
Properties
"tokenize,ssplit,pos,ner"
"json"
return
(
text
,
prop
)
run
::
IO
()
run
=
do
manager
<-
newManager
defaultManagerSettings
res
<-
runClientM
queries
(
ClientEnv
manager
(
BaseUrl
Http
"localhost"
9000
""
))
case
res
of
Left
err
->
putStrLn
$
"Error: "
++
show
err
Right
x
->
do
print
x
instance
FromJSON
Sentences
corenlpPretty
::
String
->
IO
()
corenlpPretty
txt
=
do
let
url
=
"POST http://localhost:9000/?properties={
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
let
request
=
setRequestBodyJSON
txt
url
response
<-
httpJSON
request
-- putStrLn $ "The status code was: " ++
-- show (getResponseStatusCode response)
-- print $ getResponseHeader "Content-Type" response
S8
.
putStrLn
$
Yaml
.
encode
(
getResponseBody
response
::
Sentences
)
corenlp
::
String
->
IO
Sentences
corenlp
txt
=
do
let
url
=
"POST http://localhost:9000/?properties={
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
let
request
=
setRequestBodyJSON
txt
url
response
<-
httpJSON
request
pure
(
getResponseBody
response
::
Sentences
)
-- | parseWith
-- Part Of Speech example
-- parseWith _tokenPos "Hello world."
-- == [[("``","``"),("Hello","UH"),("world","NN"),(".","."),("''","''")]]
-- Named Entity Recognition example
-- parseWith _tokenNer "Hello world of Peter."
-- [[("``","O"),("Hello","O"),("world","O"),("of","O"),("Peter","PERSON"),(".","O"),("''","O")]]
parseWith
::
(
Token
->
t
)
->
String
->
IO
[[(
Text
,
t
)]]
parseWith
f
s
=
pm
(
pm
(
\
t
->
(
_tokenWord
t
,
f
t
)))
<$>
pm
_sentenceTokens
<$>
sentences
<$>
corenlp
s
src/Data/Gargantext/Types/Main.hs
View file @
67aecef7
...
...
@@ -21,16 +21,9 @@ import Data.Gargantext.Types.Node ( NodePoly
-- All the Database is structred like a hierachical Tree
-- Where a is a NodeType:
-- TODO force the logic of the architecture
data
Tree
a
=
Empty
|
Node'
a
(
Tree
a
)
(
Tree
a
)
deriving
(
Show
)
--gargTree :: Tree NodeType
--gargTree = Node' NodeUser Empty
-- (Node' Empty
-- (Project Empty Empty)
-- )
--
data
NodeType
=
NodeUser
|
Folder
|
Project
|
Corpus
|
Document
|
Favorites
...
...
@@ -44,9 +37,13 @@ data NodeType = NodeUser
-- | NodePoly indicates that Node has a Polymorphism Type
type
Node
json
=
NodePoly
Integer
NodeTypeId
Integer
Integer
Text
UTCTime
json
type
Node
json
=
NodePoly
NodeId
NodeTypeId
NodeUserId
NodeParentId
NodeName
UTCTime
json
-- type Node json = NodePoly NodeId NodeTypeId UserId ParentId NodeName UTCTime json
type
NodeTypeId
=
Int
type
NodeTypeId
=
Int
type
NodeId
=
Int
type
NodeParentId
=
Int
type
NodeUserId
=
Int
type
NodeName
=
Text
--type NodeUser = Node HyperdataUser
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment