Commit dc3cd704 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] NLP functions. Servant ClientM need to be fixed.

parent 1c3d5cfd
......@@ -39,6 +39,8 @@ library
, semigroups
, servant-multipart
, servant-server
, servant
, servant-client
, split
-- , stemmer
, tagsoup
......@@ -55,6 +57,8 @@ library
exposed-modules: Data.Gargantext
, Data.Gargantext.Analysis
, Data.Gargantext.DSL
, Data.Gargantext.NLP
, Data.Gargantext.NLP.CoreNLP
, Data.Gargantext.Database
, Data.Gargantext.Database.Instances
, Data.Gargantext.Database.Ngram
......
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TemplateHaskell #-}
module Data.Gargantext.NLP.CoreNLP where
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Proxy
import GHC.Generics
import Network.HTTP.Client (newManager, defaultManagerSettings)
import Servant.API
import Servant.Client
import Data.Gargantext.Prelude
import Data.Gargantext.Utils.Prefix (unPrefix)
import Data.Text (Text)
data Token = Token { _tokenIndex :: Int
, _tokenWord :: Text
, _tokenOriginalText :: Text
, _tokenLemma :: Text
, _tokenCharacterOffsetBegin :: Int
, _tokenCharacterOffsetEnd :: Int
, _tokenPos :: Text
, _tokenNer :: Text
, _tokenBefore :: Text
, _tokenAfter :: Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_token") ''Token)
data Sentence = Sentence { _sentenceIndex :: Int
, _sentenceToken :: [Token]
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_sentence") ''Sentence)
data Properties = Properties { _propertiesAnnotators :: Text
, _propertiesOutputFormat :: Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_properties") ''Properties)
data Sentences = Sentences { sentences :: [Sentence]}
deriving (Show, Generic)
instance ToJSON Sentences
-- API Client configuration
-- Example of Client Request :
-- wget --post-data 'Alexandre Grothendieck is a mathematician who lived in France which is a european country. There is another sentence here.' 'localhost:9000/?properties={"annotators": "tokenize,ssplit,pos,ner", "outputFormat": "json"}' -O
-- the result is Sentence as a JSON
-- {"sentences":[{"index":0,"tokens":[{"index":1,"word":"Alexandre","originalText":"Alexandre","lemma":"Alexandre","characterOffsetBegin":0,"characterOffsetEnd":9,"pos":"NNP","ner":"PERSON","before":"","after":" "},{"index":2,"word":"Grothendieck","originalText":"Grothendieck","lemma":"Grothendieck","characterOffsetBegin":10,"characterOffsetEnd":22,"pos":"NNP","ner":"PERSON","before":" ","after":" "},{"index":3,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":23,"characterOffsetEnd":25,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":4,"word":"a","originalText":"a","lemma":"a","characterOffsetBegin":26,"characterOffsetEnd":27,"pos":"DT","ner":"O","before":" ","after":" "},{"index":5,"word":"mathematician","originalText":"mathematician","lemma":"mathematician","characterOffsetBegin":28,"characterOffsetEnd":41,"pos":"NN","ner":"O","before":" ","after":" "},{"index":6,"word":"who","originalText":"who","lemma":"who","characterOffsetBegin":42,"characterOffsetEnd":45,"pos":"WP","ner":"O","before":" ","after":" "},{"index":7,"word":"lived","originalText":"lived","lemma":"live","characterOffsetBegin":46,"characterOffsetEnd":51,"pos":"VBD","ner":"O","before":" ","after":" "},{"index":8,"word":"in","originalText":"in","lemma":"in","characterOffsetBegin":52,"characterOffsetEnd":54,"pos":"IN","ner":"O","before":" ","after":" "},{"index":9,"word":"France","originalText":"France","lemma":"France","characterOffsetBegin":55,"characterOffsetEnd":61,"pos":"NNP","ner":"LOCATION","before":" ","after":" "},{"index":10,"word":"which","originalText":"which","lemma":"which","characterOffsetBegin":62,"characterOffsetEnd":67,"pos":"WDT","ner":"O","before":" ","after":" "},{"index":11,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":68,"characterOffsetEnd":70,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":12,"word":"a","originalText":"a","lemma":"a","characterOffsetBegin":71,"characterOffsetEnd":72,"pos":"DT","ner":"O","before":" ","after":" "},{"index":13,"word":"european","originalText":"european","lemma":"european","characterOffsetBegin":73,"characterOffsetEnd":81,"pos":"JJ","ner":"O","before":" ","after":" "},{"index":14,"word":"country","originalText":"country","lemma":"country","characterOffsetBegin":82,"characterOffsetEnd":89,"pos":"NN","ner":"O","before":" ","after":""},{"index":15,"word":".","originalText":".","lemma":".","characterOffsetBegin":89,"characterOffsetEnd":90,"pos":".","ner":"O","before":"","after":" "}]},{"index":1,"tokens":[{"index":1,"word":"There","originalText":"There","lemma":"there","characterOffsetBegin":91,"characterOffsetEnd":96,"pos":"EX","ner":"O","before":" ","after":" "},{"index":2,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":97,"characterOffsetEnd":99,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":3,"word":"another","originalText":"another","lemma":"another","characterOffsetBegin":100,"characterOffsetEnd":107,"pos":"DT","ner":"O","before":" ","after":" "},{"index":4,"word":"sentence","originalText":"sentence","lemma":"sentence","characterOffsetBegin":108,"characterOffsetEnd":116,"pos":"NN","ner":"O","before":" ","after":" "},{"index":5,"wo
type API = "" :> QueryParam "properties" Properties :> ReqBody '[JSON] String :> Post '[JSON] String
corenlp :: Maybe Properties -> Text -> ClientM Sentence
corenlp p t = client api
-- text2nlp :: Text -> ClientM
api :: Proxy API
api = Proxy
-- corenlp t = client api
-- | URI scheme to use
--data Scheme =
-- Http -- ^ http://
-- | Https -- ^ https://
--
---- | Simple data type to represent the target of HTTP requests
---- for servant's automatically-generated clients.
--data BaseUrl = BaseUrl
-- { baseUrlScheme :: Scheme -- ^ URI scheme to use
-- , baseUrlHost :: String -- ^ host (eg "haskell.org")
-- , baseUrlPort :: Int -- ^ port (eg 80)
-- , baseUrlPath :: String -- ^ path (eg "/a/b/c")
-- }
--
queries :: ClientM (Text, Properties)
queries = do
let text = "Alexandre Grothendieck is free even in a sentence."
let prop = Properties "tokenize,ssplit,pos,ner" "json"
return (text, prop)
run :: IO ()
run = do
manager <- newManager defaultManagerSettings
res <- runClientM queries (ClientEnv manager (BaseUrl Http "localhost" 9000 ""))
case res of
Left err -> putStrLn $ "Error: " ++ show err
Right x -> do
print x
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment