Commit 67aecef7 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[NLP] parseWith function and improving types clarity.

parent 034ed3de
...@@ -26,7 +26,7 @@ library ...@@ -26,7 +26,7 @@ library
, directory , directory
, extra , extra
, filepath , filepath
, http-client , http-conduit
, lens , lens
, opaleye , opaleye
, postgresql-simple , postgresql-simple
...@@ -37,23 +37,24 @@ library ...@@ -37,23 +37,24 @@ library
, pureMD5 , pureMD5
, regex-compat , regex-compat
, semigroups , semigroups
, servant-multipart
, servant-server
, servant , servant
, servant-client , servant-client
, servant-multipart
, servant-server
, split , split
-- , stemmer
, tagsoup , tagsoup
, text , text
, time , time
, time-locale-compat , time-locale-compat
, transformers , transformers
--, utc
, uuid , uuid
, vector , vector
, wai , wai
, warp , warp
, yaml
, zlib , zlib
-- , stemmer
--, utc
exposed-modules: Data.Gargantext exposed-modules: Data.Gargantext
, Data.Gargantext.Analysis , Data.Gargantext.Analysis
, Data.Gargantext.DSL , Data.Gargantext.DSL
......
...@@ -7,16 +7,17 @@ module Data.Gargantext.NLP.CoreNLP where ...@@ -7,16 +7,17 @@ module Data.Gargantext.NLP.CoreNLP where
import Data.Aeson import Data.Aeson
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Proxy
import GHC.Generics import GHC.Generics
import Network.HTTP.Client (newManager, defaultManagerSettings)
import Servant.API
import Servant.Client
import Data.Gargantext.Prelude import Data.Gargantext.Prelude
import Data.Gargantext.Utils.Prefix (unPrefix) import Data.Gargantext.Utils.Prefix (unPrefix)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.ByteString.Char8 as S8
import qualified Data.Yaml as Yaml
import Network.HTTP.Simple
data Token = Token { _tokenIndex :: Int data Token = Token { _tokenIndex :: Int
, _tokenWord :: Text , _tokenWord :: Text
, _tokenOriginalText :: Text , _tokenOriginalText :: Text
...@@ -31,7 +32,7 @@ data Token = Token { _tokenIndex :: Int ...@@ -31,7 +32,7 @@ data Token = Token { _tokenIndex :: Int
$(deriveJSON (unPrefix "_token") ''Token) $(deriveJSON (unPrefix "_token") ''Token)
data Sentence = Sentence { _sentenceIndex :: Int data Sentence = Sentence { _sentenceIndex :: Int
, _sentenceToken :: [Token] , _sentenceTokens :: [Token]
} deriving (Show, Generic) } deriving (Show, Generic)
$(deriveJSON (unPrefix "_sentence") ''Sentence) $(deriveJSON (unPrefix "_sentence") ''Sentence)
...@@ -45,62 +46,38 @@ $(deriveJSON (unPrefix "_properties") ''Properties) ...@@ -45,62 +46,38 @@ $(deriveJSON (unPrefix "_properties") ''Properties)
data Sentences = Sentences { sentences :: [Sentence]} data Sentences = Sentences { sentences :: [Sentence]}
deriving (Show, Generic) deriving (Show, Generic)
instance ToJSON Sentences instance ToJSON Sentences
instance FromJSON Sentences
-- API Client configuration
-- Example of Client Request :
-- wget --post-data 'Alexandre Grothendieck is a mathematician who lived in France which is a european country. There is another sentence here.' 'localhost:9000/?properties={"annotators": "tokenize,ssplit,pos,ner", "outputFormat": "json"}' -O corenlpPretty :: String -> IO ()
corenlpPretty txt = do
-- the result is Sentence as a JSON let url = "POST http://localhost:9000/?properties={\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
-- {"sentences":[{"index":0,"tokens":[{"index":1,"word":"Alexandre","originalText":"Alexandre","lemma":"Alexandre","characterOffsetBegin":0,"characterOffsetEnd":9,"pos":"NNP","ner":"PERSON","before":"","after":" "},{"index":2,"word":"Grothendieck","originalText":"Grothendieck","lemma":"Grothendieck","characterOffsetBegin":10,"characterOffsetEnd":22,"pos":"NNP","ner":"PERSON","before":" ","after":" "},{"index":3,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":23,"characterOffsetEnd":25,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":4,"word":"a","originalText":"a","lemma":"a","characterOffsetBegin":26,"characterOffsetEnd":27,"pos":"DT","ner":"O","before":" ","after":" "},{"index":5,"word":"mathematician","originalText":"mathematician","lemma":"mathematician","characterOffsetBegin":28,"characterOffsetEnd":41,"pos":"NN","ner":"O","before":" ","after":" "},{"index":6,"word":"who","originalText":"who","lemma":"who","characterOffsetBegin":42,"characterOffsetEnd":45,"pos":"WP","ner":"O","before":" ","after":" "},{"index":7,"word":"lived","originalText":"lived","lemma":"live","characterOffsetBegin":46,"characterOffsetEnd":51,"pos":"VBD","ner":"O","before":" ","after":" "},{"index":8,"word":"in","originalText":"in","lemma":"in","characterOffsetBegin":52,"characterOffsetEnd":54,"pos":"IN","ner":"O","before":" ","after":" "},{"index":9,"word":"France","originalText":"France","lemma":"France","characterOffsetBegin":55,"characterOffsetEnd":61,"pos":"NNP","ner":"LOCATION","before":" ","after":" "},{"index":10,"word":"which","originalText":"which","lemma":"which","characterOffsetBegin":62,"characterOffsetEnd":67,"pos":"WDT","ner":"O","before":" ","after":" "},{"index":11,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":68,"characterOffsetEnd":70,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":12,"word":"a","originalText":"a","lemma":"a","characterOffsetBegin":71,"characterOffsetEnd":72,"pos":"DT","ner":"O","before":" ","after":" "},{"index":13,"word":"european","originalText":"european","lemma":"european","characterOffsetBegin":73,"characterOffsetEnd":81,"pos":"JJ","ner":"O","before":" ","after":" "},{"index":14,"word":"country","originalText":"country","lemma":"country","characterOffsetBegin":82,"characterOffsetEnd":89,"pos":"NN","ner":"O","before":" ","after":""},{"index":15,"word":".","originalText":".","lemma":".","characterOffsetBegin":89,"characterOffsetEnd":90,"pos":".","ner":"O","before":"","after":" "}]},{"index":1,"tokens":[{"index":1,"word":"There","originalText":"There","lemma":"there","characterOffsetBegin":91,"characterOffsetEnd":96,"pos":"EX","ner":"O","before":" ","after":" "},{"index":2,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":97,"characterOffsetEnd":99,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":3,"word":"another","originalText":"another","lemma":"another","characterOffsetBegin":100,"characterOffsetEnd":107,"pos":"DT","ner":"O","before":" ","after":" "},{"index":4,"word":"sentence","originalText":"sentence","lemma":"sentence","characterOffsetBegin":108,"characterOffsetEnd":116,"pos":"NN","ner":"O","before":" ","after":" "},{"index":5,"wo let request = setRequestBodyJSON txt url
response <- httpJSON request
type API = "" :> QueryParam "properties" Properties :> ReqBody '[JSON] String :> Post '[JSON] String -- putStrLn $ "The status code was: " ++
-- show (getResponseStatusCode response)
corenlp :: Maybe Properties -> Text -> ClientM Sentence -- print $ getResponseHeader "Content-Type" response
corenlp p t = client api S8.putStrLn $ Yaml.encode (getResponseBody response :: Sentences)
-- text2nlp :: Text -> ClientM corenlp :: String -> IO Sentences
corenlp txt = do
api :: Proxy API let url = "POST http://localhost:9000/?properties={\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
api = Proxy let request = setRequestBodyJSON txt url
response <- httpJSON request
pure (getResponseBody response :: Sentences)
-- corenlp t = client api
-- | parseWith
-- | URI scheme to use -- Part Of Speech example
--data Scheme = -- parseWith _tokenPos "Hello world."
-- Http -- ^ http:// -- == [[("``","``"),("Hello","UH"),("world","NN"),(".","."),("''","''")]]
-- | Https -- ^ https://
-- -- Named Entity Recognition example
---- | Simple data type to represent the target of HTTP requests -- parseWith _tokenNer "Hello world of Peter."
---- for servant's automatically-generated clients. -- [[("``","O"),("Hello","O"),("world","O"),("of","O"),("Peter","PERSON"),(".","O"),("''","O")]]
--data BaseUrl = BaseUrl parseWith :: (Token -> t) -> String -> IO [[(Text, t)]]
-- { baseUrlScheme :: Scheme -- ^ URI scheme to use parseWith f s = pm (pm (\t -> (_tokenWord t, f t))) <$> pm _sentenceTokens <$> sentences <$> corenlp s
-- , baseUrlHost :: String -- ^ host (eg "haskell.org")
-- , baseUrlPort :: Int -- ^ port (eg 80)
-- , baseUrlPath :: String -- ^ path (eg "/a/b/c")
-- }
--
queries :: ClientM (Text, Properties)
queries = do
let text = "Alexandre Grothendieck is free even in a sentence."
let prop = Properties "tokenize,ssplit,pos,ner" "json"
return (text, prop)
run :: IO ()
run = do
manager <- newManager defaultManagerSettings
res <- runClientM queries (ClientEnv manager (BaseUrl Http "localhost" 9000 ""))
case res of
Left err -> putStrLn $ "Error: " ++ show err
Right x -> do
print x
...@@ -21,16 +21,9 @@ import Data.Gargantext.Types.Node ( NodePoly ...@@ -21,16 +21,9 @@ import Data.Gargantext.Types.Node ( NodePoly
-- All the Database is structred like a hierachical Tree -- All the Database is structred like a hierachical Tree
-- Where a is a NodeType: -- Where a is a NodeType:
-- TODO force the logic of the architecture
data Tree a = Empty | Node' a (Tree a) (Tree a) deriving (Show) data Tree a = Empty | Node' a (Tree a) (Tree a) deriving (Show)
--gargTree :: Tree NodeType
--gargTree = Node' NodeUser Empty
-- (Node' Empty
-- (Project Empty Empty)
-- )
--
data NodeType = NodeUser data NodeType = NodeUser
| Folder | Project | Corpus | Document | Folder | Project | Corpus | Document
| Favorites | Favorites
...@@ -44,9 +37,13 @@ data NodeType = NodeUser ...@@ -44,9 +37,13 @@ data NodeType = NodeUser
-- | NodePoly indicates that Node has a Polymorphism Type -- | NodePoly indicates that Node has a Polymorphism Type
type Node json = NodePoly Integer NodeTypeId Integer Integer Text UTCTime json type Node json = NodePoly NodeId NodeTypeId NodeUserId NodeParentId NodeName UTCTime json
-- type Node json = NodePoly NodeId NodeTypeId UserId ParentId NodeName UTCTime json -- type Node json = NodePoly NodeId NodeTypeId UserId ParentId NodeName UTCTime json
type NodeTypeId = Int type NodeTypeId = Int
type NodeId = Int
type NodeParentId = Int
type NodeUserId = Int
type NodeName = Text
--type NodeUser = Node HyperdataUser --type NodeUser = Node HyperdataUser
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment