{-|
Module      : Gargantext.Utils.SpacyNLP
Description : spaCy NLP API connexion
Copyright   : (c) CNRS, 2017
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Spacy ecosystem: https://github.com/explosion/spaCy

Server to be used: https://gitlab.iscpif.fr/gargantext/spacy-server
-}

module Gargantext.Utils.SpacyNLP
  ( module Gargantext.Utils.SpacyNLP.Types
  , spacyRequest
  , spacyTagsToToken
  , spacyDataToPosSentences
  , nlp
  ) where

import Data.Aeson (encode)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Terms.Multi.PosTagging.Types ( PosSentences(PosSentences), Sentence(Sentence), Token(Token) )
import Gargantext.Prelude
import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response)
import Network.URI (URI(..))
import Gargantext.Utils.SpacyNLP.Types


-- | Send a text to the spaCy server and return the decoded answer.
--
-- The path component of the supplied 'URI' is replaced by @/pos@ (any path
-- already present in the URI is therefore discarded). The text is wrapped in
-- a 'SpacyRequest', JSON-encoded and sent as the body of a @POST@ request;
-- the response body is decoded as 'SpacyData' by 'httpJSON'.
spacyRequest :: URI -> Text -> IO SpacyData
spacyRequest uri txt = do
  baseReq  <- parseRequest $ "POST " <> show posEndpoint
  response <- httpJSON (setRequestBodyLBS payload baseReq) :: IO (Response SpacyData)
  pure (getResponseBody response)
  where
    -- Same server as the caller supplied, but always targeting the POS route.
    posEndpoint = uri { uriPath = "/pos" }
    -- JSON body carrying the text to analyse.
    payload     = encode (SpacyRequest txt)

----------------------------------------------------------------
-- | Convert one spaCy tag record into a 'Token'.
--
-- The first six fields are copied over as they are; the part-of-speech,
-- entity type, prefix and suffix are always wrapped in 'Just'.
spacyTagsToToken :: SpacyTags -> Token
spacyTagsToToken st =
  Token tokIndex
        tokNormalized
        tokText
        tokLemma
        tokHeadIndex
        tokCharOffset
        tokPos
        tokEntType
        tokPrefix
        tokSuffix
  where
    tokIndex      = _spacyTags_index st
    tokNormalized = _spacyTags_normalized st
    tokText       = _spacyTags_text st
    tokLemma      = _spacyTags_lemma st
    tokHeadIndex  = _spacyTags_head_index st
    tokCharOffset = _spacyTags_char_offset st
    tokPos        = Just (_spacyTags_pos st)
    tokEntType    = Just (_spacyTags_ent_type st)
    tokPrefix     = Just (_spacyTags_prefix st)
    tokSuffix     = Just (_spacyTags_suffix st)

-- | Turn a spaCy answer into 'PosSentences'.
--
-- Each 'SpacyText' of the answer becomes one 'Sentence'; sentences are
-- numbered from 1 in the order in which they appear in the answer.
spacyDataToPosSentences :: SpacyData -> PosSentences
spacyDataToPosSentences (SpacyData spacyTexts) =
  PosSentences (zipWith Sentence [1..] tokenLists)
  where
    tokenLists = map tokensOf spacyTexts
    -- Only the tags of a 'SpacyText' are used; its first field is ignored.
    tokensOf (SpacyText _ tags) = map spacyTagsToToken tags

-----------------------------------------------------------------
-- | Query the spaCy server for a text and return its POS-tagged sentences.
--
-- The 'Lang' argument is currently ignored.
nlp :: URI -> Lang -> Text -> IO PosSentences
nlp uri _lang txt = fmap spacyDataToPosSentences (spacyRequest uri txt)