[corenlp] better debugging for corenlp errors

Also, some openalex fixes.
parent 5d0461e3
@@ -99,7 +99,7 @@ source-repository-package
 source-repository-package
   type: git
   location: https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git
-  tag: dab07cb89e8ab8eaaff8619f5e21d944d9c526ab
+  tag: 1cf872fb3bd0e3e44af31247833c4b6bb7d0dca5

 source-repository-package
   type: git
...
@@ -26,7 +26,7 @@ get :: Text
     -> Maybe Limit
     -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
 get _email q lang mLimit = do
-  let limit = getLimit $ fromMaybe 10000 mLimit
+  let limit = getLimit $ fromMaybe 1000 mLimit
   let mFilter = (\l -> "language:" <> l) <$> toISO639Lang lang
   eRes <- OA.fetchWorksC Nothing mFilter $ Just $ Corpus.getRawQuery q
   pure $ (\(len, docsC) -> (len, docsC .| takeC limit .| mapC toDoc)) <$> eRes
...
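For context on the limit change: `fetchWorksC` returns an open-ended stream, and the default cap (now 1000 instead of 10000) only bounds how many documents `takeC` lets through before closing the conduit. Below is a minimal sketch of that capping pattern on a plain `Int` source; the names and numbers are illustrative stand-ins, not Gargantext code.

```haskell
-- Sketch only: the takeC/mapC capping pattern used on the OpenAlex stream,
-- demonstrated on an unbounded Int source instead of HyperdataDocument.
import Conduit (mapC, runConduit, sinkList, takeC, yieldMany, (.|))

main :: IO ()
main = do
  let limit = 1000                              -- stands in for `fromMaybe 1000 mLimit`
  xs <- runConduit $ yieldMany ([1 ..] :: [Int])  -- unbounded source, like the paged API
                  .| takeC limit                  -- stop pulling after `limit` elements
                  .| mapC (* 2)                   -- stands in for `mapC toDoc`
                  .| sinkList
  print (length xs)                             -- prints 1000
```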
@@ -22,11 +22,13 @@ Source: https://en.wikipedia.org/wiki/Part-of-speech_tagging
 {-# LANGUAGE TemplateHaskell #-}
 {-# LANGUAGE TypeOperators   #-}

-module Gargantext.Core.Text.Terms.Multi.PosTagging
-  where
+module Gargantext.Core.Text.Terms.Multi.PosTagging where

+import Control.Exception (catch, throwIO)
 import Data.Aeson
+import Data.ByteString.Lazy.Char8 qualified as BSL
 import Data.ByteString.Lazy.Internal (ByteString)
+import Data.Map qualified as Map
 import Data.Set (fromList)
 import Data.Text (Text, splitOn, pack, toLower)
 import Gargantext.Core (Lang(..))
@@ -35,8 +37,6 @@ import Gargantext.Core.Types
 import Gargantext.Prelude
 import Network.HTTP.Simple
 import Network.URI (URI(..))
-import qualified Data.ByteString.Lazy.Char8 as BSL
-import qualified Data.Map as Map

 -- import qualified Gargantext.Utils.SpacyNLP as SpacyNLP

@@ -82,7 +82,15 @@ corenlp' uri lang txt = do
   req <- parseRequest $
     "POST " <> show (uri { uriQuery = "?properties=" <> (BSL.unpack $ encode $ toJSON $ Map.fromList properties) })
   -- curl -XPOST 'http://localhost:9000/?properties=%7B%22annotators%22:%20%22tokenize,ssplit,pos,ner%22,%20%22outputFormat%22:%20%22json%22%7D' -d 'hello world, hello' | jq .
-  httpJSON $ setRequestBodyLBS (cs txt) req
+  -- printDebug "[corenlp] sending body" $ (cs txt :: ByteString)
+  catch (httpJSON $ setRequestBodyLBS (cs txt) req) $ \e ->
+    case e of
+      JSONParseException _req res _err -> do
+        let body = getResponseBody res
+        printDebug "[corenlp'] request text" (cs txt :: ByteString)
+        printDebug "[corenlp'] response body (error)" body
+        throwIO e
+      JSONConversionException _req _res _err -> throwIO e
   where
     properties_ :: [(Text, Text)]
     properties_ = case lang of
...
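The new error handling relies on `httpJSON` from `Network.HTTP.Simple` throwing a `JSONException` when the CoreNLP response cannot be decoded; catching it lets the failing exchange be logged before the exception is rethrown. Here is a rough standalone sketch of the same idea outside Gargantext: `queryCoreNLP`, the placeholder endpoint, and the `print`/`putStrLn` logging are assumptions for illustration, while the real `corenlp'` builds the URI with a `?properties=...` query and logs via `printDebug`.

```haskell
-- Sketch only: catch the JSONException thrown by http-conduit's httpJSON
-- so the failing CoreNLP exchange can be inspected before rethrowing,
-- mirroring the pattern added in corenlp' above.
import Control.Exception (catch, throwIO)
import Data.Aeson (Value)
import qualified Data.ByteString.Lazy.Char8 as BSL
import Network.HTTP.Simple
  ( JSONException (..), Response, getResponseBody, getResponseStatus
  , httpJSON, parseRequest, setRequestBodyLBS )

-- Hypothetical standalone helper; the real corenlp' derives the URI and the
-- annotator properties from its Lang argument.
queryCoreNLP :: String -> IO (Response Value)
queryCoreNLP txt = do
  req <- parseRequest "POST http://localhost:9000"   -- placeholder endpoint
  catch (httpJSON $ setRequestBodyLBS (BSL.pack txt) req) $ \e -> do
    putStrLn ("[corenlp] request text was: " <> txt)
    case e of
      JSONParseException _req res _err ->
        print (getResponseStatus res)   -- body is () here; the status still helps
      JSONConversionException _req res _err ->
        print (getResponseBody res)     -- the JSON Value that failed to convert
    throwIO e                           -- rethrow so callers still see the failure
```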