[corenlp] better debugging for corenlp errors

Also, some openalex fixes.
parent 5d0461e3
Pipeline #4493 failed with stages
in 10 minutes and 13 seconds
......@@ -99,7 +99,7 @@ source-repository-package
source-repository-package
type: git
location: https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git
tag: dab07cb89e8ab8eaaff8619f5e21d944d9c526ab
tag: 1cf872fb3bd0e3e44af31247833c4b6bb7d0dca5
source-repository-package
type: git
......
......@@ -26,7 +26,7 @@ get :: Text
-> Maybe Limit
-> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get _email q lang mLimit = do
let limit = getLimit $ fromMaybe 10000 mLimit
let limit = getLimit $ fromMaybe 1000 mLimit
let mFilter = (\l -> "language:" <> l) <$> toISO639Lang lang
eRes <- OA.fetchWorksC Nothing mFilter $ Just $ Corpus.getRawQuery q
pure $ (\(len, docsC) -> (len, docsC .| takeC limit .| mapC toDoc)) <$> eRes
......
......@@ -22,11 +22,13 @@ Source: https://en.wikipedia.org/wiki/Part-of-speech_tagging
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.Core.Text.Terms.Multi.PosTagging
where
module Gargantext.Core.Text.Terms.Multi.PosTagging where
import Control.Exception (catch, throwIO)
import Data.Aeson
import Data.ByteString.Lazy.Char8 qualified as BSL
import Data.ByteString.Lazy.Internal (ByteString)
import Data.Map qualified as Map
import Data.Set (fromList)
import Data.Text (Text, splitOn, pack, toLower)
import Gargantext.Core (Lang(..))
......@@ -35,8 +37,6 @@ import Gargantext.Core.Types
import Gargantext.Prelude
import Network.HTTP.Simple
import Network.URI (URI(..))
import qualified Data.ByteString.Lazy.Char8 as BSL
import qualified Data.Map as Map
-- import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
......@@ -82,7 +82,15 @@ corenlp' uri lang txt = do
req <- parseRequest $
"POST " <> show (uri { uriQuery = "?properties=" <> (BSL.unpack $ encode $ toJSON $ Map.fromList properties) })
-- curl -XPOST 'http://localhost:9000/?properties=%7B%22annotators%22:%20%22tokenize,ssplit,pos,ner%22,%20%22outputFormat%22:%20%22json%22%7D' -d 'hello world, hello' | jq .
httpJSON $ setRequestBodyLBS (cs txt) req
-- printDebug "[corenlp] sending body" $ (cs txt :: ByteString)
catch (httpJSON $ setRequestBodyLBS (cs txt) req) $ \e ->
case e of
JSONParseException _req res _err -> do
let body = getResponseBody res
printDebug "[corenlp'] request text" (cs txt :: ByteString)
printDebug "[corenlp'] response body (error)" body
throwIO e
JSONConversionException _req _res _err -> throwIO e
where
properties_ :: [(Text, Text)]
properties_ = case lang of
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment