Commit d54d5f06 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[hal] use conduit for hal parsing

This still isn't optimal: the rest of our code works on lists, so this
fetches everything through conduit, collects the results into a list, and
only then processes them.
parent b1a0ce6f
Pipeline #2508 failed with stage
in 9 minutes and 1 second
......@@ -18,6 +18,7 @@ New corpus means either:
module Gargantext.API.Node.Corpus.New
where
import Control.Lens hiding (elements, Empty)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
......
......@@ -39,7 +39,7 @@ get :: ExternalAPIs
-> Maybe Limit
-> IO [HyperdataDocument]
get PubMed _la q _l = PUBMED.get q default_limit -- EN only by default
get HAL la q _l = HAL.get la q default_limit
get HAL la q _l = HAL.getC la q Nothing
get IsTex la q _l = ISTEX.get la q default_limit
get Isidore la q _l = ISIDORE.get la (fromIntegral <$> default_limit) (Just q) Nothing
get _ _ _ _ = undefined
......
......@@ -12,6 +12,8 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.Hal
where
import Conduit
import Data.Either
import Data.Maybe
import Data.Text (Text, pack, intercalate)
......@@ -25,8 +27,15 @@ import qualified HAL.Doc.Corpus as HAL
-- | Query HAL for @q@ and convert every returned entry with @toDoc' la@.
--
-- 'HAL.getMetadataWith' yields an 'Either': a 'Left' aborts through 'panic'
-- with the shown error, while a 'Right' has its 'HAL._docs' mapped through
-- @toDoc'@ and returned as a list.
--
-- NOTE(review): @Just 0@ is presumably the start offset and @ml@ the maximum
-- number of documents to fetch -- confirm against the hal crawler.
-- NOTE(review): the two bind/'either' pairs below look like the before
-- (@fromIntegral <$> ml@, @docs@) and after (@ml@, @eDocs@) versions of the
-- same statements as rendered by the diff view -- confirm.
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
docs <- HAL.getMetadataWith q (Just 0) (fromIntegral <$> ml)
either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) docs
eDocs <- HAL.getMetadataWith q (Just 0) ml
either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) eDocs
-- | Conduit-backed counterpart of 'get'.
--
-- Asks 'HAL.getMetadataRecursively' for the entries matching @q@. A 'Left'
-- result aborts through 'panic' with the shown error; a 'Right' result is a
-- conduit source whose elements are converted one by one with @toDoc' la@
-- and gathered into a list with 'sinkList'.
--
-- NOTE(review): @Just 0@ is presumably the start offset and @ml@ an optional
-- cap on the number of documents -- confirm against the hal crawler.
getC :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
getC la q ml =
  HAL.getMetadataRecursively q (Just 0) ml >>= either failWith collect
  where
    -- Abort with the textual rendering of the crawler error.
    failWith err = panic (pack (show err))
    -- Run the stream to completion, converting each entry on the way, and
    -- materialise the converted documents as a list.
    collect source = runConduit $ source .| mapMC (toDoc' la) .| sinkList
toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do
......
......@@ -73,7 +73,7 @@ extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: daeae80365250c4bd539f0a65e271f9aa37f731f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit: 020f5f9b308f5c23c925aedf5fb11f8b4728fb19
commit: a51cb46fdf8622ec48b29206a40a37bbce1380bc
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment