Commit ad6d6d54 authored by Alexandre Delanoë

[FIX] API data external connection (HAL)

parent 5478dcc5
......@@ -9,8 +9,6 @@ Portability : POSIX
-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Core.Text.Corpus.API
( ExternalAPIs(..)
, Query
......@@ -30,19 +28,22 @@ import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
-- | Default cap on the number of documents requested from an external API.
-- TODO put in gargantext.init
default_limit :: Maybe Integer
default_limit = Just 10000
-- | Get External API metadata main function
--
-- Dispatches on the 'ExternalAPIs' constructor and forwards the language,
-- query and limit to the matching crawler module (PUBMED, HAL, ISTEX or
-- ISIDORE), returning the 'HyperdataDocument' list that module produces.
--
-- NOTE(review): two groups of equations for the same four constructors are
-- listed below: one forwards the caller's limit @l@, the other ignores it
-- (@_l@) and passes 'default_limit' instead. This looks like the before and
-- after sides of a diff shown together; confirm which group is current. Read
-- top to bottom as Haskell, the first group matches first and shadows the
-- second.
get :: ExternalAPIs
-> Lang
-> Query
-> Maybe Limit
-> IO [HyperdataDocument]
get PubMed _la q l = PUBMED.get q l -- EN only by default
get HAL la q l = HAL.get la q l
get IsTex la q l = ISTEX.get la q l
-- Isidore's argument order differs: the limit (converted with 'fromIntegral')
-- comes before the query, which is wrapped in 'Just'; the last argument is
-- left as 'Nothing'.
get Isidore la q l = ISIDORE.get la (fromIntegral <$> l) (Just q) Nothing
get PubMed _la q _l = PUBMED.get q default_limit -- EN only by default
get HAL la q _l = HAL.get la q default_limit
get IsTex la q _l = ISTEX.get la q default_limit
get Isidore la q _l = ISIDORE.get la (fromIntegral <$> default_limit) (Just q) Nothing
-- Any other 'ExternalAPIs' constructor reaches 'undefined' and fails at runtime.
get _ _ _ _ = undefined
-- | Some Sugar for the documentation
-- Both aliases below point at the types declared by the PUBMED crawler module.
type Query = PUBMED.Query
type Limit = PUBMED.Limit
......@@ -9,7 +9,6 @@ Portability : POSIX
-}
module Gargantext.Core.Text.Corpus.API.Hal
where
......@@ -26,11 +25,11 @@ import qualified HAL.Doc.Corpus as HAL
-- | Fetch documents from HAL for a query and convert them to
-- 'HyperdataDocument'.
--
-- The optional limit @ml@ is converted with 'fromIntegral' before being passed
-- to @HAL.getMetadataWith@. A 'Left' result is turned into a 'panic' carrying
-- the shown error; on a 'Right' result every entry of @HAL._docs@ is converted
-- with @toDoc'@ for the given language.
--
-- NOTE(review): two calls to @HAL.getMetadataWith@ are listed, one with and
-- one without the extra @Just 0@ argument. These are apparently the two sides
-- of a diff; confirm which arity the pinned hal crawler exposes. The @Just 0@
-- is presumably a start offset (TODO confirm).
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
docs <- HAL.getMetadataWith q (fromIntegral <$> ml)
docs <- HAL.getMetadataWith q (Just 0) (fromIntegral <$> ml)
either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) docs
toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus i t ab d s aus affs) = do
toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do
(utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") Just d)
pure $ HyperdataDocument (Just "Hal")
(Just $ pack $ show i)
......@@ -40,7 +39,7 @@ toDoc' la (HAL.Corpus i t ab d s aus affs) = do
Nothing
(Just $ intercalate " " t)
(Just $ foldl (\x y -> x <> ", " <> y) "" aus)
(Just $ foldl (\x y -> x <> ", " <> y) "" affs)
(Just $ foldl (\x y -> x <> ", " <> y) "" $ affs <> map (cs . show) struct_id)
(Just $ maybe "Nothing" identity s)
(Just $ intercalate " " ab)
(fmap (pack . show) utctime)
......
......@@ -32,8 +32,8 @@ type Limit = PubMed.Limit
-- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs
--
-- Queries PubMed through @PubMed.getMetadataWith@. A 'Left' result panics with
-- the error appended directly to the @"CRAWL: PubMed"@ prefix; a 'Right' result
-- is mapped through @toDoc EN@.
--
-- NOTE(review): two equations are listed. One ignores the caller's limit
-- (@_l@) and hard-codes @Just 10000@; the other forwards @l@ unchanged. The
-- "10K docs" remark above only describes the former. These look like the two
-- sides of a diff; confirm which one is current.
get :: Query -> Maybe Limit -> IO [HyperdataDocument]
get q _l = either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
<$> PubMed.getMetadataWith q (Just 10000)
get q l = either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
<$> PubMed.getMetadataWith q l
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus)
......
......@@ -124,6 +124,7 @@ getDataText :: FlowCmdM env err m
getDataText (ExternalOrigin api) la q li = liftBase $ DataNew
<$> splitEvery 500
<$> API.get api (_tt_lang la) q li
getDataText (InternalOrigin _) _la q _li = do
(_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus
(UserName userMaster)
......
......@@ -40,7 +40,7 @@ extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: daeae80365250c4bd539f0a65e271f9aa37f731f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit: ef9e638c97788df251f50b71fcdd9551b87f12c5
commit: 95e8f01a5d3b404a14a7fc664996569a6fb41ec4
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b
#
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment