Commit ad6d6d54 authored by Alexandre Delanoë

[FIX] API data external connection (HAL)

parent 5478dcc5
...@@ -9,8 +9,6 @@ Portability : POSIX ...@@ -9,8 +9,6 @@ Portability : POSIX
-} -}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Core.Text.Corpus.API module Gargantext.Core.Text.Corpus.API
( ExternalAPIs(..) ( ExternalAPIs(..)
, Query , Query
...@@ -30,19 +28,22 @@ import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE ...@@ -30,19 +28,22 @@ import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
-- | Default maximum number of documents requested from an external API.
-- TODO: move this setting into gargantext.ini
default_limit :: Maybe Integer
default_limit = pure 10000
-- | Get External API metadata main function -- | Get External API metadata main function
get :: ExternalAPIs get :: ExternalAPIs
-> Lang -> Lang
-> Query -> Query
-> Maybe Limit -> Maybe Limit
-> IO [HyperdataDocument] -> IO [HyperdataDocument]
get PubMed _la q l = PUBMED.get q l -- EN only by default get PubMed _la q _l = PUBMED.get q default_limit -- EN only by default
get HAL la q l = HAL.get la q l get HAL la q _l = HAL.get la q default_limit
get IsTex la q l = ISTEX.get la q l get IsTex la q _l = ISTEX.get la q default_limit
get Isidore la q l = ISIDORE.get la (fromIntegral <$> l) (Just q) Nothing get Isidore la q _l = ISIDORE.get la (fromIntegral <$> default_limit) (Just q) Nothing
get _ _ _ _ = undefined get _ _ _ _ = undefined
-- | Some Sugar for the documentation -- | Some Sugar for the documentation
type Query = PUBMED.Query type Query = PUBMED.Query
type Limit = PUBMED.Limit type Limit = PUBMED.Limit
...@@ -9,7 +9,6 @@ Portability : POSIX ...@@ -9,7 +9,6 @@ Portability : POSIX
-} -}
module Gargantext.Core.Text.Corpus.API.Hal module Gargantext.Core.Text.Corpus.API.Hal
where where
...@@ -26,11 +25,11 @@ import qualified HAL.Doc.Corpus as HAL ...@@ -26,11 +25,11 @@ import qualified HAL.Doc.Corpus as HAL
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument] get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do get la q ml = do
docs <- HAL.getMetadataWith q (fromIntegral <$> ml) docs <- HAL.getMetadataWith q (Just 0) (fromIntegral <$> ml)
either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) docs either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) docs
toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus i t ab d s aus affs) = do toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do
(utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") Just d) (utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") Just d)
pure $ HyperdataDocument (Just "Hal") pure $ HyperdataDocument (Just "Hal")
(Just $ pack $ show i) (Just $ pack $ show i)
...@@ -40,7 +39,7 @@ toDoc' la (HAL.Corpus i t ab d s aus affs) = do ...@@ -40,7 +39,7 @@ toDoc' la (HAL.Corpus i t ab d s aus affs) = do
Nothing Nothing
(Just $ intercalate " " t) (Just $ intercalate " " t)
(Just $ foldl (\x y -> x <> ", " <> y) "" aus) (Just $ foldl (\x y -> x <> ", " <> y) "" aus)
(Just $ foldl (\x y -> x <> ", " <> y) "" affs) (Just $ foldl (\x y -> x <> ", " <> y) "" $ affs <> map (cs . show) struct_id)
(Just $ maybe "Nothing" identity s) (Just $ maybe "Nothing" identity s)
(Just $ intercalate " " ab) (Just $ intercalate " " ab)
(fmap (pack . show) utctime) (fmap (pack . show) utctime)
......
...@@ -32,8 +32,8 @@ type Limit = PubMed.Limit ...@@ -32,8 +32,8 @@ type Limit = PubMed.Limit
-- | TODO put default pubmed query in gargantext.ini -- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs -- by default: 10K docs
get :: Query -> Maybe Limit -> IO [HyperdataDocument] get :: Query -> Maybe Limit -> IO [HyperdataDocument]
get q _l = either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN)) get q l = either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
<$> PubMed.getMetadataWith q (Just 10000) <$> PubMed.getMetadataWith q l
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus) toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus)
......
...@@ -124,6 +124,7 @@ getDataText :: FlowCmdM env err m ...@@ -124,6 +124,7 @@ getDataText :: FlowCmdM env err m
getDataText (ExternalOrigin api) la q li = liftBase $ DataNew getDataText (ExternalOrigin api) la q li = liftBase $ DataNew
<$> splitEvery 500 <$> splitEvery 500
<$> API.get api (_tt_lang la) q li <$> API.get api (_tt_lang la) q li
getDataText (InternalOrigin _) _la q _li = do getDataText (InternalOrigin _) _la q _li = do
(_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus (_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus
(UserName userMaster) (UserName userMaster)
......
...@@ -40,7 +40,7 @@ extra-deps: ...@@ -40,7 +40,7 @@ extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: daeae80365250c4bd539f0a65e271f9aa37f731f commit: daeae80365250c4bd539f0a65e271f9aa37f731f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit: ef9e638c97788df251f50b71fcdd9551b87f12c5 commit: 95e8f01a5d3b404a14a7fc664996569a6fb41ec4
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b
# #
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment