Commit d0039f33 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] IsTex basic working for simple queries without quotes

parent db6214d3
......@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.5.8.9.5
version: 0.0.5.8.9.5
synopsis: Search, map, share
description: Please see README.md
category: Data
......
......@@ -26,6 +26,7 @@ import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
import qualified ISTEX as ISTEX
import qualified ISTEX.Client as ISTEX
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q _ml = do
--docs <- ISTEX.getMetadataWith q (fromIntegral <$> ml)
......@@ -34,7 +35,7 @@ get la q _ml = do
--printDebug "[Istex.get] calling getMetadataScrollProgress for ml" ml
-- The "scroll" expects a "d/h/m/s/ms" time interval, where "m" is minutes (not months): "1m" below means 1 minute.
--eDocs <- ISTEX.getMetadataScroll q ((\_n -> pack $ "1m") <$> ml) Nothing 0 --(fromIntegral <$> ml)
eDocs <- ISTEX.getMetadataScroll q "1m" Nothing 0 --(fromIntegral <$> ml)
eDocs <- ISTEX.getMetadataScroll (q <> " abstract:*") "1m" Nothing 0 --(fromIntegral <$> ml)
printDebug "[Istex.get] will print length" (0 :: Int)
case eDocs of
Left _ -> pure ()
......@@ -57,15 +58,17 @@ toDoc' la docs' = mapM (toDoc la) (ISTEX._documents_hits docs')
-- TODO current year as default
toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc la (ISTEX.Document i t a ab d s) = do
--printDebug "ISTEX date" d
(utctime, (pub_year, pub_month, pub_day)) <-
Date.dateSplit la (maybe (Just $ pack $ show Defaults.year) (Just . pack . show) d)
pure $ HyperdataDocument { _hd_bdd = Just "Istex"
, _hd_doi = Just i
, _hd_url = Nothing
, _hd_uniqId = Nothing
--printDebug "toDoc Istex" (utctime, (pub_year, pub_month, pub_day))
pure $ HyperdataDocument { _hd_bdd = Just "Istex"
, _hd_doi = Just i
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = t
, _hd_page = Nothing
, _hd_title = t
, _hd_authors = Just $ foldl (\x y -> x <> ", " <> y) "" (map ISTEX._author_name a)
, _hd_institutes = Just $ foldl (\x y -> x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a)
, _hd_source = Just $ foldl (\x y -> x <> ", " <> y) "" (catMaybes $ map ISTEX._source_title s)
......@@ -77,5 +80,5 @@ toDoc la (ISTEX.Document i t a ab d s) = do
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (pack . show) la }
, _hd_language_iso2 = Just $ (pack . show) la
}
......@@ -67,14 +67,15 @@ type Day = Int
-- | Date Parser
-- Parses date mentions in full text, given the language.
-- >>> parse FR (pack "10 avril 1900 à 19H")
-- 1900-04-10 19:00:00 UTC
-- >>> parse EN (pack "April 10 1900")
-- 1900-04-10 00:00:00 UTC
-- >>> parse FR (pack "1 avril 1900 à 19H")
-- 1900-04-01 19:00:00 UTC
-- >>> parse EN (pack "April 1 1900")
-- 1900-04-01 00:00:00 UTC
parse :: Lang -> Text -> IO UTCTime
parse lang s = do
printDebug "Date: " s
--printDebug "Date: " s
dateStr' <- pure $ dateFlow (DucklingFailure s) -- parseRawSafe lang s
--printDebug "Date': " dateStr'
case dateFlow dateStr' of
DateFlowSuccess ok -> pure ok
_ -> withDebugMode (DebugMode True)
......@@ -93,7 +94,7 @@ data DateFlow = DucklingSuccess { ds_result :: Text }
| DucklingFailure { df_result :: Text }
| ReadFailure1 { rf1_result :: Text }
| ReadFailure2 { rf2_result :: Text }
| DateFlowSuccess { success :: UTCTime }
| DateFlowSuccess { success :: UTCTime }
| DateFlowFailure
deriving Show
......@@ -126,7 +127,7 @@ readDate txt = do
-- | To get homogeneity of the languages
-- TODO : put this in a more generic place in the source code
-- Translates a 'Lang' into the 'DC.Lang' constructor of the same name.
-- Only 'FR' and 'EN' are handled; every other language falls through to the
-- last equation, which calls 'panic' with a message naming that language.
parserLang :: Lang -> DC.Lang
parserLang FR = DC.FR
parserLang EN = DC.EN
-- Catch-all: the separator keeps the language name readable in the message.
parserLang lang = panic $ "[G.C.T.C.P.Date] Lang not implemented: " <> (cs $ show lang)
......
......@@ -73,7 +73,7 @@ extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: 02e03d9b856bd35d391f43da8525330f9d184615
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: 6821f41655e298fa1fac5021c5776a6aed2d18fd
commit: a34bb341236d82cf3d488210bc1d8448a98f5808
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit: 9a43470241690a19c1c381c42a62c5dd4e28dff2
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment