Commit b87c1360 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge remote-tracking branch 'origin/97-dev-istex-search' into dev

parents 90e9cdf2 fe959c1d
...@@ -64,6 +64,7 @@ library ...@@ -64,6 +64,7 @@ library
Gargantext.Database.Admin.Config Gargantext.Database.Admin.Config
Gargantext.Database.Admin.Types.Hyperdata Gargantext.Database.Admin.Types.Hyperdata
Gargantext.Database.Admin.Types.Node Gargantext.Database.Admin.Types.Node
Gargantext.Defaults
Gargantext.Core.Text Gargantext.Core.Text
Gargantext.Core.Text.Context Gargantext.Core.Text.Context
Gargantext.Core.Text.Corpus.Parsers Gargantext.Core.Text.Corpus.Parsers
......
...@@ -88,6 +88,7 @@ library: ...@@ -88,6 +88,7 @@ library:
- Gargantext.Database.Admin.Config - Gargantext.Database.Admin.Config
- Gargantext.Database.Admin.Types.Hyperdata - Gargantext.Database.Admin.Types.Hyperdata
- Gargantext.Database.Admin.Types.Node - Gargantext.Database.Admin.Types.Node
- Gargantext.Defaults
- Gargantext.Core.Text - Gargantext.Core.Text
- Gargantext.Core.Text.Context - Gargantext.Core.Text.Context
- Gargantext.Core.Text.Corpus.Parsers - Gargantext.Core.Text.Corpus.Parsers
......
...@@ -100,10 +100,9 @@ documentUpload nId doc = do ...@@ -100,10 +100,9 @@ documentUpload nId doc = do
Just c -> c Just c -> c
Nothing -> panic $ T.pack $ "[G.A.N.DU] Node has no corpus parent: " <> show nId Nothing -> panic $ T.pack $ "[G.A.N.DU] Node has no corpus parent: " <> show nId
(theFullDate, (year, month, day)) <- liftBase (theFullDate, (year, month, day)) <- liftBase $ dateSplit EN
$ dateSplit EN $ Just
$ Just $ view du_date doc <> "T:0:0:0"
$ view du_date doc <> "T:0:0:0"
let hd = HyperdataDocument { _hd_bdd = Nothing let hd = HyperdataDocument { _hd_bdd = Nothing
, _hd_doi = Nothing , _hd_doi = Nothing
......
...@@ -37,6 +37,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Frame ...@@ -37,6 +37,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith) import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata) import Gargantext.Database.Schema.Node (node_hyperdata)
import qualified Gargantext.Defaults as Defaults
import Gargantext.Prelude import Gargantext.Prelude
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Servant import Servant
...@@ -114,9 +115,9 @@ hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, cont ...@@ -114,9 +115,9 @@ hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, cont
date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-" date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
, T.pack $ show month, "-" , T.pack $ show month, "-"
, T.pack $ show day ]) <$> date , T.pack $ show day ]) <$> date
year' = fromIntegral $ maybe 2021 (\(Date { year }) -> year) date year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
month' = fromIntegral $ maybe 10 (\(Date { month }) -> month) date month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
day' = fromIntegral $ maybe 4 (\(Date { day }) -> day) date in day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date in
Right HyperdataDocument { _hd_bdd = Just "FrameWrite" Right HyperdataDocument { _hd_bdd = Just "FrameWrite"
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = Nothing , _hd_url = Nothing
......
...@@ -31,6 +31,7 @@ import Gargantext.Database.Admin.Types.Hyperdata (HyperdataContact(..), Hyperdat ...@@ -31,6 +31,7 @@ import Gargantext.Database.Admin.Types.Hyperdata (HyperdataContact(..), Hyperdat
import Gargantext.Database.Admin.Types.Hyperdata.Contact (_cw_organization) import Gargantext.Database.Admin.Types.Hyperdata.Contact (_cw_organization)
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Facet import Gargantext.Database.Query.Facet
import qualified Gargantext.Defaults as Defaults
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.Aeson (defaultTaggedObject) import Gargantext.Utils.Aeson (defaultTaggedObject)
import Servant import Servant
...@@ -258,12 +259,12 @@ instance ToHyperdataRow HyperdataDocument where ...@@ -258,12 +259,12 @@ instance ToHyperdataRow HyperdataDocument where
, _hr_language_iso2 = fromMaybe "EN" _hd_language_iso2 , _hr_language_iso2 = fromMaybe "EN" _hd_language_iso2
, _hr_page = fromMaybe 0 _hd_page , _hr_page = fromMaybe 0 _hd_page
, _hr_publication_date = fromMaybe "" _hd_publication_date , _hr_publication_date = fromMaybe "" _hd_publication_date
, _hr_publication_day = fromMaybe 1 _hd_publication_day , _hr_publication_year = fromMaybe (fromIntegral Defaults.year) _hd_publication_year
, _hr_publication_hour = fromMaybe 1 _hd_publication_hour , _hr_publication_month = fromMaybe Defaults.month _hd_publication_month
, _hr_publication_minute = fromMaybe 1 _hd_publication_minute , _hr_publication_day = fromMaybe Defaults.day _hd_publication_day
, _hr_publication_month = fromMaybe 1 _hd_publication_month , _hr_publication_hour = fromMaybe 0 _hd_publication_hour
, _hr_publication_second = fromMaybe 1 _hd_publication_second , _hr_publication_minute = fromMaybe 0 _hd_publication_minute
, _hr_publication_year = fromMaybe 2020 _hd_publication_year , _hr_publication_second = fromMaybe 0 _hd_publication_second
, _hr_source = fromMaybe "" _hd_source , _hr_source = fromMaybe "" _hd_source
, _hr_title = fromMaybe "Title" _hd_title , _hr_title = fromMaybe "Title" _hd_title
, _hr_url = fromMaybe "" _hd_url , _hr_url = fromMaybe "" _hd_url
......
...@@ -15,8 +15,7 @@ module Gargantext.Core.Text.Corpus.API ...@@ -15,8 +15,7 @@ module Gargantext.Core.Text.Corpus.API
, Limit , Limit
, get , get
, externalAPIs , externalAPIs
) ) where
where
import Conduit import Conduit
import Data.Either (Either(..)) import Data.Either (Either(..))
......
...@@ -20,6 +20,7 @@ import Servant.Client (ClientError) ...@@ -20,6 +20,7 @@ import Servant.Client (ClientError)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import qualified Gargantext.Defaults as Defaults
import Gargantext.Prelude import Gargantext.Prelude
import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
import qualified HAL as HAL import qualified HAL as HAL
...@@ -41,24 +42,25 @@ getC la q ml = do ...@@ -41,24 +42,25 @@ getC la q ml = do
-- | Convert one HAL record into a 'HyperdataDocument' tagged with language @la@.
--
-- The publication date is split into (year, month, day) by 'Date.dateSplit'.
-- When the record carries no date, the textual form of 'Defaults.year' is
-- parsed instead.
--
-- Authors and institutes are joined with @", "@. The previous
-- @foldl (\\x y -> x <> ", " <> y) ""@ always emitted a leading @", "@
-- (e.g. @", Alice, Bob"@); 'intercalate' only puts the separator between items.
toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do
  (utctime, (pub_year, pub_month, pub_day)) <-
        Date.dateSplit la (maybe (Just $ pack $ show Defaults.year) Just d)
  pure HyperdataDocument { _hd_bdd = Just "Hal"
                         , _hd_doi = Just $ pack $ show i
                         , _hd_url = Nothing
                         , _hd_uniqId = Nothing
                         , _hd_uniqIdBdd = Nothing
                         , _hd_page = Nothing
                         , _hd_title = Just $ intercalate " " t
                         , _hd_authors = Just $ intercalate ", " aus
                         -- structure ids are shown and listed after the affiliations
                         , _hd_institutes = Just $ intercalate ", " $ affs <> map (cs . show) struct_id
                         , _hd_source = Just $ maybe "Nothing" identity s
                         , _hd_abstract = Just $ intercalate " " ab
                         , _hd_publication_date = fmap (pack . show) utctime
                         , _hd_publication_year = pub_year
                         , _hd_publication_month = pub_month
                         , _hd_publication_day = pub_day
                         , _hd_publication_hour = Nothing
                         , _hd_publication_minute = Nothing
                         , _hd_publication_second = Nothing
                         , _hd_language_iso2 = Just $ (pack . show) la }
...@@ -18,6 +18,7 @@ import Data.Text (Text) ...@@ -18,6 +18,7 @@ import Data.Text (Text)
import qualified Data.Text as Text import qualified Data.Text as Text
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import qualified Gargantext.Defaults as Defaults
import Gargantext.Prelude import Gargantext.Prelude
import Isidore.Client import Isidore.Client
import Servant.Client import Servant.Client
...@@ -67,26 +68,28 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do ...@@ -67,26 +68,28 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText (OnlyText t2 ) = t2 langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit l (maybe (Just "2019") (Just) d) (utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit l (maybe (Just $ Text.pack $ show Defaults.year) (Just) d)
pure $ HyperdataDocument (Just "Isidore") pure HyperdataDocument
Nothing { _hd_bdd = Just "Isidore"
u , _hd_doi = Nothing
Nothing , _hd_url = u
Nothing , _hd_uniqId = Nothing
Nothing , _hd_uniqIdBdd = Nothing
(Just $ cleanText $ langText t) , _hd_page = Nothing
(creator2text <$> as) , _hd_title = Just $ cleanText $ langText t
Nothing , _hd_authors = creator2text <$> as
(Just $ maybe "Nothing" identity $ _sourceName <$> s) , _hd_institutes = Nothing
(cleanText <$> langText <$> a) , _hd_source = Just $ maybe "Nothing" identity $ _sourceName <$> s
(fmap (Text.pack . show) utcTime) , _hd_abstract = cleanText <$> langText <$> a
(pub_year) , _hd_publication_date = fmap (Text.pack . show) utcTime
(pub_month) , _hd_publication_year = pub_year
(pub_day) , _hd_publication_month = pub_month
Nothing , _hd_publication_day = pub_day
Nothing , _hd_publication_hour = Nothing
Nothing , _hd_publication_minute = Nothing
(Just $ (Text.pack . show) l) , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (Text.pack . show) l
}
...@@ -13,12 +13,14 @@ Portability : POSIX ...@@ -13,12 +13,14 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.Istex module Gargantext.Core.Text.Corpus.API.Istex
where where
import Data.Either (Either(..))
import Data.List (concat) import Data.List (concat)
import Data.Maybe import Data.Maybe
import Data.Text (Text, pack) import Data.Text (Text, pack)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import qualified Gargantext.Defaults as Defaults
import Gargantext.Prelude import Gargantext.Prelude
import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
import qualified ISTEX as ISTEX import qualified ISTEX as ISTEX
...@@ -26,19 +28,37 @@ import qualified ISTEX.Client as ISTEX ...@@ -26,19 +28,37 @@ import qualified ISTEX.Client as ISTEX
-- | Query ISTEX for documents matching @q@ and convert every hit into a
-- 'HyperdataDocument' tagged with language @la@.
--
-- A 'Left' returned by the ISTEX client is turned into a 'panic' carrying the
-- shown error.
--
-- NOTE(review): the limit @ml@ is only logged here; it is not passed to
-- 'ISTEX.getMetadataScroll'. Confirm whether the limit should still apply.
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
  printDebug "[Istex.get] calling getMetadataScrollProgress for la" la
  printDebug "[Istex.get] calling getMetadataScrollProgress for q" q
  printDebug "[Istex.get] calling getMetadataScrollProgress for ml" ml
  -- The "scroll" argument expects a "d/h/m/s/ms" time interval; "1m" is used.
  -- NOTE(review): this was previously described as "1 month", but in that unit
  -- list `m` presumably means minutes -- confirm against the ISTEX API docs.
  eDocs <- ISTEX.getMetadataScroll q "1m" Nothing 0
  printDebug "[Istex.get] will print length" (0 :: Int)
  case eDocs of
    Left err   -> panic . pack . show $ err
    Right docs -> do
      printDebug "[Istex.get] length docs" $ length (ISTEX._documents_hits docs)
      toDoc' la docs
-- | Convert every hit of an ISTEX response into a 'HyperdataDocument',
-- running 'toDoc' on each one in order.
toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' la = mapM (toDoc la) . ISTEX._documents_hits
-- | TODO remove dateSplit here -- | TODO remove dateSplit here
-- TODO current year as default -- TODO current year as default
toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc la (ISTEX.Document i t a ab d s) = do toDoc la (ISTEX.Document i t a ab d s) = do
(utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") (Just . pack . show) d) (utctime, (pub_year, pub_month, pub_day)) <-
Date.dateSplit la (maybe (Just $ pack $ show Defaults.year) (Just . pack . show) d)
pure $ HyperdataDocument { _hd_bdd = Just "Istex" pure $ HyperdataDocument { _hd_bdd = Just "Istex"
, _hd_doi = Just i , _hd_doi = Just i
, _hd_url = Nothing , _hd_url = Nothing
......
...@@ -164,6 +164,7 @@ parseFormatC _ _ _ = undefined ...@@ -164,6 +164,7 @@ parseFormatC _ _ _ = undefined
-- | Parse file into documents -- | Parse file into documents
-- TODO manage errors here -- TODO manage errors here
-- TODO: to debug maybe add the filepath in error message -- TODO: to debug maybe add the filepath in error message
parseFile :: FileType -> FileFormat -> FilePath -> IO (Either Prelude.String [HyperdataDocument]) parseFile :: FileType -> FileFormat -> FilePath -> IO (Either Prelude.String [HyperdataDocument])
parseFile CsvHal Plain p = parseHal p parseFile CsvHal Plain p = parseHal p
parseFile CsvGargV3 Plain p = parseCsv p parseFile CsvGargV3 Plain p = parseCsv p
...@@ -185,27 +186,27 @@ toDoc ff d = do ...@@ -185,27 +186,27 @@ toDoc ff d = do
let dateToParse = DT.replace "-" " " <$> lookup "PY" d <> Just " " <> lookup "publication_date" d let dateToParse = DT.replace "-" " " <$> lookup "PY" d <> Just " " <> lookup "publication_date" d
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit lang dateToParse (utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit lang dateToParse
pure $ HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff pure HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff
, _hd_doi = lookup "doi" d , _hd_doi = lookup "doi" d
, _hd_url = lookup "URL" d , _hd_url = lookup "URL" d
, _hd_uniqId = Nothing , _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing , _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = lookup "title" d , _hd_title = lookup "title" d
, _hd_authors = Nothing , _hd_authors = Nothing
, _hd_institutes = lookup "authors" d , _hd_institutes = lookup "authors" d
, _hd_source = lookup "source" d , _hd_source = lookup "source" d
, _hd_abstract = lookup "abstract" d , _hd_abstract = lookup "abstract" d
, _hd_publication_date = fmap (DT.pack . show) utcTime , _hd_publication_date = fmap (DT.pack . show) utcTime
, _hd_publication_year = pub_year , _hd_publication_year = pub_year
, _hd_publication_month = pub_month , _hd_publication_month = pub_month
, _hd_publication_day = pub_day , _hd_publication_day = pub_day
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (DT.pack . show) lang } , _hd_language_iso2 = Just $ (DT.pack . show) lang }
enrichWith :: FileType enrichWith :: FileType
-> (a, [[[(DB.ByteString, DB.ByteString)]]]) -> (a, [[(Text, Text)]]) -> (a, [[[(DB.ByteString, DB.ByteString)]]]) -> (a, [[(Text, Text)]])
......
...@@ -25,8 +25,10 @@ import Data.Aeson (toJSON, Value) ...@@ -25,8 +25,10 @@ import Data.Aeson (toJSON, Value)
import Data.Either (Either(..)) import Data.Either (Either(..))
import Data.HashMap.Strict as HM hiding (map) import Data.HashMap.Strict as HM hiding (map)
import Data.Text (Text, unpack, splitOn, replace) import Data.Text (Text, unpack, splitOn, replace)
import Data.Time (defaultTimeLocale, toGregorian, iso8601DateFormat, parseTimeM) import Data.Time (defaultTimeLocale, iso8601DateFormat, parseTimeM, toGregorian)
import qualified Data.Time.Calendar as DTC
import Data.Time.Clock (UTCTime(..), getCurrentTime) import Data.Time.Clock (UTCTime(..), getCurrentTime)
import Data.Time.Clock ( secondsToDiffTime)
import Data.Time.LocalTime (utc) import Data.Time.LocalTime (utc)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime) import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
import Duckling.Api (analyze) import Duckling.Api (analyze)
...@@ -37,7 +39,7 @@ import Duckling.Types (ResolvedToken(..), ResolvedVal(..)) ...@@ -37,7 +39,7 @@ import Duckling.Types (ResolvedToken(..), ResolvedVal(..))
import Gargantext.Core (Lang(FR,EN)) import Gargantext.Core (Lang(FR,EN))
import Gargantext.Core.Types (DebugMode(..), withDebugMode) import Gargantext.Core.Types (DebugMode(..), withDebugMode)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Control.Exception as CE --import qualified Control.Exception as CE
import qualified Data.Aeson as Json import qualified Data.Aeson as Json
import qualified Data.HashSet as HashSet import qualified Data.HashSet as HashSet
import qualified Duckling.Core as DC import qualified Duckling.Core as DC
...@@ -136,28 +138,28 @@ parserLang lang = panic $ "[G.C.T.C.P.Date] Lang not implemented" <> (cs $ show ...@@ -136,28 +138,28 @@ parserLang lang = panic $ "[G.C.T.C.P.Date] Lang not implemented" <> (cs $ show
-- | Run 'parseRaw' and wrap its outcome in a 'DateFlow'.
--
-- On success the parsed text is returned as 'DucklingSuccess'. On failure the
-- error is logged together with the @LANG@ environment variable, the requested
-- language and the input, and the original input is returned as
-- 'DucklingFailure'.
--
-- NOTE(review): if @getEnv@ here is the throwing variant, an unset @LANG@
-- would raise on the failure path -- confirm which @getEnv@ is imported.
parseRawSafe :: Lang -> Text -> IO DateFlow
parseRawSafe lang text =
  case parseRaw lang text of
    Right res -> pure (DucklingSuccess res)
    Left err  -> do
      envLang <- getEnv "LANG"
      printDebug "[G.C.T.C.P.Date] Exception: " (err, envLang, lang, text)
      pure (DucklingFailure text)
tryParseRaw :: CE.Exception e => Lang -> Text -> IO (Either e Text) --tryParseRaw :: CE.Exception e => Lang -> Text -> IO (Either e Text)
tryParseRaw lang text = CE.try (parseRaw lang text) --tryParseRaw lang text = CE.try (parseRaw lang text)
-- | Parse a date out of @text@ with Duckling, without performing any IO.
--
-- Returns 'Right' with the value extracted from the Duckling result, or
-- 'Left' with a message naming the language and the offending input when no
-- value could be extracted.
parseRaw :: Lang -> Text -> Either Text Text
parseRaw lang text = maybe (Left failure) Right parsed
  where
    -- Options True: see the TODO on parseDateWithDuckling about this flag.
    parsed  = extractValue . getTimeValue
            $ parseDateWithDuckling lang text (Options True)
    failure = "[G.C.T.C.P.D.parseRaw ERROR] " <> (cs . show) lang <> " :: " <> text
getTimeValue :: [ResolvedToken] -> Maybe Value getTimeValue :: [ResolvedToken] -> Maybe Value
getTimeValue rt = case head rt of getTimeValue rt = case head rt of
...@@ -182,13 +184,21 @@ utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time ...@@ -182,13 +184,21 @@ utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time
-- | Build the Duckling 'Context' for a language and a reference time.
-- The locale is derived from 'parserLang' with no region.
localContext :: Lang -> DucklingTime -> Context
localContext lang dt = Context { referenceTime = dt, locale = langLocale }
  where
    langLocale = makeLocale (parserLang lang) Nothing
-- | Fixed calendar day: 1 January of year 1.
defaultDay :: DTC.Day
defaultDay = DTC.fromGregorian 1 1 1

-- | Midnight (0 seconds into the day) on 'defaultDay'.
defaultUTCTime :: UTCTime
defaultUTCTime = UTCTime defaultDay (secondsToDiffTime 0)
-- | Date parser with Duckling.
--
-- Pure: the Duckling reference time is the fixed 'defaultUTCTime' rather than
-- the current clock, so the same input always yields the same tokens.
-- Only the @Time@ dimension is requested.
--
-- TODO check/test Options False or True
parseDateWithDuckling :: Lang -> Text -> Options -> [ResolvedToken]
parseDateWithDuckling lang input options =
  analyze input contxt options timeDimension
  where
    contxt        = localContext lang (utcToDucklingTime defaultUTCTime)
    timeDimension = HashSet.fromList [Seal Time]
...@@ -23,6 +23,7 @@ import Data.Aeson.TH (deriveJSON) ...@@ -23,6 +23,7 @@ import Data.Aeson.TH (deriveJSON)
import Data.ByteString.Lazy (readFile) import Data.ByteString.Lazy (readFile)
import Data.Text (Text, unpack) import Data.Text (Text, unpack)
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import qualified Gargantext.Defaults as Defaults
import Gargantext.Prelude import Gargantext.Prelude
import System.IO (FilePath) import System.IO (FilePath)
import Gargantext.Core.Text.Corpus.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3) import Gargantext.Core.Text.Corpus.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3)
...@@ -52,8 +53,8 @@ patent2csvDoc (Patent { .. }) = ...@@ -52,8 +53,8 @@ patent2csvDoc (Patent { .. }) =
CsvDoc { csv_title = _patent_title CsvDoc { csv_title = _patent_title
, csv_source = "Source" , csv_source = "Source"
, csv_publication_year = Just $ read (unpack _patent_year) , csv_publication_year = Just $ read (unpack _patent_year)
, csv_publication_month = Just 1 , csv_publication_month = Just $ Defaults.month
, csv_publication_day = Just 1 , csv_publication_day = Just $ Defaults.day
, csv_abstract = _patent_abstract , csv_abstract = _patent_abstract
, csv_authors = "Authors" } , csv_authors = "Authors" }
......
...@@ -68,8 +68,7 @@ wikiPageToDocument m wr = do ...@@ -68,8 +68,7 @@ wikiPageToDocument m wr = do
source = Nothing source = Nothing
abstract = Just $ concat $ take m sections abstract = Just $ concat $ take m sections
(date, (year, month, day)) (date, (year, month, day)) <- dateSplit EN $ head
<- dateSplit EN $ head
$ catMaybes $ catMaybes
[ wr ^. wr_yearStart [ wr ^. wr_yearStart
, wr ^. wr_yearEnd , wr ^. wr_yearEnd
......
...@@ -74,6 +74,7 @@ import Gargantext.Database.Admin.Types.Hyperdata ...@@ -74,6 +74,7 @@ import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd, runPGSQuery{-, formatPGSQuery-}) import Gargantext.Database.Prelude (Cmd, runPGSQuery{-, formatPGSQuery-})
import Gargantext.Database.Schema.Node (NodePoly(..)) import Gargantext.Database.Schema.Node (NodePoly(..))
import qualified Gargantext.Defaults as Defaults
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (hash) import Gargantext.Prelude.Crypto.Hash (hash)
import qualified Data.Text as DT (pack, concat, take) import qualified Data.Text as DT (pack, concat, take)
...@@ -282,9 +283,9 @@ instance ToNode HyperdataDocument where ...@@ -282,9 +283,9 @@ instance ToNode HyperdataDocument where
-- NOTE: There is no year '0' in postgres, there is year 1 AD and before that year 1 BC: -- NOTE: There is no year '0' in postgres, there is year 1 AD and before that year 1 BC:
-- select '0001-01-01'::date, '0001-01-01'::date - '1 day'::interval; -- select '0001-01-01'::date, '0001-01-01'::date - '1 day'::interval;
-- 0001-01-01 0001-12-31 00:00:00 BC -- 0001-01-01 0001-12-31 00:00:00 BC
-- Each missing date component falls back to its own default from
-- Gargantext.Defaults. Previously the year fell back to Defaults.day and the
-- day to Defaults.year, which only worked because every default equals 1.
-- Defaults.year is an Integer while _hd_publication_year holds an Int, hence
-- the inner fromIntegral.
y = fromIntegral $ fromMaybe (fromIntegral Defaults.year) $ _hd_publication_year h
m = fromMaybe Defaults.month $ _hd_publication_month h
d = fromMaybe Defaults.day $ _hd_publication_day h
-- TODO better Node -- TODO better Node
instance ToNode HyperdataContact where instance ToNode HyperdataContact where
......
{-|
Module : Gargantext.Defaults
Description : Gargantext default values
Copyright : (c) CNRS, 2021-present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Defaults
  ( year
  , month
  , day
  ) where

import Gargantext.Prelude

-- | Fallback year, used where a date component is missing.
-- Note this is an 'Integer' while 'month' and 'day' are 'Int', so callers
-- that need an 'Int' year convert with @fromIntegral@.
year :: Integer
year = 1

-- | Fallback month (January).
month :: Int
month = 1

-- | Fallback day of the month.
day :: Int
day = 1
...@@ -73,7 +73,7 @@ extra-deps: ...@@ -73,7 +73,7 @@ extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: 02e03d9b856bd35d391f43da8525330f9d184615 commit: 02e03d9b856bd35d391f43da8525330f9d184615
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: daeae80365250c4bd539f0a65e271f9aa37f731f commit: a4a6fb6a578255c9e5b52aab2afccf874976a3f5
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit: 9a43470241690a19c1c381c42a62c5dd4e28dff2 commit: 9a43470241690a19c1c381c42a62c5dd4e28dff2
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment