Commit 55ae7b50 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[API] simplify parsing of date

We don't need the IO monad there, just added a default date for
Duckling to make the parser.
parent 70a94c12
...@@ -89,10 +89,9 @@ documentUpload _uId nId doc logStatus = do ...@@ -89,10 +89,9 @@ documentUpload _uId nId doc logStatus = do
Just c -> c Just c -> c
Nothing -> panic $ T.pack $ "[G.A.N.DU] Node has no corpus parent: " <> show nId Nothing -> panic $ T.pack $ "[G.A.N.DU] Node has no corpus parent: " <> show nId
(theFullDate, (year, month, day)) <- liftBase let (theFullDate, (year, month, day)) = dateSplit EN
$ dateSplit EN $ Just
$ Just $ view du_date doc <> "T:0:0:0"
$ view du_date doc <> "T:0:0:0"
let hd = HyperdataDocument { _hd_bdd = Nothing let hd = HyperdataDocument { _hd_bdd = Nothing
, _hd_doi = Nothing , _hd_doi = Nothing
......
...@@ -27,28 +27,29 @@ import qualified HAL.Doc.Corpus as HAL ...@@ -27,28 +27,29 @@ import qualified HAL.Doc.Corpus as HAL
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument] get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do get la q ml = do
docs <- HAL.getMetadataWith q (Just 0) (fromIntegral <$> ml) docs <- HAL.getMetadataWith q (Just 0) (fromIntegral <$> ml)
either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) docs pure $ either (panic . pack . show) (\d -> map (toDoc' la) $ HAL._docs d) docs
toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument toDoc' :: Lang -> HAL.Corpus -> HyperdataDocument
toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do
(utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just $ pack $ show Defaults.year) Just d) let (utctime, (pub_year, pub_month, pub_day)) =
pure $ HyperdataDocument { _hd_bdd = Just "Hal" Date.dateSplit la (maybe (Just $ pack $ show Defaults.year) Just d)
, _hd_doi = Just $ pack $ show i HyperdataDocument { _hd_bdd = Just "Hal"
, _hd_url = Nothing , _hd_doi = Just $ pack $ show i
, _hd_uniqId = Nothing , _hd_url = Nothing
, _hd_uniqIdBdd = Nothing , _hd_uniqId = Nothing
, _hd_page = Nothing , _hd_uniqIdBdd = Nothing
, _hd_title = Just $ intercalate " " t , _hd_page = Nothing
, _hd_authors = Just $ foldl (\x y -> x <> ", " <> y) "" aus , _hd_title = Just $ intercalate " " t
, _hd_institutes = Just $ foldl (\x y -> x <> ", " <> y) "" $ affs <> map (cs . show) struct_id , _hd_authors = Just $ foldl (\x y -> x <> ", " <> y) "" aus
, _hd_source = Just $ maybe "Nothing" identity s , _hd_institutes = Just $ foldl (\x y -> x <> ", " <> y) "" $ affs <> map (cs . show) struct_id
, _hd_abstract = Just $ intercalate " " ab , _hd_source = Just $ maybe "Nothing" identity s
, _hd_publication_date = fmap (pack . show) utctime , _hd_abstract = Just $ intercalate " " ab
, _hd_publication_year = pub_year , _hd_publication_date = fmap (pack . show) utctime
, _hd_publication_month = pub_month , _hd_publication_year = pub_year
, _hd_publication_day = pub_day , _hd_publication_month = pub_month
, _hd_publication_hour = Nothing , _hd_publication_day = pub_day
, _hd_publication_minute = Nothing , _hd_publication_hour = Nothing
, _hd_publication_second = Nothing , _hd_publication_minute = Nothing
, _hd_language_iso2 = Just $ (pack . show) la } , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (pack . show) la }
...@@ -42,7 +42,7 @@ get la l q a = do ...@@ -42,7 +42,7 @@ get la l q a = do
iDocs <- either printErr _content <$> Isidore.get l q a iDocs <- either printErr _content <$> Isidore.get l q a
hDocs <- mapM (\d -> isidoreToDoc la d) (toIsidoreDocs iDocs) let hDocs = map (\d -> isidoreToDoc la d) (toIsidoreDocs iDocs)
pure hDocs pure hDocs
isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
...@@ -52,7 +52,7 @@ isidore2csvFile fp la li tq aq = do ...@@ -52,7 +52,7 @@ isidore2csvFile fp la li tq aq = do
hdocs <- get la li tq aq hdocs <- get la li tq aq
writeDocs2Csv fp hdocs writeDocs2Csv fp hdocs
isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument isidoreToDoc :: Lang -> IsidoreDoc -> HyperdataDocument
isidoreToDoc l (IsidoreDoc t a d u s as) = do isidoreToDoc l (IsidoreDoc t a d u s as) = do
let let
author :: Author -> Text author :: Author -> Text
...@@ -68,26 +68,28 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do ...@@ -68,26 +68,28 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText (OnlyText t2 ) = t2 langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit l (maybe (Just $ Text.pack $ show Defaults.year) (Just) d) let (utcTime, (pub_year, pub_month, pub_day)) = Date.dateSplit l (maybe (Just $ Text.pack $ show Defaults.year) (Just) d)
pure $ HyperdataDocument (Just "Isidore") HyperdataDocument
Nothing { _hd_bdd = Just "Isidore"
u , _hd_doi = Nothing
Nothing , _hd_url = u
Nothing , _hd_uniqId = Nothing
Nothing , _hd_uniqIdBdd = Nothing
(Just $ cleanText $ langText t) , _hd_page = Nothing
(creator2text <$> as) , _hd_title = Just $ cleanText $ langText t
Nothing , _hd_authors = creator2text <$> as
(Just $ maybe "Nothing" identity $ _sourceName <$> s) , _hd_institutes = Nothing
(cleanText <$> langText <$> a) , _hd_source = Just $ maybe "Nothing" identity $ _sourceName <$> s
(fmap (Text.pack . show) utcTime) , _hd_abstract = cleanText <$> langText <$> a
(pub_year) , _hd_publication_date = fmap (Text.pack . show) utcTime
(pub_month) , _hd_publication_year = pub_year
(pub_day) , _hd_publication_month = pub_month
Nothing , _hd_publication_day = pub_day
Nothing , _hd_publication_hour = Nothing
Nothing , _hd_publication_minute = Nothing
(Just $ (Text.pack . show) l) , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (Text.pack . show) l
}
...@@ -13,6 +13,7 @@ Portability : POSIX ...@@ -13,6 +13,7 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.Istex module Gargantext.Core.Text.Corpus.API.Istex
where where
import Data.Either (Either(..))
import Data.List (concat) import Data.List (concat)
import Data.Maybe import Data.Maybe
import Data.Text (Text, pack) import Data.Text (Text, pack)
...@@ -28,39 +29,49 @@ import qualified ISTEX.Client as ISTEX ...@@ -28,39 +29,49 @@ import qualified ISTEX.Client as ISTEX
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument] get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do get la q ml = do
--docs <- ISTEX.getMetadataWith q (fromIntegral <$> ml) --docs <- ISTEX.getMetadataWith q (fromIntegral <$> ml)
printDebug "[Istex.get] calling getMetadataScrollWith for q" q printDebug "[Istex.get] calling getMetadataScrollProgress for la" la
printDebug "[Istex.get] calling getMetadataScrollWith for ml" ml printDebug "[Istex.get] calling getMetadataScrollProgress for q" q
printDebug "[Istex.get] calling getMetadataScrollProgress for ml" ml
-- The "scroll" expects "d/h/m/s/ms" time interval. Let's set it to "1 month" -- The "scroll" expects "d/h/m/s/ms" time interval. Let's set it to "1 month"
docs <- ISTEX.getMetadataScrollWith q ((\_n -> pack $ "1m") <$> ml) --(fromIntegral <$> ml) eDocs <- ISTEX.getMetadataScroll q ((\_n -> pack $ "1m") <$> ml) Nothing 0 --(fromIntegral <$> ml)
either (panic . pack . show) (toDoc' la) docs printDebug "[Istex.get] will print length" (0 :: Int)
case eDocs of
Left _ -> pure ()
Right (ISTEX.Documents { _documents_hits }) -> printDebug "[Istex.get] length docs" $ length _documents_hits
--ISTEX.getMetadataScrollProgress q ((\_ -> pack $ "1m") <$> ml) Nothing progress errorHandler
pure $ either (panic . pack . show) (toDoc' la) eDocs
-- where
-- progress (ISTEX.ScrollResponse { _scroll_documents = ISTEX.Documents { _documents_hits }}) =
-- printDebug "[Istex.get] got docs: " $ length _documents_hits
-- errorHandler err = printDebug "[Istex.get] error" $ show err
toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument] toDoc' :: Lang -> ISTEX.Documents -> [HyperdataDocument]
toDoc' la docs' = do toDoc' la docs' = map (toDoc la) (ISTEX._documents_hits docs')
--printDebug "ISTEX" (ISTEX._documents_total docs') --printDebug "ISTEX" (ISTEX._documents_total docs')
mapM (toDoc la) (ISTEX._documents_hits docs')
-- | TODO remove dateSplit here -- | TODO remove dateSplit here
-- TODO current year as default -- TODO current year as default
toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument toDoc :: Lang -> ISTEX.Document -> HyperdataDocument
toDoc la (ISTEX.Document i t a ab d s) = do toDoc la (ISTEX.Document i t a ab d s) = do
(utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just $ pack $ show Defaults.year) (Just . pack . show) d) let (utctime, (pub_year, pub_month, pub_day)) =
pure $ HyperdataDocument { _hd_bdd = Just "Istex" Date.dateSplit la (maybe (Just $ pack $ show Defaults.year) (Just . pack . show) d)
, _hd_doi = Just i HyperdataDocument { _hd_bdd = Just "Istex"
, _hd_url = Nothing , _hd_doi = Just i
, _hd_uniqId = Nothing , _hd_url = Nothing
, _hd_uniqIdBdd = Nothing , _hd_uniqId = Nothing
, _hd_page = Nothing , _hd_uniqIdBdd = Nothing
, _hd_title = t , _hd_page = Nothing
, _hd_authors = Just $ foldl (\x y -> x <> ", " <> y) "" (map ISTEX._author_name a) , _hd_title = t
, _hd_institutes = Just $ foldl (\x y -> x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a) , _hd_authors = Just $ foldl (\x y -> x <> ", " <> y) "" (map ISTEX._author_name a)
, _hd_source = Just $ foldl (\x y -> x <> ", " <> y) "" (catMaybes $ map ISTEX._source_title s) , _hd_institutes = Just $ foldl (\x y -> x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a)
, _hd_abstract = ab , _hd_source = Just $ foldl (\x y -> x <> ", " <> y) "" (catMaybes $ map ISTEX._source_title s)
, _hd_publication_date = fmap (pack . show) utctime , _hd_abstract = ab
, _hd_publication_year = pub_year , _hd_publication_date = fmap (pack . show) utctime
, _hd_publication_month = pub_month , _hd_publication_year = pub_year
, _hd_publication_day = pub_day , _hd_publication_month = pub_month
, _hd_publication_hour = Nothing , _hd_publication_day = pub_day
, _hd_publication_minute = Nothing , _hd_publication_hour = Nothing
, _hd_publication_second = Nothing , _hd_publication_minute = Nothing
, _hd_language_iso2 = Just $ (pack . show) la } , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (pack . show) la }
...@@ -26,7 +26,6 @@ module Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), clean, parseFile, cl ...@@ -26,7 +26,6 @@ module Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), clean, parseFile, cl
import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries) import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries)
import Control.Concurrent.Async as CCA (mapConcurrently) import Control.Concurrent.Async as CCA (mapConcurrently)
import Data.Attoparsec.ByteString (parseOnly, Parser) import Data.Attoparsec.ByteString (parseOnly, Parser)
import Control.Monad (join)
import Data.Either(Either(..)) import Data.Either(Either(..))
import Data.Either.Extra (partitionEithers) import Data.Either.Extra (partitionEithers)
import Data.List (concat, lookup) import Data.List (concat, lookup)
...@@ -79,23 +78,23 @@ data FileFormat = WOS | RIS | RisPresse ...@@ -79,23 +78,23 @@ data FileFormat = WOS | RIS | RisPresse
-- | XML -- Not Implemented / see : -- | XML -- Not Implemented / see :
parseFormat :: FileFormat -> DB.ByteString -> IO (Either Prelude.String [HyperdataDocument]) parseFormat :: FileFormat -> DB.ByteString -> Either Prelude.String [HyperdataDocument]
parseFormat CsvGargV3 bs = pure $ parseCsv' $ DBL.fromStrict bs parseFormat CsvGargV3 bs = parseCsv' $ DBL.fromStrict bs
parseFormat CsvHal bs = pure $ parseHal' $ DBL.fromStrict bs parseFormat CsvHal bs = parseHal' $ DBL.fromStrict bs
parseFormat RisPresse bs = do parseFormat RisPresse bs = do
docs <- mapM (toDoc RIS) let docs = map (toDoc RIS)
<$> snd <$> snd
<$> enrichWith RisPresse <$> enrichWith RisPresse
$ partitionEithers $ partitionEithers
$ [runParser' RisPresse bs] $ [runParser' RisPresse bs]
pure $ Right docs Right docs
parseFormat WOS bs = do parseFormat WOS bs = do
docs <- mapM (toDoc WOS) let docs = map (toDoc WOS)
<$> snd <$> snd
<$> enrichWith WOS <$> enrichWith WOS
$ partitionEithers $ partitionEithers
$ [runParser' WOS bs] $ [runParser' WOS bs]
pure $ Right docs Right docs
parseFormat ZIP bs = do parseFormat ZIP bs = do
path <- emptySystemTempFile "parsed-zip" path <- emptySystemTempFile "parsed-zip"
DB.writeFile path bs DB.writeFile path bs
...@@ -111,16 +110,16 @@ parseFile :: FileFormat -> FilePath -> IO (Either Prelude.String [HyperdataDocum ...@@ -111,16 +110,16 @@ parseFile :: FileFormat -> FilePath -> IO (Either Prelude.String [HyperdataDocum
parseFile CsvHal p = parseHal p parseFile CsvHal p = parseHal p
parseFile CsvGargV3 p = parseCsv p parseFile CsvGargV3 p = parseCsv p
parseFile RisPresse p = do parseFile RisPresse p = do
docs <- join $ mapM (toDoc RIS) <$> snd <$> enrichWith RisPresse <$> readFileWith RIS p docs <- map (toDoc RIS) <$> snd <$> enrichWith RisPresse <$> readFileWith RIS p
pure $ Right docs pure $ Right docs
parseFile WOS p = do parseFile WOS p = do
docs <- join $ mapM (toDoc WOS) <$> snd <$> enrichWith WOS <$> readFileWith WOS p docs <- map (toDoc WOS) <$> snd <$> enrichWith WOS <$> readFileWith WOS p
pure $ Right docs pure $ Right docs
parseFile ff p = do parseFile ff p = do
docs <- join $ mapM (toDoc ff) <$> snd <$> enrichWith ff <$> readFileWith ff p docs <- map (toDoc ff) <$> snd <$> enrichWith ff <$> readFileWith ff p
pure $ Right docs pure $ Right docs
toDoc :: FileFormat -> [(Text, Text)] -> IO HyperdataDocument toDoc :: FileFormat -> [(Text, Text)] -> HyperdataDocument
-- TODO use language for RIS -- TODO use language for RIS
toDoc ff d = do toDoc ff d = do
-- let abstract = lookup "abstract" d -- let abstract = lookup "abstract" d
...@@ -128,27 +127,27 @@ toDoc ff d = do ...@@ -128,27 +127,27 @@ toDoc ff d = do
let dateToParse = DT.replace "-" " " <$> lookup "PY" d <> Just " " <> lookup "publication_date" d let dateToParse = DT.replace "-" " " <$> lookup "PY" d <> Just " " <> lookup "publication_date" d
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit lang dateToParse let (utcTime, (pub_year, pub_month, pub_day)) = Date.dateSplit lang dateToParse
pure $ HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff
, _hd_doi = lookup "doi" d , _hd_doi = lookup "doi" d
, _hd_url = lookup "URL" d , _hd_url = lookup "URL" d
, _hd_uniqId = Nothing , _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing , _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = lookup "title" d , _hd_title = lookup "title" d
, _hd_authors = Nothing , _hd_authors = Nothing
, _hd_institutes = lookup "authors" d , _hd_institutes = lookup "authors" d
, _hd_source = lookup "source" d , _hd_source = lookup "source" d
, _hd_abstract = lookup "abstract" d , _hd_abstract = lookup "abstract" d
, _hd_publication_date = fmap (DT.pack . show) utcTime , _hd_publication_date = fmap (DT.pack . show) utcTime
, _hd_publication_year = pub_year , _hd_publication_year = pub_year
, _hd_publication_month = pub_month , _hd_publication_month = pub_month
, _hd_publication_day = pub_day , _hd_publication_day = pub_day
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (DT.pack . show) lang } , _hd_language_iso2 = Just $ (DT.pack . show) lang }
enrichWith :: FileFormat enrichWith :: FileFormat
-> (a, [[[(DB.ByteString, DB.ByteString)]]]) -> (a, [[(Text, Text)]]) -> (a, [[[(DB.ByteString, DB.ByteString)]]]) -> (a, [[(Text, Text)]])
......
...@@ -21,10 +21,12 @@ module Gargantext.Core.Text.Corpus.Parsers.Date ...@@ -21,10 +21,12 @@ module Gargantext.Core.Text.Corpus.Parsers.Date
where where
import Data.Aeson (toJSON, Value) import Data.Aeson (toJSON, Value)
import Data.Either (Either(..))
import Data.HashMap.Strict as HM hiding (map) import Data.HashMap.Strict as HM hiding (map)
import Data.Text (Text, unpack, splitOn) import Data.Text (Text, unpack, splitOn)
import Data.Time (parseTimeOrError, defaultTimeLocale, toGregorian) import Data.Time (parseTimeOrError, defaultTimeLocale, toGregorian)
import Data.Time.Clock (UTCTime(..), getCurrentTime) import qualified Data.Time.Calendar as DTC
import Data.Time.Clock (UTCTime(..), secondsToDiffTime)
import Data.Time.LocalTime (utc) import Data.Time.LocalTime (utc)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime) import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
import Duckling.Api (analyze) import Duckling.Api (analyze)
...@@ -41,12 +43,12 @@ import qualified Duckling.Core as DC ...@@ -41,12 +43,12 @@ import qualified Duckling.Core as DC
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Parse date to Ints -- | Parse date to Ints
-- TODO add hours, minutes and seconds -- TODO add hours, minutes and seconds
dateSplit :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day)) dateSplit :: Lang -> Maybe Text -> (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
dateSplit _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing)) dateSplit _ Nothing = (Nothing, (Nothing, Nothing, Nothing))
dateSplit l (Just txt) = do dateSplit l (Just txt) = do
utcTime <- parse l txt let utcTime = parse l txt
let (y, m, d) = split' utcTime let (y, m, d) = split' utcTime
pure (Just utcTime, (Just y, Just m,Just d)) (Just utcTime, (Just y, Just m,Just d))
split' :: UTCTime -> (Year, Month, Day) split' :: UTCTime -> (Year, Month, Day)
split' utcTime = (fromIntegral y, m, d) split' utcTime = (fromIntegral y, m, d)
...@@ -65,8 +67,8 @@ type Day = Int ...@@ -65,8 +67,8 @@ type Day = Int
-- 1979-04-10 19:00:00 UTC -- 1979-04-10 19:00:00 UTC
-- >>> parseDate EN (pack "April 10 1979") -- >>> parseDate EN (pack "April 10 1979")
-- 1979-04-10 00:00:00 UTC -- 1979-04-10 00:00:00 UTC
parse :: Lang -> Text -> IO UTCTime parse :: Lang -> Text -> UTCTime
parse lang s = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0" lang s parse = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0"
type DateFormat = Text type DateFormat = Text
type DateDefault = Text type DateDefault = Text
...@@ -75,17 +77,18 @@ parseDate' :: DateFormat ...@@ -75,17 +77,18 @@ parseDate' :: DateFormat
-> DateDefault -> DateDefault
-> Lang -> Lang
-> Text -> Text
-> IO UTCTime -> UTCTime
parseDate' format def lang s = do parseDate' format def lang s = do
dateStr' <- parseRaw lang s let dateStr' = parseRaw lang s
if dateStr' == "" case dateStr' of
then getCurrentTime Left _err -> defaultUTCTime
else do Right "" -> defaultUTCTime
Right ds -> do
let dateStr = unpack let dateStr = unpack
$ maybe def identity $ maybe def identity
$ head $ head
$ splitOn "." dateStr' $ splitOn "." ds
pure $ parseTimeOrError True defaultTimeLocale (unpack format) dateStr parseTimeOrError True defaultTimeLocale (unpack format) dateStr
-- TODO add Paris at Duckling.Locale Region datatype -- TODO add Paris at Duckling.Locale Region datatype
...@@ -102,16 +105,15 @@ parserLang _ = panic "not implemented" ...@@ -102,16 +105,15 @@ parserLang _ = panic "not implemented"
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
-- parseRaw :: Context -> Text -> SomeErrorHandling Text -- parseRaw :: Context -> Text -> SomeErrorHandling Text
parseRaw :: Lang -> Text -> IO Text parseRaw :: Lang -> Text -> Either Text Text
parseRaw lang text = do -- case result parseRaw lang text = do -- case result
maybeResult <- extractValue <$> getTimeValue let maybeResult = extractValue $ getTimeValue
<$> parseDateWithDuckling lang text (Options True) $ parseDateWithDuckling lang text (Options True)
case maybeResult of case maybeResult of
Just result -> pure result Just result -> Right result
Nothing -> do Nothing -> do
printDebug ("[G.C.T.C.P.D.parseRaw] ERROR " <> (cs . show) lang) -- printDebug ("[G.C.T.C.P.D.parseRaw] ERROR " <> (cs . show) lang) text
text Left $ "[G.C.T.C.P.D.parseRaw ERROR] " <> (cs . show) lang <> " :: " <> text
pure ""
getTimeValue :: [ResolvedToken] -> Maybe Value getTimeValue :: [ResolvedToken] -> Maybe Value
getTimeValue rt = case head rt of getTimeValue rt = case head rt of
...@@ -136,15 +138,21 @@ utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time ...@@ -136,15 +138,21 @@ utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time
-- | Local Context which depends on Lang and Time -- | Local Context which depends on Lang and Time
localContext :: Lang -> DucklingTime -> Context localContext :: Lang -> DucklingTime -> Context
localContext lang dt = Context {referenceTime = dt, locale = makeLocale (parserLang lang) Nothing} localContext lang dt = Context { referenceTime = dt
, locale = makeLocale (parserLang lang) Nothing }
-- | Date parser with Duckling defaultDay :: DTC.Day
parseDateWithDuckling :: Lang -> Text -> Options -> IO [ResolvedToken] defaultDay = DTC.fromGregorian 1 1 1
parseDateWithDuckling lang input options = do
contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
-- TODO check/test Options False or True
pure $ analyze input contxt options $ HashSet.fromList [(Seal Time)]
defaultUTCTime :: UTCTime
defaultUTCTime = UTCTime { utctDay = defaultDay
, utctDayTime = secondsToDiffTime 0 }
-- | Date parser with Duckling
parseDateWithDuckling :: Lang -> Text -> Options -> [ResolvedToken]
parseDateWithDuckling lang input options = do
let contxt = localContext lang $ utcToDucklingTime defaultUTCTime
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
-- TODO check/test Options False or True
analyze input contxt options $ HashSet.fromList [(Seal Time)]
...@@ -68,8 +68,7 @@ wikiPageToDocument m wr = do ...@@ -68,8 +68,7 @@ wikiPageToDocument m wr = do
source = Nothing source = Nothing
abstract = Just $ concat $ take m sections abstract = Just $ concat $ take m sections
(date, (year, month, day)) let (date, (year, month, day)) = dateSplit EN $ head
<- dateSplit EN $ head
$ catMaybes $ catMaybes
[ wr ^. wr_yearStart [ wr ^. wr_yearStart
, wr ^. wr_yearEnd , wr ^. wr_yearEnd
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment