Date.hs 6.88 KB
Newer Older
1
{-|
2
Module      : Gargantext.Text.Parsers.Date
3 4 5 6 7 8 9
Description : Some utils to parse dates
Copyright   : (c) CNRS 2017-present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

10
According to the language of the text, parseDateRaw returns date as Text:
11 12

TODO : Add some tests
13 14
import Gargantext.Text.Parsers.Date as DGP
DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
15 16
-}

17
{-# LANGUAGE FlexibleContexts  #-}
18
{-# LANGUAGE NoImplicitPrelude #-}
19
{-# LANGUAGE OverloadedStrings #-}
20

21
module Gargantext.Text.Parsers.Date (parseDate, parseDateRaw) where
22

23
import Data.HashMap.Strict as HM hiding (map)
24
import Data.Text (Text, unpack, splitOn, pack)
25
import Data.Time (parseTimeOrError, defaultTimeLocale)
26 27
import Data.Time.Clock (UTCTime, getCurrentTime)
import Data.Time.LocalTime (utc)
28 29
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
import Duckling.Api (analyze)
30
import Duckling.Core (makeLocale, Some(This), Dimension(Time))
31 32 33 34 35
import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime))
import Duckling.Types (ResolvedToken)
import Duckling.Types (jsonValue)
import Gargantext.Core (Lang(FR,EN))
import Gargantext.Prelude
36
import qualified Data.Aeson   as Json
37 38
import qualified Data.HashSet as HashSet
import qualified Duckling.Core as DC
39

40 41 42 43 44 45 46 47 48
-- | Unused import (to parse Date Format, keeping it for maybe next steps)
-- import Control.Monad ((=<<))
-- import Data.Either (Either)
-- import Data.Fixed (Fixed (MkFixed))
-- import Data.Foldable (length)
-- import Data.String (String)
-- import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
-- import Data.Time.Calendar (Day, fromGregorian)
-- import Duckling.Debug as DB
49 50
-- import Duckling.Engine (parseAndResolve)
-- import Duckling.Rules (rulesFor)
51 52 53 54 55 56 57 58 59
-- import Prelude (toInteger, div, otherwise, (++))
-- import Text.Parsec.Error (ParseError)
-- import Text.Parsec.Prim (Stream, ParsecT)
-- import Text.Parsec.String (Parser)
-- import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
-- import Text.XML.HXT.DOM.Util (decimalStringToInt)
-- import qualified Text.ParserCombinators.Parsec (parse)

------------------------------------------------------------------------
60 61 62 63 64 65
-- | Date Parser
-- Parses dates mentions in full text given the language.
-- >>> parseDate FR (pack "10 avril 1979 à 19H")
-- 1979-04-10 19:00:00 UTC
-- >>> parseDate EN (pack "April 10 1979")
-- 1979-04-10 00:00:00 UTC
66 67 68 69 70 71 72
parseDate :: Lang -> Text -> IO UTCTime
parseDate lang s = do
  dateStr' <- parseDateRaw lang s
  let format  = "%Y-%m-%dT%T"
  let dateStr = unpack $ maybe "0-0-0T0:0:0" identity
                       $ head $ splitOn "." dateStr'
  pure $ parseTimeOrError True defaultTimeLocale format dateStr
73 74 75 76 77 78



-- TODO add Paris at Duckling.Locale Region datatype
-- | To get Homogeinity of the languages
--   TODO : put this in a more generic place in the source code
79 80 81
parserLang :: Lang -> DC.Lang
parserLang FR = DC.FR
parserLang EN = DC.EN
82
-- parserLang _  = panic "not implemented"
83 84 85 86 87

-- | Final Date parser API
-- IO can be avoided here:
-- currentContext :: Lang -> IO Context
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
88
-- parseDateRaw :: Context -> Text -> SomeErrorHandling Text
89

90
-- TODO error handling
91
parseDateRaw :: Lang -> Text -> IO (Text)
92
parseDateRaw lang text = do
93
    maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
94 95 96
    case headMay maybeJson of
      Just (Json.Object object) -> case HM.lookup "value" object of
                                     Just (Json.String date) -> pure date
97
                                     Just _                  -> panic "ParseDateRaw ERROR: should be a json String"
98 99 100
                                     Nothing                 -> panic $ "ParseDateRaw ERROR: no date found" <> (pack . show) lang <> " " <> text

      _                         -> panic $ "ParseDateRaw ERROR: type error" <> (pack . show) lang <> " " <> text
101 102 103 104 105 106 107 108 109


-- | Current Time in DucklingTime format
-- TODO : get local Time in a more generic way
utcToDucklingTime :: UTCTime -> DucklingTime
utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc

-- | Local Context which depends on Lang and Time
localContext :: Lang -> DucklingTime -> Context
110
localContext lang dt = Context {referenceTime = dt, locale = makeLocale (parserLang lang) Nothing}
111 112 113 114 115 116 117 118

-- | Date parser with Duckling
parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
parseDateWithDuckling lang input = do
    contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
    --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
    pure $ analyze input contxt $ HashSet.fromList [(This Time)]

Mael NICOLAS's avatar
Mael NICOLAS committed
119
-- | Permit to transform a String to an Int in a monadic context
120 121
--wrapDST :: Monad m => String -> m Int
--wrapDST = (return . decimalStringToInt)
122

Mael NICOLAS's avatar
Mael NICOLAS committed
123
-- | Generic parser which take at least one element not given in argument
124 125
--many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
--many1NoneOf = (many1 . noneOf)
126

127 128 129 130
--getMultiplicator :: Int -> Int
--getMultiplicator a
--  | 0 >= a = 1
--  | otherwise = 10 * (getMultiplicator $ div a 10)
131

Mael NICOLAS's avatar
Mael NICOLAS committed
132
-- | Parser for date format y-m-d
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
--parseGregorian :: Parser Day
--parseGregorian  = do
--        y <- wrapDST =<< many1NoneOf ['-']
--        _ <- char '-'
--        m <- wrapDST =<< many1NoneOf ['-']
--        _ <- char '-'
--        d <- wrapDST =<< many1NoneOf ['T']
--        _ <- char 'T'
--        return $ fromGregorian (toInteger y) m d
--
---- | Parser for time format h:m:s
--parseTimeOfDay :: Parser TimeOfDay
--parseTimeOfDay = do
--        h <- wrapDST =<< many1NoneOf [':']
--        _ <- char ':'
--        m <- wrapDST =<< many1NoneOf [':']
--        _ <- char ':'
--        r <- many1NoneOf ['.']
--        _ <- char '.'
--        dec <- many1NoneOf ['+', '-']
--        let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
--            seconds = nb * 10^(12-l)
--        return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
--
--
Mael NICOLAS's avatar
Mael NICOLAS committed
158
-- | Parser for timezone format +hh:mm
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
--parseTimeZone :: Parser TimeZone
--parseTimeZone = do
--        sign <- oneOf ['+', '-']
--        h <- wrapDST =<< many1NoneOf [':']
--        _ <- char ':'
--        m <- wrapDST =<< (many1 $ anyChar)
--        let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
--         in return $ TimeZone timeInMinute False "CET"
--
---- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
--parseZonedTime :: Parser ZonedTime
--parseZonedTime= do
--        d <- parseGregorian
--        tod <- parseTimeOfDay
--        tz <- parseTimeZone
--        return $ ZonedTime (LocalTime d (tod)) tz
--
---- | Opposite of toRFC3339
--fromRFC3339 :: Text -> Either ParseError ZonedTime
--fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
--        where input = unpack t