Parsec.hs 3.12 KB
Newer Older
1
{-|
2
Module      : Gargantext.Text.Corpus.Parsers.Date
3 4 5 6 7 8 9 10 11 12 13 14 15
Description : Some utils to parse dates
Copyright   : (c) CNRS 2017-present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

{-# LANGUAGE FlexibleContexts  #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}

16
module Gargantext.Text.Corpus.Parsers.Date.Parsec
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
  where

import Control.Monad ((=<<))
import Data.Either (Either)
import Data.Fixed (Fixed (MkFixed))
import Data.Foldable (length)
import Data.String (String)
import Data.Text (Text, unpack)
import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
import Data.Time.Calendar (Day, fromGregorian)
import Gargantext.Prelude
import Prelude (toInteger, div, otherwise, (++))
import Text.Parsec.Error (ParseError)
import Text.Parsec.Prim (Stream, ParsecT)
import Text.Parsec.String (Parser)
import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
import Text.XML.HXT.DOM.Util (decimalStringToInt)
import qualified Text.ParserCombinators.Parsec (parse)

-- | Permit to transform a String to an Int in a monadic context
wrapDST :: Monad m => String -> m Int
wrapDST = return . decimalStringToInt

-- | Generic parser which take at least one element not given in argument
many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
many1NoneOf = (many1 . noneOf)

getMultiplicator :: Int -> Int
getMultiplicator a
  | 0 >= a = 1
  | otherwise = 10 * (getMultiplicator $ div a 10)

-- | Parser for date format y-m-d
parseGregorian :: Parser Day
parseGregorian  = do
        y <- wrapDST =<< many1NoneOf ['-']
        _ <- char '-'
        m <- wrapDST =<< many1NoneOf ['-']
        _ <- char '-'
        d <- wrapDST =<< many1NoneOf ['T']
        _ <- char 'T'
        return $ fromGregorian (toInteger y) m d

---- | Parser for time format h:m:s
parseTimeOfDay :: Parser TimeOfDay
parseTimeOfDay = do
        h <- wrapDST =<< many1NoneOf [':']
        _ <- char ':'
        m <- wrapDST =<< many1NoneOf [':']
        _ <- char ':'
        r <- many1NoneOf ['.']
        _ <- char '.'
        dec <- many1NoneOf ['+', '-']
        let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
            seconds = nb * 10^(12-l)
        return $ TimeOfDay h m (MkFixed . toInteger $ seconds)


-- | Parser for timezone format +hh:mm
parseTimeZone :: Parser TimeZone
parseTimeZone = do
        sign <- oneOf ['+', '-']
        h <- wrapDST =<< many1NoneOf [':']
        _ <- char ':'
        m <- wrapDST =<< (many1 $ anyChar)
        let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
         in return $ TimeZone timeInMinute False "CET"

---- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
parseZonedTime :: Parser ZonedTime
parseZonedTime= do
        d <- parseGregorian
        tod <- parseTimeOfDay
        tz <- parseTimeZone
        return $ ZonedTime (LocalTime d (tod)) tz

---- | Opposite of toRFC3339
fromRFC3339 :: Text -> Either ParseError ZonedTime
fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
        where input = unpack t