Commit f40b051d authored by Alexandre Delanoë's avatar Alexandre Delanoë

[DATE] parser -> UTCTime

parent 18ceac9c
......@@ -169,12 +169,12 @@ executables:
main: Main.hs
source-dirs: bin/gargantext-server
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
- -Wcompat
- -Wmissing-signatures
- -rtsopts
- -threaded
- -with-rtsopts=-N
dependencies:
- base
- containers
......@@ -232,11 +232,12 @@ tests:
main: Main.hs
source-dirs: src-doctest
ghc-options:
- -Werror
- -threaded
- -O2
- -Wcompat
- -Wmissing-signatures
- -rtsopts
- -threaded
- -with-rtsopts=-N
- -Wmissing-signatures
dependencies:
- doctest
- Glob
......
......@@ -25,8 +25,12 @@ module Gargantext.Core
-- - SP == spanish (not implemented yet)
--
-- ... add your language and help us to implement it (:
data Lang = EN | FR -- | DE | SP | CH
-- | All languages supported
-- TODO : DE | SP | CH
data Lang = EN | FR
deriving (Show, Eq, Ord, Bounded, Enum)
allLangs :: [Lang]
allLangs = [minBound ..]
......@@ -19,12 +19,13 @@ please follow the types.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE PackageImports #-}
module Gargantext.Text.Parsers -- (parse, FileFormat(..))
module Gargantext.Text.Parsers (parse, FileFormat(..), clean)
where
import System.FilePath (FilePath(), takeExtension)
import Codec.Archive.Zip (withArchive, getEntry, getEntries)
import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries)
import Data.Either.Extra (partitionEithers)
import Data.List (concat)
......@@ -49,15 +50,13 @@ import Gargantext.Prelude
import Gargantext.Text.Parsers.WOS (wosParser)
------------------------------------------------------------------------
type ParseError = String
type Field = Text
type Document = DM.Map Field Text
type FilesParsed = DM.Map FilePath FileParsed
data FileParsed = FileParsed { _fileParsed_errors :: Maybe ParseError
, _fileParsed_result :: [Document]
} deriving (Show)
--type Field = Text
--type Document = DM.Map Field Text
--type FilesParsed = DM.Map FilePath FileParsed
--data FileParsed = FileParsed { _fileParsed_errors :: Maybe ParseError
-- , _fileParsed_result :: [Document]
-- } deriving (Show)
-- | According to the format of Input file,
......@@ -85,7 +84,7 @@ parse format path = do
-- | withParser:
-- According the format of the text, choosing the right parser.
-- According to the format of the text, choose the right parser.
-- TODO withParser :: FileFormat -> Parser [Document]
withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
withParser WOS = wosParser
......
......@@ -7,61 +7,66 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
According to the language of the text, parseDate1 returns date as Text:
According to the language of the text, parseDateRaw returns date as Text:
TODO : Add some tests
import Gargantext.Parsers.Date as DGP
DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
import Gargantext.Text.Parsers.Date as DGP
DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.Date (parseDate1, parseDate, fromRFC3339, parseTimeOfDay, getMultiplicator) where
import Gargantext.Core (Lang(FR,EN))
import Gargantext.Prelude
import Prelude (toInteger, div, otherwise, (++))
--import Gargantext.Types.Main as G
module Gargantext.Text.Parsers.Date (parseDate, parseDateRaw) where
import Data.HashMap.Strict as HM hiding (map)
import Data.Text (Text, unpack, splitOn)
import Data.Time (parseTimeOrError, defaultTimeLocale)
import Data.Time.Clock (UTCTime, getCurrentTime)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
import Data.Time.LocalTime (utc)
import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale)
, DucklingTime(DucklingTime)
)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
import Duckling.Api (analyze)
import Duckling.Core (makeLocale, Some(This), Dimension(Time))
import qualified Duckling.Core as DC
import Duckling.Types (jsonValue, Entity)
import Duckling.Api (analyze, parse)
import qualified Data.HashSet as HashSet
import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime))
import Duckling.Types (ResolvedToken)
import Duckling.Types (jsonValue)
import Gargantext.Core (Lang(FR,EN))
import Gargantext.Prelude
import qualified Data.Aeson as Json
import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
import Data.Time.Calendar (Day, fromGregorian)
import Data.Fixed (Fixed (MkFixed))
import Data.Foldable (length)
import Data.HashMap.Strict as HM hiding (map)
import qualified Data.HashSet as HashSet
import qualified Duckling.Core as DC
import Control.Monad ((=<<))
import Data.Either (Either)
import Data.String (String)
import Data.Text (Text, unpack)
-- | Unused import (to parse Date Format, keeping it for maybe next steps)
-- import Control.Monad ((=<<))
-- import Data.Either (Either)
-- import Data.Fixed (Fixed (MkFixed))
-- import Data.Foldable (length)
-- import Data.String (String)
-- import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
-- import Data.Time.Calendar (Day, fromGregorian)
-- import Duckling.Debug as DB
-- import Duckling.Engine (parseAndResolve)
-- import Duckling.Rules (rulesFor)
-- import Duckling.Debug as DB
-- import Prelude (toInteger, div, otherwise, (++))
-- import Text.Parsec.Error (ParseError)
-- import Text.Parsec.Prim (Stream, ParsecT)
-- import Text.Parsec.String (Parser)
-- import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
-- import Text.XML.HXT.DOM.Util (decimalStringToInt)
-- import qualified Text.ParserCombinators.Parsec (parse)
------------------------------------------------------------------------
parseDate :: Lang -> Text -> IO UTCTime
parseDate lang s = do
dateStr' <- parseDateRaw lang s
let format = "%Y-%m-%dT%T"
let dateStr = unpack $ maybe "0-0-0T0:0:0" identity
$ head $ splitOn "." dateStr'
pure $ parseTimeOrError True defaultTimeLocale format dateStr
import Duckling.Types (ResolvedToken)
import Safe (headMay)
import Text.Parsec.Error (ParseError)
import Text.Parsec.String (Parser)
import Text.Parsec.Prim (Stream, ParsecT)
import qualified Text.ParserCombinators.Parsec (parse)
import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
import Text.XML.HXT.DOM.Util (decimalStringToInt)
-- TODO add Paris at Duckling.Locale Region datatype
-- | To get Homogeinity of the languages
-- TODO : put this in a more generic place in the source code
......@@ -74,19 +79,18 @@ parserLang EN = DC.EN
-- IO can be avoided here:
-- currentContext :: Lang -> IO Context
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
-- parseDate1 :: Context -> Text -> SomeErrorHandling Text
-- parseDateRaw :: Context -> Text -> SomeErrorHandling Text
-- TODO error handling
parseDate1 :: Lang -> Text -> IO Text
parseDate1 lang text = do
parseDateRaw :: Lang -> Text -> IO Text
parseDateRaw lang text = do
maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
case headMay maybeJson of
Just (Json.Object object) -> case HM.lookup "value" object of
Just (Json.String date) -> pure date
Just _ -> panic "ParseDate ERROR: should be a json String"
Nothing -> panic "ParseDate ERROR: no date found"
_ -> panic "ParseDate ERROR: type error"
Just _ -> panic "ParseDateRaw ERROR: should be a json String"
Nothing -> panic "ParseDateRaw ERROR: no date found"
_ -> panic "ParseDateRaw ERROR: type error"
-- | Current Time in DucklingTime format
......@@ -105,70 +109,64 @@ parseDateWithDuckling lang input = do
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
pure $ analyze input contxt $ HashSet.fromList [(This Time)]
parseDate :: Lang -> Text -> IO [Entity]
parseDate lang input = do
context <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
pure $ parse input context [(This Time)]
-- | Permit to transform a String to an Int in a monadic context
wrapDST :: Monad m => String -> m Int
wrapDST = (return . decimalStringToInt)
--wrapDST :: Monad m => String -> m Int
--wrapDST = (return . decimalStringToInt)
-- | Generic parser which take at least one element not given in argument
many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
many1NoneOf = (many1 . noneOf)
--many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
--many1NoneOf = (many1 . noneOf)
getMultiplicator :: Int -> Int
getMultiplicator a
| 0 >= a = 1
| otherwise = 10 * (getMultiplicator $ div a 10)
--getMultiplicator :: Int -> Int
--getMultiplicator a
-- | 0 >= a = 1
-- | otherwise = 10 * (getMultiplicator $ div a 10)
-- | Parser for date format y-m-d
parseGregorian :: Parser Day
parseGregorian = do
y <- wrapDST =<< many1NoneOf ['-']
_ <- char '-'
m <- wrapDST =<< many1NoneOf ['-']
_ <- char '-'
d <- wrapDST =<< many1NoneOf ['T']
_ <- char 'T'
return $ fromGregorian (toInteger y) m d
-- | Parser for time format h:m:s
parseTimeOfDay :: Parser TimeOfDay
parseTimeOfDay = do
h <- wrapDST =<< many1NoneOf [':']
_ <- char ':'
m <- wrapDST =<< many1NoneOf [':']
_ <- char ':'
r <- many1NoneOf ['.']
_ <- char '.'
dec <- many1NoneOf ['+', '-']
let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
seconds = nb * 10^(12-l)
return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
--parseGregorian :: Parser Day
--parseGregorian = do
-- y <- wrapDST =<< many1NoneOf ['-']
-- _ <- char '-'
-- m <- wrapDST =<< many1NoneOf ['-']
-- _ <- char '-'
-- d <- wrapDST =<< many1NoneOf ['T']
-- _ <- char 'T'
-- return $ fromGregorian (toInteger y) m d
--
---- | Parser for time format h:m:s
--parseTimeOfDay :: Parser TimeOfDay
--parseTimeOfDay = do
-- h <- wrapDST =<< many1NoneOf [':']
-- _ <- char ':'
-- m <- wrapDST =<< many1NoneOf [':']
-- _ <- char ':'
-- r <- many1NoneOf ['.']
-- _ <- char '.'
-- dec <- many1NoneOf ['+', '-']
-- let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
-- seconds = nb * 10^(12-l)
-- return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
--
--
-- | Parser for timezone format +hh:mm
parseTimeZone :: Parser TimeZone
parseTimeZone = do
sign <- oneOf ['+', '-']
h <- wrapDST =<< many1NoneOf [':']
_ <- char ':'
m <- wrapDST =<< (many1 $ anyChar)
let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
in return $ TimeZone timeInMinute False "CET"
-- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
parseZonedTime :: Parser ZonedTime
parseZonedTime= do
d <- parseGregorian
tod <- parseTimeOfDay
tz <- parseTimeZone
return $ ZonedTime (LocalTime d (tod)) tz
-- | Opposite of toRFC3339
fromRFC3339 :: Text -> Either ParseError ZonedTime
fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
where input = unpack t
--parseTimeZone :: Parser TimeZone
--parseTimeZone = do
-- sign <- oneOf ['+', '-']
-- h <- wrapDST =<< many1NoneOf [':']
-- _ <- char ':'
-- m <- wrapDST =<< (many1 $ anyChar)
-- let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
-- in return $ TimeZone timeInMinute False "CET"
--
---- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
--parseZonedTime :: Parser ZonedTime
--parseZonedTime= do
-- d <- parseGregorian
-- tod <- parseTimeOfDay
-- tz <- parseTimeZone
-- return $ ZonedTime (LocalTime d (tod)) tz
--
---- | Opposite of toRFC3339
--fromRFC3339 :: Text -> Either ParseError ZonedTime
--fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
-- where input = unpack t
......@@ -27,7 +27,7 @@ import Data.Text as T
import Data.Either
-- | Use case
-- >>> :{
-- :{
-- wikimediaFile <- BL.readFile "text.xml"
-- _ <- runConduit $ parseLBS def wikimediaFile
-- .| force "mediawiki required" parseMediawiki
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment