Commit f40b051d authored by Alexandre Delanoë's avatar Alexandre Delanoë

[DATE] parser -> UTCTime

parent 18ceac9c
...@@ -169,12 +169,12 @@ executables: ...@@ -169,12 +169,12 @@ executables:
main: Main.hs main: Main.hs
source-dirs: bin/gargantext-server source-dirs: bin/gargantext-server
ghc-options: ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2 - -O2
- -Wmissing-signatures
- -Wcompat - -Wcompat
- -Wmissing-signatures
- -rtsopts
- -threaded
- -with-rtsopts=-N
dependencies: dependencies:
- base - base
- containers - containers
...@@ -232,11 +232,12 @@ tests: ...@@ -232,11 +232,12 @@ tests:
main: Main.hs main: Main.hs
source-dirs: src-doctest source-dirs: src-doctest
ghc-options: ghc-options:
- -Werror - -O2
- -threaded - -Wcompat
- -Wmissing-signatures
- -rtsopts - -rtsopts
- -threaded
- -with-rtsopts=-N - -with-rtsopts=-N
- -Wmissing-signatures
dependencies: dependencies:
- doctest - doctest
- Glob - Glob
......
...@@ -25,8 +25,12 @@ module Gargantext.Core ...@@ -25,8 +25,12 @@ module Gargantext.Core
-- - SP == spanish (not implemented yet) -- - SP == spanish (not implemented yet)
-- --
-- ... add your language and help us to implement it (: -- ... add your language and help us to implement it (:
data Lang = EN | FR -- | DE | SP | CH
-- | All languages supported
-- TODO : DE | SP | CH
data Lang = EN | FR
deriving (Show, Eq, Ord, Bounded, Enum) deriving (Show, Eq, Ord, Bounded, Enum)
allLangs :: [Lang] allLangs :: [Lang]
allLangs = [minBound ..] allLangs = [minBound ..]
...@@ -19,12 +19,13 @@ please follow the types. ...@@ -19,12 +19,13 @@ please follow the types.
-} -}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE PackageImports #-}
module Gargantext.Text.Parsers -- (parse, FileFormat(..)) module Gargantext.Text.Parsers (parse, FileFormat(..), clean)
where where
import System.FilePath (FilePath(), takeExtension) import System.FilePath (FilePath(), takeExtension)
import Codec.Archive.Zip (withArchive, getEntry, getEntries) import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries)
import Data.Either.Extra (partitionEithers) import Data.Either.Extra (partitionEithers)
import Data.List (concat) import Data.List (concat)
...@@ -49,15 +50,13 @@ import Gargantext.Prelude ...@@ -49,15 +50,13 @@ import Gargantext.Prelude
import Gargantext.Text.Parsers.WOS (wosParser) import Gargantext.Text.Parsers.WOS (wosParser)
------------------------------------------------------------------------ ------------------------------------------------------------------------
type ParseError = String type ParseError = String
type Field = Text --type Field = Text
type Document = DM.Map Field Text --type Document = DM.Map Field Text
--type FilesParsed = DM.Map FilePath FileParsed
type FilesParsed = DM.Map FilePath FileParsed --data FileParsed = FileParsed { _fileParsed_errors :: Maybe ParseError
data FileParsed = FileParsed { _fileParsed_errors :: Maybe ParseError -- , _fileParsed_result :: [Document]
, _fileParsed_result :: [Document] -- } deriving (Show)
} deriving (Show)
-- | According to the format of Input file, -- | According to the format of Input file,
...@@ -85,7 +84,7 @@ parse format path = do ...@@ -85,7 +84,7 @@ parse format path = do
-- | withParser: -- | withParser:
-- According the format of the text, choosing the right parser. -- According to the format of the text, choose the right parser.
-- TODO withParser :: FileFormat -> Parser [Document] -- TODO withParser :: FileFormat -> Parser [Document]
withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]] withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
withParser WOS = wosParser withParser WOS = wosParser
......
...@@ -7,61 +7,66 @@ Maintainer : team@gargantext.org ...@@ -7,61 +7,66 @@ Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
According to the language of the text, parseDate1 returns date as Text: According to the language of the text, parseDateRaw returns date as Text:
TODO : Add some tests TODO : Add some tests
import Gargantext.Parsers.Date as DGP import Gargantext.Text.Parsers.Date as DGP
DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00" DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
-} -}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.Date (parseDate1, parseDate, fromRFC3339, parseTimeOfDay, getMultiplicator) where module Gargantext.Text.Parsers.Date (parseDate, parseDateRaw) where
import Gargantext.Core (Lang(FR,EN))
import Gargantext.Prelude
import Prelude (toInteger, div, otherwise, (++))
--import Gargantext.Types.Main as G
import Data.HashMap.Strict as HM hiding (map)
import Data.Text (Text, unpack, splitOn)
import Data.Time (parseTimeOrError, defaultTimeLocale)
import Data.Time.Clock (UTCTime, getCurrentTime) import Data.Time.Clock (UTCTime, getCurrentTime)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
import Data.Time.LocalTime (utc) import Data.Time.LocalTime (utc)
import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale) import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
, DucklingTime(DucklingTime) import Duckling.Api (analyze)
)
import Duckling.Core (makeLocale, Some(This), Dimension(Time)) import Duckling.Core (makeLocale, Some(This), Dimension(Time))
import qualified Duckling.Core as DC import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime))
import Duckling.Types (jsonValue, Entity) import Duckling.Types (ResolvedToken)
import Duckling.Types (jsonValue)
import Duckling.Api (analyze, parse) import Gargantext.Core (Lang(FR,EN))
import qualified Data.HashSet as HashSet import Gargantext.Prelude
import qualified Data.Aeson as Json import qualified Data.Aeson as Json
import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..)) import qualified Data.HashSet as HashSet
import Data.Time.Calendar (Day, fromGregorian) import qualified Duckling.Core as DC
import Data.Fixed (Fixed (MkFixed))
import Data.Foldable (length)
import Data.HashMap.Strict as HM hiding (map)
import Control.Monad ((=<<)) -- | Unused import (to parse Date Format, keeping it for maybe next steps)
import Data.Either (Either) -- import Control.Monad ((=<<))
import Data.String (String) -- import Data.Either (Either)
import Data.Text (Text, unpack) -- import Data.Fixed (Fixed (MkFixed))
-- import Data.Foldable (length)
-- import Data.String (String)
-- import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
-- import Data.Time.Calendar (Day, fromGregorian)
-- import Duckling.Debug as DB
-- import Duckling.Engine (parseAndResolve) -- import Duckling.Engine (parseAndResolve)
-- import Duckling.Rules (rulesFor) -- import Duckling.Rules (rulesFor)
-- import Duckling.Debug as DB -- import Prelude (toInteger, div, otherwise, (++))
-- import Text.Parsec.Error (ParseError)
-- import Text.Parsec.Prim (Stream, ParsecT)
-- import Text.Parsec.String (Parser)
-- import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
-- import Text.XML.HXT.DOM.Util (decimalStringToInt)
-- import qualified Text.ParserCombinators.Parsec (parse)
------------------------------------------------------------------------
parseDate :: Lang -> Text -> IO UTCTime
parseDate lang s = do
dateStr' <- parseDateRaw lang s
let format = "%Y-%m-%dT%T"
let dateStr = unpack $ maybe "0-0-0T0:0:0" identity
$ head $ splitOn "." dateStr'
pure $ parseTimeOrError True defaultTimeLocale format dateStr
import Duckling.Types (ResolvedToken)
import Safe (headMay)
import Text.Parsec.Error (ParseError)
import Text.Parsec.String (Parser)
import Text.Parsec.Prim (Stream, ParsecT)
import qualified Text.ParserCombinators.Parsec (parse)
import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
import Text.XML.HXT.DOM.Util (decimalStringToInt)
-- TODO add Paris at Duckling.Locale Region datatype -- TODO add Paris at Duckling.Locale Region datatype
-- | To get Homogeinity of the languages -- | To get Homogeinity of the languages
-- TODO : put this in a more generic place in the source code -- TODO : put this in a more generic place in the source code
...@@ -74,19 +79,18 @@ parserLang EN = DC.EN ...@@ -74,19 +79,18 @@ parserLang EN = DC.EN
-- IO can be avoided here: -- IO can be avoided here:
-- currentContext :: Lang -> IO Context -- currentContext :: Lang -> IO Context
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
-- parseDate1 :: Context -> Text -> SomeErrorHandling Text -- parseDateRaw :: Context -> Text -> SomeErrorHandling Text
-- TODO error handling -- TODO error handling
parseDate1 :: Lang -> Text -> IO Text parseDateRaw :: Lang -> Text -> IO Text
parseDate1 lang text = do parseDateRaw lang text = do
maybeJson <- map jsonValue <$> parseDateWithDuckling lang text maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
case headMay maybeJson of case headMay maybeJson of
Just (Json.Object object) -> case HM.lookup "value" object of Just (Json.Object object) -> case HM.lookup "value" object of
Just (Json.String date) -> pure date Just (Json.String date) -> pure date
Just _ -> panic "ParseDate ERROR: should be a json String" Just _ -> panic "ParseDateRaw ERROR: should be a json String"
Nothing -> panic "ParseDate ERROR: no date found" Nothing -> panic "ParseDateRaw ERROR: no date found"
_ -> panic "ParseDate ERROR: type error" _ -> panic "ParseDateRaw ERROR: type error"
-- | Current Time in DucklingTime format -- | Current Time in DucklingTime format
...@@ -105,70 +109,64 @@ parseDateWithDuckling lang input = do ...@@ -105,70 +109,64 @@ parseDateWithDuckling lang input = do
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
pure $ analyze input contxt $ HashSet.fromList [(This Time)] pure $ analyze input contxt $ HashSet.fromList [(This Time)]
parseDate :: Lang -> Text -> IO [Entity]
parseDate lang input = do
context <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
pure $ parse input context [(This Time)]
-- | Permit to transform a String to an Int in a monadic context -- | Permit to transform a String to an Int in a monadic context
wrapDST :: Monad m => String -> m Int --wrapDST :: Monad m => String -> m Int
wrapDST = (return . decimalStringToInt) --wrapDST = (return . decimalStringToInt)
-- | Generic parser which take at least one element not given in argument -- | Generic parser which take at least one element not given in argument
many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char] --many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
many1NoneOf = (many1 . noneOf) --many1NoneOf = (many1 . noneOf)
getMultiplicator :: Int -> Int --getMultiplicator :: Int -> Int
getMultiplicator a --getMultiplicator a
| 0 >= a = 1 -- | 0 >= a = 1
| otherwise = 10 * (getMultiplicator $ div a 10) -- | otherwise = 10 * (getMultiplicator $ div a 10)
-- | Parser for date format y-m-d -- | Parser for date format y-m-d
parseGregorian :: Parser Day --parseGregorian :: Parser Day
parseGregorian = do --parseGregorian = do
y <- wrapDST =<< many1NoneOf ['-'] -- y <- wrapDST =<< many1NoneOf ['-']
_ <- char '-' -- _ <- char '-'
m <- wrapDST =<< many1NoneOf ['-'] -- m <- wrapDST =<< many1NoneOf ['-']
_ <- char '-' -- _ <- char '-'
d <- wrapDST =<< many1NoneOf ['T'] -- d <- wrapDST =<< many1NoneOf ['T']
_ <- char 'T' -- _ <- char 'T'
return $ fromGregorian (toInteger y) m d -- return $ fromGregorian (toInteger y) m d
--
-- | Parser for time format h:m:s ---- | Parser for time format h:m:s
parseTimeOfDay :: Parser TimeOfDay --parseTimeOfDay :: Parser TimeOfDay
parseTimeOfDay = do --parseTimeOfDay = do
h <- wrapDST =<< many1NoneOf [':'] -- h <- wrapDST =<< many1NoneOf [':']
_ <- char ':' -- _ <- char ':'
m <- wrapDST =<< many1NoneOf [':'] -- m <- wrapDST =<< many1NoneOf [':']
_ <- char ':' -- _ <- char ':'
r <- many1NoneOf ['.'] -- r <- many1NoneOf ['.']
_ <- char '.' -- _ <- char '.'
dec <- many1NoneOf ['+', '-'] -- dec <- many1NoneOf ['+', '-']
let (nb, l) = (decimalStringToInt $ r ++ dec, length dec) -- let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
seconds = nb * 10^(12-l) -- seconds = nb * 10^(12-l)
return $ TimeOfDay h m (MkFixed . toInteger $ seconds) -- return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
--
--
-- | Parser for timezone format +hh:mm -- | Parser for timezone format +hh:mm
parseTimeZone :: Parser TimeZone --parseTimeZone :: Parser TimeZone
parseTimeZone = do --parseTimeZone = do
sign <- oneOf ['+', '-'] -- sign <- oneOf ['+', '-']
h <- wrapDST =<< many1NoneOf [':'] -- h <- wrapDST =<< many1NoneOf [':']
_ <- char ':' -- _ <- char ':'
m <- wrapDST =<< (many1 $ anyChar) -- m <- wrapDST =<< (many1 $ anyChar)
let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m -- let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
in return $ TimeZone timeInMinute False "CET" -- in return $ TimeZone timeInMinute False "CET"
--
-- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime ---- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
parseZonedTime :: Parser ZonedTime --parseZonedTime :: Parser ZonedTime
parseZonedTime= do --parseZonedTime= do
d <- parseGregorian -- d <- parseGregorian
tod <- parseTimeOfDay -- tod <- parseTimeOfDay
tz <- parseTimeZone -- tz <- parseTimeZone
return $ ZonedTime (LocalTime d (tod)) tz -- return $ ZonedTime (LocalTime d (tod)) tz
--
-- | Opposite of toRFC3339 ---- | Opposite of toRFC3339
fromRFC3339 :: Text -> Either ParseError ZonedTime --fromRFC3339 :: Text -> Either ParseError ZonedTime
fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input --fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
where input = unpack t -- where input = unpack t
...@@ -27,7 +27,7 @@ import Data.Text as T ...@@ -27,7 +27,7 @@ import Data.Text as T
import Data.Either import Data.Either
-- | Use case -- | Use case
-- >>> :{ -- :{
-- wikimediaFile <- BL.readFile "text.xml" -- wikimediaFile <- BL.readFile "text.xml"
-- _ <- runConduit $ parseLBS def wikimediaFile -- _ <- runConduit $ parseLBS def wikimediaFile
-- .| force "mediawiki required" parseMediawiki -- .| force "mediawiki required" parseMediawiki
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment