Commit 5c1b33ff authored by Alexandre Delanoë's avatar Alexandre Delanoë

[PARSER] Date + WOS parser.

parent e887a14c
module Main where module Main where
import Hastext.Db import Gargantext.Parser.Wos (parseWos)
main :: IO () main :: IO ()
main = fonction main = parseWos "/tmp/DeepNeuralNetworkFull.zip"
-- This file has been generated from package.yaml by hpack version 0.18.1. -- This file has been generated from package.yaml by hpack version 0.20.0.
-- --
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
--
-- hash: d9ae37baf58628321e1cf53c125f895c9fd3ff19c03fdfaa1ca9b7754fecabf9
name: gargantext name: gargantext
version: 0.1.0.0 version: 0.1.0.0
...@@ -21,28 +23,29 @@ library ...@@ -21,28 +23,29 @@ library
hs-source-dirs: hs-source-dirs:
src src
build-depends: build-depends:
extra aeson
, text , aeson-lens
, base >=4.7 && <5
, aeson
, attoparsec
, async , async
, attoparsec
, base >=4.7 && <5
, base16-bytestring , base16-bytestring
, bytestring , bytestring
, case-insensitive , case-insensitive
, containers
, contravariant
, conduit , conduit
, conduit-extra , conduit-extra
, containers
, contravariant
, directory , directory
, duckling , duckling
, extra
, filepath , filepath
, http-conduit , http-conduit
, lens , lens
, logging-effect , logging-effect
, opaleye , opaleye
, path
, parsec , parsec
, path
, path-io
, postgresql-simple , postgresql-simple
, pretty , pretty
, product-profunctors , product-profunctors
...@@ -50,6 +53,7 @@ library ...@@ -50,6 +53,7 @@ library
, protolude , protolude
, pureMD5 , pureMD5
, regex-compat , regex-compat
, safe
, semigroups , semigroups
, servant , servant
, servant-client , servant-client
...@@ -57,10 +61,11 @@ library ...@@ -57,10 +61,11 @@ library
, servant-server , servant-server
, split , split
, tagsoup , tagsoup
, text
, text-metrics , text-metrics
, time , time
, timezone-series
, time-locale-compat , time-locale-compat
, timezone-series
, transformers , transformers
, unordered-containers , unordered-containers
, uuid , uuid
...@@ -68,9 +73,8 @@ library ...@@ -68,9 +73,8 @@ library
, wai , wai
, warp , warp
, yaml , yaml
, zlib
, zip , zip
, path-io , zlib
exposed-modules: exposed-modules:
Data.Gargantext Data.Gargantext
Data.Gargantext.Analysis Data.Gargantext.Analysis
...@@ -118,13 +122,15 @@ test-suite garg-doctest ...@@ -118,13 +122,15 @@ test-suite garg-doctest
src-doctest src-doctest
ghc-options: -Wall -Werror -threaded -rtsopts -with-rtsopts=-N ghc-options: -Wall -Werror -threaded -rtsopts -with-rtsopts=-N
build-depends: build-depends:
extra Glob
, text
, doctest
, Glob
, QuickCheck , QuickCheck
, base , base
, doctest
, extra
, gargantext , gargantext
, text
other-modules:
Paths_gargantext
default-language: Haskell2010 default-language: Haskell2010
test-suite garg-test test-suite garg-test
...@@ -134,12 +140,12 @@ test-suite garg-test ...@@ -134,12 +140,12 @@ test-suite garg-test
src-test src-test
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends: build-depends:
extra QuickCheck
, text
, base , base
, extra
, gargantext , gargantext
, hspec , hspec
, QuickCheck , text
other-modules: other-modules:
Ngrams.Lang Ngrams.Lang
Ngrams.Lang.En Ngrams.Lang.En
...@@ -147,4 +153,5 @@ test-suite garg-test ...@@ -147,4 +153,5 @@ test-suite garg-test
Ngrams.Lang.Occurrences Ngrams.Lang.Occurrences
Ngrams.Metrics Ngrams.Metrics
Parsers.WOS Parsers.WOS
Paths_gargantext
default-language: Haskell2010 default-language: Haskell2010
...@@ -57,6 +57,7 @@ library: ...@@ -57,6 +57,7 @@ library:
dependencies: dependencies:
- base >=4.7 && <5 - base >=4.7 && <5
- aeson - aeson
- aeson-lens
- attoparsec - attoparsec
- async - async
- base16-bytestring - base16-bytestring
...@@ -82,6 +83,7 @@ library: ...@@ -82,6 +83,7 @@ library:
- protolude - protolude
- pureMD5 - pureMD5
- regex-compat - regex-compat
- safe
- semigroups - semigroups
- servant - servant
- servant-client - servant-client
......
module Data.Gargantext.Parsers.Date where {-|
Module : Data.Gargantext.Parsers.Date
Description : Some utils to parse dates
Copyright : (c) CNRS 2017
License : AGPL + CECILL v3
Maintainer : alexandre.delanoe@iscpif.fr
Stability : experimental
Portability : POSIX
According to the language of the text, parseDate1 returns date as Text:
TODO : Add some tests
import Data.Gargantext.Parsers as DGP
DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
-}
module Data.Gargantext.Parsers.Date (parseDate1, Lang(FR, EN)) where
import Data.Gargantext.Prelude
import qualified Data.Gargantext.Types.Main as G
import Data.Time.Clock (UTCTime, getCurrentTime) import Data.Time.Clock (UTCTime, getCurrentTime)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime) import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
...@@ -6,28 +26,62 @@ import Data.Time.LocalTime (utc) ...@@ -6,28 +26,62 @@ import Data.Time.LocalTime (utc)
import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale) import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale)
, DucklingTime(DucklingTime) , DucklingTime(DucklingTime)
) )
--import Duckling.Core (makeLocale, Lang(FR,EN), Some(This), Dimension(Time)) import Duckling.Core (makeLocale, Lang(FR,EN), Some(This), Dimension(Time))
import Duckling.Core (makeLocale, Lang(), Some(This), Dimension(Time)) import Duckling.Types (jsonValue)
--import qualified Duckling.Core as DC
import Duckling.Api (analyze) import Duckling.Api (analyze)
import qualified Data.HashSet as HashSet import qualified Data.HashSet as HashSet
import qualified Data.Aeson as Json
import Data.HashMap.Strict as HM
import Data.Text (Text) import Data.Text (Text)
-- import Duckling.Engine (parseAndResolve) -- import Duckling.Engine (parseAndResolve)
-- import Duckling.Rules (rulesFor) -- import Duckling.Rules (rulesFor)
-- import Duckling.Debug as DB -- import Duckling.Debug as DB
import Safe (headMay)
import Duckling.Types (ResolvedToken) import Duckling.Types (ResolvedToken)
-- TODO add Paris at Duckling.Locale Region datatype -- TODO add Paris at Duckling.Locale Region datatype
-- | To get Homogeinity of the languages
-- TODO : put this in a more generic place in the source code
parserLang :: G.Language -> Lang
parserLang G.FR = FR
parserLang G.EN = EN
-- | Final Date parser API
parseDate1 :: Lang -> Text -> IO Text
parseDate1 lang text = do
maybeJson <- pm jsonValue <$> parseDateWithDuckling lang text
case headMay maybeJson of
Just (Json.Object object) -> case HM.lookup "value" object of
Just (Json.String date) -> pure date
Just _ -> error "ERROR: should be a json String"
Nothing -> error "No date found"
Just _ -> error "ERROR: should be a json Object"
Nothing -> pure "No date found"
-- | Current Time in DucklingTime format
-- TODO : get local Time in a more generic way
utcToDucklingTime :: UTCTime -> DucklingTime utcToDucklingTime :: UTCTime -> DucklingTime
utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
-- | Local Context which depends on Lang and Time
localContext :: Lang -> DucklingTime -> Context localContext :: Lang -> DucklingTime -> Context
localContext lang dt = Context {referenceTime = dt, locale = makeLocale lang Nothing} localContext lang dt = Context {referenceTime = dt, locale = makeLocale lang Nothing}
parseDate :: Lang -> Text -> IO [ResolvedToken] -- | Date parser with Duckling
parseDate lang input = do parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
parseDateWithDuckling lang input = do
ctx <- localContext lang <$> utcToDucklingTime <$> getCurrentTime ctx <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
pure $ analyze input ctx $ HashSet.fromList [(This Time)] pure $ analyze input ctx $ HashSet.fromList [(This Time)]
...@@ -50,7 +50,6 @@ endNotice = manyTill anyChar (string $ pack "\nER\n") ...@@ -50,7 +50,6 @@ endNotice = manyTill anyChar (string $ pack "\nER\n")
startNotice :: Parser ByteString startNotice :: Parser ByteString
startNotice = "\nPT " *> takeTill isEndOfLine startNotice = "\nPT " *> takeTill isEndOfLine
field' :: Parser (ByteString, [ByteString]) field' :: Parser (ByteString, [ByteString])
field' = do field' = do
f <- "\n" *> take 2 <* " " f <- "\n" *> take 2 <* " "
...@@ -113,8 +112,8 @@ parseFile p x = case runParser p x of ...@@ -113,8 +112,8 @@ parseFile p x = case runParser p x of
Left _ -> pure 0 Left _ -> pure 0
Right r -> pure $ length r Right r -> pure $ length r
testWos :: FilePath -> IO [Int] parseWos :: FilePath -> IO [Int]
testWos fp = join $ mapConcurrently (parseFile WOS) <$> zipFiles fp parseWos fp = join $ mapConcurrently (parseFile WOS) <$> zipFiles fp
...@@ -36,6 +36,8 @@ import qualified Data.List as L hiding (head, sum) ...@@ -36,6 +36,8 @@ import qualified Data.List as L hiding (head, sum)
import qualified Control.Monad as M import qualified Control.Monad as M
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Vector as V import qualified Data.Vector as V
import Safe (headMay)
pf :: (a -> Bool) -> [a] -> [a] pf :: (a -> Bool) -> [a] -> [a]
pf = filter pf = filter
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment