Commit 5c1b33ff authored by Alexandre Delanoë's avatar Alexandre Delanoë

[PARSER] Date + WOS parser.

parent e887a14c
module Main where
import Hastext.Db
import Gargantext.Parser.Wos (parseWos)
main :: IO ()
main = fonction
main = parseWos "/tmp/DeepNeuralNetworkFull.zip"
-- This file has been generated from package.yaml by hpack version 0.18.1.
-- This file has been generated from package.yaml by hpack version 0.20.0.
--
-- see: https://github.com/sol/hpack
--
-- hash: d9ae37baf58628321e1cf53c125f895c9fd3ff19c03fdfaa1ca9b7754fecabf9
name: gargantext
version: 0.1.0.0
......@@ -21,28 +23,29 @@ library
hs-source-dirs:
src
build-depends:
extra
, text
, base >=4.7 && <5
, aeson
, attoparsec
aeson
, aeson-lens
, async
, attoparsec
, base >=4.7 && <5
, base16-bytestring
, bytestring
, case-insensitive
, containers
, contravariant
, conduit
, conduit-extra
, containers
, contravariant
, directory
, duckling
, extra
, filepath
, http-conduit
, lens
, logging-effect
, opaleye
, path
, parsec
, path
, path-io
, postgresql-simple
, pretty
, product-profunctors
......@@ -50,6 +53,7 @@ library
, protolude
, pureMD5
, regex-compat
, safe
, semigroups
, servant
, servant-client
......@@ -57,10 +61,11 @@ library
, servant-server
, split
, tagsoup
, text
, text-metrics
, time
, timezone-series
, time-locale-compat
, timezone-series
, transformers
, unordered-containers
, uuid
......@@ -68,9 +73,8 @@ library
, wai
, warp
, yaml
, zlib
, zip
, path-io
, zlib
exposed-modules:
Data.Gargantext
Data.Gargantext.Analysis
......@@ -118,13 +122,15 @@ test-suite garg-doctest
src-doctest
ghc-options: -Wall -Werror -threaded -rtsopts -with-rtsopts=-N
build-depends:
extra
, text
, doctest
, Glob
Glob
, QuickCheck
, base
, doctest
, extra
, gargantext
, text
other-modules:
Paths_gargantext
default-language: Haskell2010
test-suite garg-test
......@@ -134,12 +140,12 @@ test-suite garg-test
src-test
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends:
extra
, text
QuickCheck
, base
, extra
, gargantext
, hspec
, QuickCheck
, text
other-modules:
Ngrams.Lang
Ngrams.Lang.En
......@@ -147,4 +153,5 @@ test-suite garg-test
Ngrams.Lang.Occurrences
Ngrams.Metrics
Parsers.WOS
Paths_gargantext
default-language: Haskell2010
......@@ -57,6 +57,7 @@ library:
dependencies:
- base >=4.7 && <5
- aeson
- aeson-lens
- attoparsec
- async
- base16-bytestring
......@@ -82,6 +83,7 @@ library:
- protolude
- pureMD5
- regex-compat
- safe
- semigroups
- servant
- servant-client
......
module Data.Gargantext.Parsers.Date where
{-|
Module : Data.Gargantext.Parsers.Date
Description : Some utils to parse dates
Copyright : (c) CNRS 2017
License : AGPL + CECILL v3
Maintainer : alexandre.delanoe@iscpif.fr
Stability : experimental
Portability : POSIX
According to the language of the text, parseDate1 returns date as Text:
TODO : Add some tests
import Data.Gargantext.Parsers as DGP
DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
-}
module Data.Gargantext.Parsers.Date (parseDate1, Lang(FR, EN)) where
import Data.Gargantext.Prelude
import qualified Data.Gargantext.Types.Main as G
import Data.Time.Clock (UTCTime, getCurrentTime)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
......@@ -6,28 +26,62 @@ import Data.Time.LocalTime (utc)
import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale)
, DucklingTime(DucklingTime)
)
--import Duckling.Core (makeLocale, Lang(FR,EN), Some(This), Dimension(Time))
import Duckling.Core (makeLocale, Lang(), Some(This), Dimension(Time))
import Duckling.Core (makeLocale, Lang(FR,EN), Some(This), Dimension(Time))
import Duckling.Types (jsonValue)
--import qualified Duckling.Core as DC
import Duckling.Api (analyze)
import qualified Data.HashSet as HashSet
import qualified Data.Aeson as Json
import Data.HashMap.Strict as HM
import Data.Text (Text)
-- import Duckling.Engine (parseAndResolve)
-- import Duckling.Rules (rulesFor)
-- import Duckling.Debug as DB
import Safe (headMay)
import Duckling.Types (ResolvedToken)
-- TODO add Paris at Duckling.Locale Region datatype
-- | To get Homogeinity of the languages
-- TODO : put this in a more generic place in the source code
parserLang :: G.Language -> Lang
parserLang G.FR = FR
parserLang G.EN = EN
-- | Final Date parser API
parseDate1 :: Lang -> Text -> IO Text
parseDate1 lang text = do
maybeJson <- pm jsonValue <$> parseDateWithDuckling lang text
case headMay maybeJson of
Just (Json.Object object) -> case HM.lookup "value" object of
Just (Json.String date) -> pure date
Just _ -> error "ERROR: should be a json String"
Nothing -> error "No date found"
Just _ -> error "ERROR: should be a json Object"
Nothing -> pure "No date found"
-- | Current Time in DucklingTime format
-- TODO : get local Time in a more generic way
utcToDucklingTime :: UTCTime -> DucklingTime
utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
-- | Local Context which depends on Lang and Time
localContext :: Lang -> DucklingTime -> Context
localContext lang dt = Context {referenceTime = dt, locale = makeLocale lang Nothing}
parseDate :: Lang -> Text -> IO [ResolvedToken]
parseDate lang input = do
-- | Date parser with Duckling
parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
parseDateWithDuckling lang input = do
ctx <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
pure $ analyze input ctx $ HashSet.fromList [(This Time)]
......@@ -50,7 +50,6 @@ endNotice = manyTill anyChar (string $ pack "\nER\n")
startNotice :: Parser ByteString
startNotice = "\nPT " *> takeTill isEndOfLine
field' :: Parser (ByteString, [ByteString])
field' = do
f <- "\n" *> take 2 <* " "
......@@ -113,8 +112,8 @@ parseFile p x = case runParser p x of
Left _ -> pure 0
Right r -> pure $ length r
testWos :: FilePath -> IO [Int]
testWos fp = join $ mapConcurrently (parseFile WOS) <$> zipFiles fp
parseWos :: FilePath -> IO [Int]
parseWos fp = join $ mapConcurrently (parseFile WOS) <$> zipFiles fp
......@@ -36,6 +36,8 @@ import qualified Data.List as L hiding (head, sum)
import qualified Control.Monad as M
import qualified Data.Map as Map
import qualified Data.Vector as V
import Safe (headMay)
pf :: (a -> Bool) -> [a] -> [a]
pf = filter
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment