Commit 7a3a7814 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Drop Duckling dependency

parent 23225a78
......@@ -25,11 +25,6 @@ source-repository-package
location: https://github.com/boolexpr/boolexpr.git
tag: bcd7cb20a1b1bc3b58c4ba1b6ae1bccfe62f67ae
source-repository-package
type: git
location: https://github.com/adinapoli/duckling.git
tag: 23603a832117e5352d5b0fb9bb1110228324b35a
source-repository-package
type: git
location: https://gitlab.iscpif.fr/gargantext/opaleye-textsearch.git
......
......@@ -183,7 +183,6 @@ constraints: any.Cabal ==3.8.1.0,
any.doctemplates ==0.11,
any.double-conversion ==2.0.4.2,
double-conversion -developer +embedded_double_conversion,
any.duckling ==0.2.0.0,
any.easy-file ==0.2.5,
any.eigen ==3.3.7.0,
any.either ==5.0.2,
......
......@@ -29,7 +29,7 @@ USER 1000
RUN stack install aeson aeson-lens aeson-pretty array \
blaze-html blaze-markup bytestring \
conduit conduit-extra containers \
deepseq directory duckling \
deepseq directory \
ekg-core ekg-json exceptions \
fgl filepath formatting \
hashable hsparql http-api-data http-client http-client-tls http-conduit \
......
......@@ -496,7 +496,6 @@ library
, deepseq ^>= 1.4.4.0
, directory ^>= 1.3.6.0
, discrimination >= 0.5
, duckling ^>= 0.2.0.0
, ekg-core ^>= 0.1.1.7
, ekg-json ^>= 0.1.0.7
, epo-api-client
......@@ -907,7 +906,6 @@ test-suite garg-test-tasty
, crawlerArxiv
, cryptohash
, directory
, duckling ^>= 0.2.0.0
, extra ^>= 1.7.9
, fast-logger ^>= 3.0.5
, fmt
......@@ -1003,7 +1001,6 @@ test-suite garg-test-hspec
, conduit ^>= 1.3.4.2
, containers ^>= 0.6.5.1
, crawlerArxiv
, duckling ^>= 0.2.0.0
, extra ^>= 1.7.9
, fast-logger ^>= 3.0.5
, fmt
......
......@@ -18,30 +18,21 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Core.Text.Corpus.Parsers.Date
{-(parse, parseRaw, dateSplit, Year, Month, Day)-}
where
module Gargantext.Core.Text.Corpus.Parsers.Date (
dateSplit
, mDateSplit
, defaultDay
, defaultUTCTime
, split'
) where
import Data.Aeson (Value)
import Data.Aeson qualified as Json
import Data.Aeson.KeyMap as KM hiding (map)
import Data.HashSet qualified as HashSet
import Data.List qualified as List
import Data.Text (unpack, splitOn, replace)
import Data.Time (defaultTimeLocale, iso8601DateFormat, parseTimeM, toGregorian)
import Data.Time.Calendar qualified as DTC
import Data.Time.Clock (UTCTime(..), secondsToDiffTime) -- , getCurrentTime)
import Data.Time.LocalTime (utc)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
import Duckling.Api (analyze)
import Duckling.Core (makeLocale, Dimension(Time))
import Duckling.Core qualified as DC
import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime), Options(..))
import Duckling.Types (ResolvedToken(..), ResolvedVal(..), Seal(..))
import Gargantext.Core (Lang(FR,EN))
-- import Gargantext.Core.Types (DebugMode(..), withDebugMode)
import Data.Time.Clock (UTCTime(..), secondsToDiffTime)
import Gargantext.Prelude hiding (replace)
import System.Environment (getEnv)
------------------------------------------------------------------------
-- | Parse date to Ints
-- TODO add hours, minutes and seconds
......@@ -89,13 +80,6 @@ parse s = do
-- $ getCurrentTime)
_ -> Left "[G.C.T.C.Parsers.Date] parse: Should not happen"
defaultDate :: Text
defaultDate = "0-0-0T0:0:0"
type DateFormat = Text
type DateDefault = Text
data DateFlow = DucklingSuccess { ds_result :: Text }
| DucklingFailure { df_result :: Text }
| ReadFailure1 { rf1_result :: Text }
......@@ -131,83 +115,9 @@ readDate txt = do
parseTimeM True defaultTimeLocale (unpack format) (cs txt)
-- TODO add Paris at Duckling.Locale Region datatype
-- | To get Homogeinity of the languages
-- TODO : put this in a more generic place in the source code
parserLang :: Lang -> DC.Lang
parserLang FR = DC.FR
parserLang EN = DC.EN
parserLang lang = panic $ "[G.C.T.C.P.Date] Lang not implemented" <> (show lang)
-- | Final Date parser API
-- IO can be avoided here:
-- currentContext :: Lang -> IO Context
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
-- parseRaw :: Context -> Text -> SomeErrorHandling Text
parseRawSafe :: Lang -> Text -> IO DateFlow
parseRawSafe lang text = do
let triedParseRaw = parseRaw lang text
dateStr' <- case triedParseRaw of
--Left (CE.SomeException err) -> do
Left _err -> do
_envLang <- getEnv "LANG"
-- printDebug "[G.C.T.C.P.Date] Exception: " (err, envLang, lang, text)
pure $ DucklingFailure text
Right res -> pure $ DucklingSuccess res
pure dateStr'
--tryParseRaw :: CE.Exception e => Lang -> Text -> IO (Either e Text)
--tryParseRaw lang text = CE.try (parseRaw lang text)
parseRaw :: Lang -> Text -> Either Text Text
parseRaw lang text = do -- case result
let maybeResult = extractValue $ getTimeValue
$ parseDateWithDuckling lang text (Options True)
case maybeResult of
Just result -> Right result
Nothing -> do
-- printDebug ("[G.C.T.C.P.D.parseRaw] ERROR " <> (cs . show) lang) text
Left $ "[G.C.T.C.P.D.parseRaw ERROR] " <> show lang <> " :: " <> text
getTimeValue :: [ResolvedToken] -> Maybe Value
getTimeValue rt = case head rt of
Nothing -> do
Nothing
Just x -> case rval x of
RVal Time t -> Just $ toJSON t
_ -> do
Nothing
extractValue :: Maybe Value -> Maybe Text
extractValue (Just (Json.Object object)) =
case KM.lookup "value" object of
Just (Json.String date) -> Just date
_ -> Nothing
extractValue _ = Nothing
-- | Current Time in DucklingTime format
-- TODO : get local Time in a more generic way
utcToDucklingTime :: UTCTime -> DucklingTime
utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
-- | Local Context which depends on Lang and Time
localContext :: Lang -> DucklingTime -> Context
localContext lang dt = Context { referenceTime = dt
, locale = makeLocale (parserLang lang) Nothing }
defaultDay :: DTC.Day
defaultDay = DTC.fromGregorian 1 1 1
defaultUTCTime :: UTCTime
defaultUTCTime = UTCTime { utctDay = defaultDay
, utctDayTime = secondsToDiffTime 0 }
-- | Date parser with Duckling
parseDateWithDuckling :: Lang -> Text -> Options -> [ResolvedToken]
parseDateWithDuckling lang input options = do
let contxt = localContext lang $ utcToDucklingTime defaultUTCTime
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
-- TODO check/test Options False or True
analyze input contxt options $ HashSet.fromList [(Seal Time)]
......@@ -63,10 +63,6 @@
git: "https://github.com/MercuryTechnologies/ekg-json.git"
subdirs:
- .
- commit: 23603a832117e5352d5b0fb9bb1110228324b35a
git: "https://github.com/adinapoli/duckling.git"
subdirs:
- .
- commit: 7533a9ccd3bfe77141745f6b61039a26aaf5c83b
git: "https://github.com/adinapoli/llvm-hs.git"
subdirs:
......
......@@ -16,39 +16,16 @@ module Test.Parsers.Date where
import Test.Hspec
import Test.QuickCheck
import Data.Time (ZonedTime(..))
import Data.Time.Clock (UTCTime(..), secondsToDiffTime)
import Data.Time.Calendar.OrdinalDate (fromOrdinalDate)
import Data.Text (pack)
import Text.Parsec.Error (ParseError)
import Duckling.Time.Types (toRFC3339)
-----------------------------------------------------------
import Gargantext.Prelude
import Gargantext.Core.Text.Corpus.Parsers.Date (dateSplit)
import Gargantext.Core.Text.Corpus.Parsers.Date.Parsec (fromRFC3339)
import Test.Parsers.Types
-----------------------------------------------------------
fromRFC3339Inv :: Either ParseError ZonedTime -> Text
fromRFC3339Inv (Right z) = toRFC3339 z
fromRFC3339Inv (Left pe) = panic . pack $ show pe
testFromRFC3339 :: Spec
testFromRFC3339 = do
describe "Test fromRFC3339: " $ do
it "is the inverse of Duckling's toRFC3339" $ property $
((==) <*> (fromRFC3339 . fromRFC3339Inv)) . Right . looseZonedTimePrecision
-- \x -> uncurry (==) $ (,) <*> (fromRFC3339 . fromRFC3339Inv) $ Right $ looseZonedTimePrecision x
-- \x -> let e = Right x :: Either ParseError ZonedTime
-- in fmap looseZonedTimePrecision e == (fromRFC3339 . fromRFC3339Inv ) (fmap looseZonedTimePrecision e)
testDateSplit :: Spec
testDateSplit = do
describe "Test date split" $ do
......
......@@ -33,7 +33,6 @@ main :: IO ()
main = do
utilSpec <- testSpec "Utils" Utils.test
clusteringSpec <- testSpec "Graph Clustering" Graph.test
dateParserSpec <- testSpec "Date Parsing" PD.testFromRFC3339
dateSplitSpec <- testSpec "Date split" PD.testDateSplit
cryptoSpec <- testSpec "Crypto" Crypto.test
nlpSpec <- testSpec "NLP" NLP.test
......@@ -43,7 +42,6 @@ main = do
defaultMain $ testGroup "Gargantext"
[ utilSpec
, clusteringSpec
, dateParserSpec
, dateSplitSpec
, cryptoSpec
, nlpSpec
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment