Commit 7a3a7814 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Drop Duckling dependency

parent 23225a78
...@@ -25,11 +25,6 @@ source-repository-package ...@@ -25,11 +25,6 @@ source-repository-package
location: https://github.com/boolexpr/boolexpr.git location: https://github.com/boolexpr/boolexpr.git
tag: bcd7cb20a1b1bc3b58c4ba1b6ae1bccfe62f67ae tag: bcd7cb20a1b1bc3b58c4ba1b6ae1bccfe62f67ae
source-repository-package
type: git
location: https://github.com/adinapoli/duckling.git
tag: 23603a832117e5352d5b0fb9bb1110228324b35a
source-repository-package source-repository-package
type: git type: git
location: https://gitlab.iscpif.fr/gargantext/opaleye-textsearch.git location: https://gitlab.iscpif.fr/gargantext/opaleye-textsearch.git
......
...@@ -183,7 +183,6 @@ constraints: any.Cabal ==3.8.1.0, ...@@ -183,7 +183,6 @@ constraints: any.Cabal ==3.8.1.0,
any.doctemplates ==0.11, any.doctemplates ==0.11,
any.double-conversion ==2.0.4.2, any.double-conversion ==2.0.4.2,
double-conversion -developer +embedded_double_conversion, double-conversion -developer +embedded_double_conversion,
any.duckling ==0.2.0.0,
any.easy-file ==0.2.5, any.easy-file ==0.2.5,
any.eigen ==3.3.7.0, any.eigen ==3.3.7.0,
any.either ==5.0.2, any.either ==5.0.2,
......
...@@ -29,7 +29,7 @@ USER 1000 ...@@ -29,7 +29,7 @@ USER 1000
RUN stack install aeson aeson-lens aeson-pretty array \ RUN stack install aeson aeson-lens aeson-pretty array \
blaze-html blaze-markup bytestring \ blaze-html blaze-markup bytestring \
conduit conduit-extra containers \ conduit conduit-extra containers \
deepseq directory duckling \ deepseq directory \
ekg-core ekg-json exceptions \ ekg-core ekg-json exceptions \
fgl filepath formatting \ fgl filepath formatting \
hashable hsparql http-api-data http-client http-client-tls http-conduit \ hashable hsparql http-api-data http-client http-client-tls http-conduit \
......
...@@ -496,7 +496,6 @@ library ...@@ -496,7 +496,6 @@ library
, deepseq ^>= 1.4.4.0 , deepseq ^>= 1.4.4.0
, directory ^>= 1.3.6.0 , directory ^>= 1.3.6.0
, discrimination >= 0.5 , discrimination >= 0.5
, duckling ^>= 0.2.0.0
, ekg-core ^>= 0.1.1.7 , ekg-core ^>= 0.1.1.7
, ekg-json ^>= 0.1.0.7 , ekg-json ^>= 0.1.0.7
, epo-api-client , epo-api-client
...@@ -907,7 +906,6 @@ test-suite garg-test-tasty ...@@ -907,7 +906,6 @@ test-suite garg-test-tasty
, crawlerArxiv , crawlerArxiv
, cryptohash , cryptohash
, directory , directory
, duckling ^>= 0.2.0.0
, extra ^>= 1.7.9 , extra ^>= 1.7.9
, fast-logger ^>= 3.0.5 , fast-logger ^>= 3.0.5
, fmt , fmt
...@@ -1003,7 +1001,6 @@ test-suite garg-test-hspec ...@@ -1003,7 +1001,6 @@ test-suite garg-test-hspec
, conduit ^>= 1.3.4.2 , conduit ^>= 1.3.4.2
, containers ^>= 0.6.5.1 , containers ^>= 0.6.5.1
, crawlerArxiv , crawlerArxiv
, duckling ^>= 0.2.0.0
, extra ^>= 1.7.9 , extra ^>= 1.7.9
, fast-logger ^>= 3.0.5 , fast-logger ^>= 3.0.5
, fmt , fmt
......
...@@ -18,30 +18,21 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00" ...@@ -18,30 +18,21 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
module Gargantext.Core.Text.Corpus.Parsers.Date module Gargantext.Core.Text.Corpus.Parsers.Date (
{-(parse, parseRaw, dateSplit, Year, Month, Day)-} dateSplit
where , mDateSplit
, defaultDay
, defaultUTCTime
, split'
) where
import Data.Aeson (Value)
import Data.Aeson qualified as Json
import Data.Aeson.KeyMap as KM hiding (map)
import Data.HashSet qualified as HashSet
import Data.List qualified as List import Data.List qualified as List
import Data.Text (unpack, splitOn, replace) import Data.Text (unpack, splitOn, replace)
import Data.Time (defaultTimeLocale, iso8601DateFormat, parseTimeM, toGregorian) import Data.Time (defaultTimeLocale, iso8601DateFormat, parseTimeM, toGregorian)
import Data.Time.Calendar qualified as DTC import Data.Time.Calendar qualified as DTC
import Data.Time.Clock (UTCTime(..), secondsToDiffTime) -- , getCurrentTime) import Data.Time.Clock (UTCTime(..), secondsToDiffTime)
import Data.Time.LocalTime (utc)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
import Duckling.Api (analyze)
import Duckling.Core (makeLocale, Dimension(Time))
import Duckling.Core qualified as DC
import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime), Options(..))
import Duckling.Types (ResolvedToken(..), ResolvedVal(..), Seal(..))
import Gargantext.Core (Lang(FR,EN))
-- import Gargantext.Core.Types (DebugMode(..), withDebugMode)
import Gargantext.Prelude hiding (replace) import Gargantext.Prelude hiding (replace)
import System.Environment (getEnv)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Parse date to Ints -- | Parse date to Ints
-- TODO add hours, minutes and seconds -- TODO add hours, minutes and seconds
...@@ -89,13 +80,6 @@ parse s = do ...@@ -89,13 +80,6 @@ parse s = do
-- $ getCurrentTime) -- $ getCurrentTime)
_ -> Left "[G.C.T.C.Parsers.Date] parse: Should not happen" _ -> Left "[G.C.T.C.Parsers.Date] parse: Should not happen"
defaultDate :: Text
defaultDate = "0-0-0T0:0:0"
type DateFormat = Text
type DateDefault = Text
data DateFlow = DucklingSuccess { ds_result :: Text } data DateFlow = DucklingSuccess { ds_result :: Text }
| DucklingFailure { df_result :: Text } | DucklingFailure { df_result :: Text }
| ReadFailure1 { rf1_result :: Text } | ReadFailure1 { rf1_result :: Text }
...@@ -131,83 +115,9 @@ readDate txt = do ...@@ -131,83 +115,9 @@ readDate txt = do
parseTimeM True defaultTimeLocale (unpack format) (cs txt) parseTimeM True defaultTimeLocale (unpack format) (cs txt)
-- TODO add Paris at Duckling.Locale Region datatype
-- | To get Homogeinity of the languages
-- TODO : put this in a more generic place in the source code
parserLang :: Lang -> DC.Lang
parserLang FR = DC.FR
parserLang EN = DC.EN
parserLang lang = panic $ "[G.C.T.C.P.Date] Lang not implemented" <> (show lang)
-- | Final Date parser API
-- IO can be avoided here:
-- currentContext :: Lang -> IO Context
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
-- parseRaw :: Context -> Text -> SomeErrorHandling Text
parseRawSafe :: Lang -> Text -> IO DateFlow
parseRawSafe lang text = do
let triedParseRaw = parseRaw lang text
dateStr' <- case triedParseRaw of
--Left (CE.SomeException err) -> do
Left _err -> do
_envLang <- getEnv "LANG"
-- printDebug "[G.C.T.C.P.Date] Exception: " (err, envLang, lang, text)
pure $ DucklingFailure text
Right res -> pure $ DucklingSuccess res
pure dateStr'
--tryParseRaw :: CE.Exception e => Lang -> Text -> IO (Either e Text)
--tryParseRaw lang text = CE.try (parseRaw lang text)
parseRaw :: Lang -> Text -> Either Text Text
parseRaw lang text = do -- case result
let maybeResult = extractValue $ getTimeValue
$ parseDateWithDuckling lang text (Options True)
case maybeResult of
Just result -> Right result
Nothing -> do
-- printDebug ("[G.C.T.C.P.D.parseRaw] ERROR " <> (cs . show) lang) text
Left $ "[G.C.T.C.P.D.parseRaw ERROR] " <> show lang <> " :: " <> text
getTimeValue :: [ResolvedToken] -> Maybe Value
getTimeValue rt = case head rt of
Nothing -> do
Nothing
Just x -> case rval x of
RVal Time t -> Just $ toJSON t
_ -> do
Nothing
extractValue :: Maybe Value -> Maybe Text
extractValue (Just (Json.Object object)) =
case KM.lookup "value" object of
Just (Json.String date) -> Just date
_ -> Nothing
extractValue _ = Nothing
-- | Current Time in DucklingTime format
-- TODO : get local Time in a more generic way
utcToDucklingTime :: UTCTime -> DucklingTime
utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
-- | Local Context which depends on Lang and Time
localContext :: Lang -> DucklingTime -> Context
localContext lang dt = Context { referenceTime = dt
, locale = makeLocale (parserLang lang) Nothing }
defaultDay :: DTC.Day defaultDay :: DTC.Day
defaultDay = DTC.fromGregorian 1 1 1 defaultDay = DTC.fromGregorian 1 1 1
defaultUTCTime :: UTCTime defaultUTCTime :: UTCTime
defaultUTCTime = UTCTime { utctDay = defaultDay defaultUTCTime = UTCTime { utctDay = defaultDay
, utctDayTime = secondsToDiffTime 0 } , utctDayTime = secondsToDiffTime 0 }
-- | Date parser with Duckling
parseDateWithDuckling :: Lang -> Text -> Options -> [ResolvedToken]
parseDateWithDuckling lang input options = do
let contxt = localContext lang $ utcToDucklingTime defaultUTCTime
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
-- TODO check/test Options False or True
analyze input contxt options $ HashSet.fromList [(Seal Time)]
...@@ -63,10 +63,6 @@ ...@@ -63,10 +63,6 @@
git: "https://github.com/MercuryTechnologies/ekg-json.git" git: "https://github.com/MercuryTechnologies/ekg-json.git"
subdirs: subdirs:
- . - .
- commit: 23603a832117e5352d5b0fb9bb1110228324b35a
git: "https://github.com/adinapoli/duckling.git"
subdirs:
- .
- commit: 7533a9ccd3bfe77141745f6b61039a26aaf5c83b - commit: 7533a9ccd3bfe77141745f6b61039a26aaf5c83b
git: "https://github.com/adinapoli/llvm-hs.git" git: "https://github.com/adinapoli/llvm-hs.git"
subdirs: subdirs:
......
...@@ -16,39 +16,16 @@ module Test.Parsers.Date where ...@@ -16,39 +16,16 @@ module Test.Parsers.Date where
import Test.Hspec import Test.Hspec
import Test.QuickCheck
import Data.Time (ZonedTime(..))
import Data.Time.Clock (UTCTime(..), secondsToDiffTime) import Data.Time.Clock (UTCTime(..), secondsToDiffTime)
import Data.Time.Calendar.OrdinalDate (fromOrdinalDate) import Data.Time.Calendar.OrdinalDate (fromOrdinalDate)
import Data.Text (pack)
import Text.Parsec.Error (ParseError)
import Duckling.Time.Types (toRFC3339)
----------------------------------------------------------- -----------------------------------------------------------
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core.Text.Corpus.Parsers.Date (dateSplit) import Gargantext.Core.Text.Corpus.Parsers.Date (dateSplit)
import Gargantext.Core.Text.Corpus.Parsers.Date.Parsec (fromRFC3339)
import Test.Parsers.Types
----------------------------------------------------------- -----------------------------------------------------------
fromRFC3339Inv :: Either ParseError ZonedTime -> Text
fromRFC3339Inv (Right z) = toRFC3339 z
fromRFC3339Inv (Left pe) = panic . pack $ show pe
testFromRFC3339 :: Spec
testFromRFC3339 = do
describe "Test fromRFC3339: " $ do
it "is the inverse of Duckling's toRFC3339" $ property $
((==) <*> (fromRFC3339 . fromRFC3339Inv)) . Right . looseZonedTimePrecision
-- \x -> uncurry (==) $ (,) <*> (fromRFC3339 . fromRFC3339Inv) $ Right $ looseZonedTimePrecision x
-- \x -> let e = Right x :: Either ParseError ZonedTime
-- in fmap looseZonedTimePrecision e == (fromRFC3339 . fromRFC3339Inv ) (fmap looseZonedTimePrecision e)
testDateSplit :: Spec testDateSplit :: Spec
testDateSplit = do testDateSplit = do
describe "Test date split" $ do describe "Test date split" $ do
......
...@@ -33,7 +33,6 @@ main :: IO () ...@@ -33,7 +33,6 @@ main :: IO ()
main = do main = do
utilSpec <- testSpec "Utils" Utils.test utilSpec <- testSpec "Utils" Utils.test
clusteringSpec <- testSpec "Graph Clustering" Graph.test clusteringSpec <- testSpec "Graph Clustering" Graph.test
dateParserSpec <- testSpec "Date Parsing" PD.testFromRFC3339
dateSplitSpec <- testSpec "Date split" PD.testDateSplit dateSplitSpec <- testSpec "Date split" PD.testDateSplit
cryptoSpec <- testSpec "Crypto" Crypto.test cryptoSpec <- testSpec "Crypto" Crypto.test
nlpSpec <- testSpec "NLP" NLP.test nlpSpec <- testSpec "NLP" NLP.test
...@@ -43,7 +42,6 @@ main = do ...@@ -43,7 +42,6 @@ main = do
defaultMain $ testGroup "Gargantext" defaultMain $ testGroup "Gargantext"
[ utilSpec [ utilSpec
, clusteringSpec , clusteringSpec
, dateParserSpec
, dateSplitSpec , dateSplitSpec
, cryptoSpec , cryptoSpec
, nlpSpec , nlpSpec
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment