Commit 90d6620e authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] Regex error with Duckling

parent f1538012
#!/bin/bash #!/bin/bash
#stack install --nix --profile --test --fast --no-install-ghc --skip-ghc-check #stack install --nix --profile --test --fast --no-install-ghc --skip-ghc-check
stack install --nix --test --no-install-ghc --skip-ghc-check
env LANG=C.UTF-8 stack install --nix --test --no-install-ghc --skip-ghc-check
...@@ -6,4 +6,4 @@ LOGFILE=$FOLDER"/"$FILE ...@@ -6,4 +6,4 @@ LOGFILE=$FOLDER"/"$FILE
mkdir -p $FOLDER mkdir -p $FOLDER
~/.local/bin/gargantext-server --ini gargantext.ini --run Dev +RTS > $LOGFILE 2>&1 & tail -F $LOGFILE # -p env LANG=en_US.UTF-8 ~/.local/bin/gargantext-server --ini gargantext.ini --run Dev +RTS > $LOGFILE 2>&1 & tail -F $LOGFILE # -p
...@@ -18,7 +18,6 @@ New corpus means either: ...@@ -18,7 +18,6 @@ New corpus means either:
module Gargantext.API.Node.Corpus.New module Gargantext.API.Node.Corpus.New
where where
import Conduit import Conduit
import Control.Lens hiding (elements, Empty) import Control.Lens hiding (elements, Empty)
import Data.Aeson import Data.Aeson
...@@ -38,8 +37,6 @@ import qualified Data.Text.Encoding as TE ...@@ -38,8 +37,6 @@ import qualified Data.Text.Encoding as TE
-- import Test.QuickCheck (elements) -- import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary import Test.QuickCheck.Arbitrary
import Gargantext.Prelude
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs, ScraperEvent(..), scst_events) import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs, ScraperEvent(..), scst_events)
import Gargantext.API.Admin.Types (HasSettings) import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (addEvent, jobLogSuccess, jobLogFailTotal) import Gargantext.API.Job (addEvent, jobLogSuccess, jobLogFailTotal)
...@@ -49,8 +46,6 @@ import Gargantext.API.Node.Corpus.Types ...@@ -49,8 +46,6 @@ import Gargantext.API.Node.Corpus.Types
import Gargantext.API.Node.Types import Gargantext.API.Node.Types
import Gargantext.Core (Lang(..){-, allLangs-}) import Gargantext.Core (Lang(..){-, allLangs-})
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..)) import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import qualified Gargantext.Core.Text.Corpus.API as API
import qualified Gargantext.Core.Text.Corpus.Parsers as Parser (FileType(..), parseFormatC)
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Action.Flow (flowCorpus, getDataText, flowDataText, TermType(..){-, allDataOrigins-}) import Gargantext.Database.Action.Flow (flowCorpus, getDataText, flowDataText, TermType(..){-, allDataOrigins-})
...@@ -64,8 +59,11 @@ import Gargantext.Database.Prelude (hasConfig) ...@@ -64,8 +59,11 @@ import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (getNodeWith) import Gargantext.Database.Query.Table.Node (getNodeWith)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata) import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Node (node_hyperdata) import Gargantext.Database.Schema.Node (node_hyperdata)
import qualified Gargantext.Database.GargDB as GargDB import Gargantext.Prelude
import Gargantext.Prelude.Config (gc_max_docs_parsers) import Gargantext.Prelude.Config (gc_max_docs_parsers)
import qualified Gargantext.Core.Text.Corpus.API as API
import qualified Gargantext.Core.Text.Corpus.Parsers as Parser (FileType(..), parseFormatC)
import qualified Gargantext.Database.GargDB as GargDB
------------------------------------------------------------------------ ------------------------------------------------------------------------
{- {-
data Query = Query { query_query :: Text data Query = Query { query_query :: Text
...@@ -177,6 +175,8 @@ type AddWithFile = Summary "Add with MultipartData to corpus endpoint" ...@@ -177,6 +175,8 @@ type AddWithFile = Summary "Add with MultipartData to corpus endpoint"
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO WithQuery also has a corpus id -- TODO WithQuery also has a corpus id
addToCorpusWithQuery :: FlowCmdM env err m addToCorpusWithQuery :: FlowCmdM env err m
=> User => User
-> CorpusId -> CorpusId
...@@ -216,8 +216,11 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q ...@@ -216,8 +216,11 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
-- TODO if cid is folder -> create Corpus -- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus -- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private -- if cid is root -> create corpus in Private
printDebug "[G.A.N.C.New] getDataText with query" q
eTxts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [database2origin dbs] eTxts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [database2origin dbs]
let lTxts = lefts eTxts let lTxts = lefts eTxts
printDebug "[G.A.N.C.New] eTxts" lTxts
case lTxts of case lTxts of
[] -> do [] -> do
let txts = rights eTxts let txts = rights eTxts
...@@ -241,6 +244,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q ...@@ -241,6 +244,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
} }
(err:_) -> do (err:_) -> do
printDebug "Error: " err
pure $ addEvent "ERROR" (T.pack $ show err) $ pure $ addEvent "ERROR" (T.pack $ show err) $
JobLog { _scst_succeeded = Just 2 JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 1 , _scst_failed = Just 1
......
...@@ -20,10 +20,12 @@ module Gargantext.Core.Text.Corpus.Parsers.Date ...@@ -20,10 +20,12 @@ module Gargantext.Core.Text.Corpus.Parsers.Date
{-(parse, parseRaw, dateSplit, Year, Month, Day)-} {-(parse, parseRaw, dateSplit, Year, Month, Day)-}
where where
import System.Environment (getEnv)
import Data.Aeson (toJSON, Value) import Data.Aeson (toJSON, Value)
import Data.Either (Either(..))
import Data.HashMap.Strict as HM hiding (map) import Data.HashMap.Strict as HM hiding (map)
import Data.Text (Text, unpack, splitOn) import Data.Text (Text, unpack, splitOn, replace)
import Data.Time (parseTimeOrError, defaultTimeLocale, toGregorian) import Data.Time (defaultTimeLocale, toGregorian, iso8601DateFormat, parseTimeM)
import Data.Time.Clock (UTCTime(..), getCurrentTime) import Data.Time.Clock (UTCTime(..), getCurrentTime)
import Data.Time.LocalTime (utc) import Data.Time.LocalTime (utc)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime) import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
...@@ -33,10 +35,12 @@ import Duckling.Types (Seal(..)) ...@@ -33,10 +35,12 @@ import Duckling.Types (Seal(..))
import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime), Options(..)) import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime), Options(..))
import Duckling.Types (ResolvedToken(..), ResolvedVal(..)) import Duckling.Types (ResolvedToken(..), ResolvedVal(..))
import Gargantext.Core (Lang(FR,EN)) import Gargantext.Core (Lang(FR,EN))
import Gargantext.Core.Types (DebugMode(..), withDebugMode)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Aeson as Json import qualified Control.Exception as CE
import qualified Data.HashSet as HashSet import qualified Data.Aeson as Json
import qualified Duckling.Core as DC import qualified Data.HashSet as HashSet
import qualified Duckling.Core as DC
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Parse date to Ints -- | Parse date to Ints
...@@ -61,31 +65,55 @@ type Day = Int ...@@ -61,31 +65,55 @@ type Day = Int
-- | Date Parser -- | Date Parser
-- Parses dates mentions in full text given the language. -- Parses dates mentions in full text given the language.
-- >>> parseDate FR (pack "10 avril 1979 à 19H") -- >>> parse FR (pack "10 avril 1900 à 19H")
-- 1979-04-10 19:00:00 UTC -- 1900-04-10 19:00:00 UTC
-- >>> parseDate EN (pack "April 10 1979") -- >>> parse EN (pack "April 10 1900")
-- 1979-04-10 00:00:00 UTC -- 1900-04-10 00:00:00 UTC
parse :: Lang -> Text -> IO UTCTime parse :: Lang -> Text -> IO UTCTime
parse lang s = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0" lang s parse lang s = do
dateStr' <- parseRawSafe lang s
case dateFlow dateStr' of
DateFlowSuccess ok -> pure ok
_ -> withDebugMode (DebugMode True)
"[G.C.T.P.T.Date parse]" (lang,s)
$ getCurrentTime
defaultDate :: Text
defaultDate = "0-0-0T0:0:0"
type DateFormat = Text type DateFormat = Text
type DateDefault = Text type DateDefault = Text
parseDate' :: DateFormat
-> DateDefault data DateFlow = DucklingSuccess { ds_result :: Text }
-> Lang | DucklingFailure { df_result :: Text }
-> Text | ReadFailure1 { rf1_result :: Text }
-> IO UTCTime | ReadFailure2 { rf2_result :: Text }
parseDate' format def lang s = do | DateFlowSuccess { success :: UTCTime }
dateStr' <- parseRaw lang s | DateFlowFailure
if dateStr' == "" deriving Show
then getCurrentTime
else do --{-
let dateStr = unpack dateFlow :: DateFlow -> DateFlow
$ maybe def identity dateFlow (DucklingSuccess res) = case (head $ splitOn "." res) of
$ head Nothing -> dateFlow (ReadFailure1 res)
$ splitOn "." dateStr' Just re -> case readDate res of
pure $ parseTimeOrError True defaultTimeLocale (unpack format) dateStr Nothing -> dateFlow (ReadFailure1 re)
Just ok -> DateFlowSuccess ok
dateFlow (DucklingFailure txt) = case readDate $ replace " " "T" txt of
Nothing -> dateFlow (ReadFailure1 txt)
Just ok -> DateFlowSuccess ok
dateFlow (ReadFailure1 txt) = case readDate txt of
Nothing -> DateFlowFailure
Just ok -> DateFlowSuccess ok
dateFlow _ = DateFlowFailure
--}
readDate :: Text -> Maybe UTCTime
readDate txt = do
let format = cs $ iso8601DateFormat (Just "%H:%M:%S")
parseTimeM True defaultTimeLocale (unpack format) (cs txt)
-- TODO add Paris at Duckling.Locale Region datatype -- TODO add Paris at Duckling.Locale Region datatype
...@@ -93,8 +121,8 @@ parseDate' format def lang s = do ...@@ -93,8 +121,8 @@ parseDate' format def lang s = do
-- TODO : put this in a more generic place in the source code -- TODO : put this in a more generic place in the source code
parserLang :: Lang -> DC.Lang parserLang :: Lang -> DC.Lang
parserLang FR = DC.FR parserLang FR = DC.FR
parserLang EN = DC.EN parserLang EN = DC.EN
parserLang _ = panic "not implemented" parserLang lang = panic $ "[G.C.T.C.P.Date] Lang not implemented" <> (cs $ show lang)
-- | Final Date parser API -- | Final Date parser API
-- IO can be avoided here: -- IO can be avoided here:
...@@ -102,6 +130,21 @@ parserLang _ = panic "not implemented" ...@@ -102,6 +130,21 @@ parserLang _ = panic "not implemented"
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
-- parseRaw :: Context -> Text -> SomeErrorHandling Text -- parseRaw :: Context -> Text -> SomeErrorHandling Text
parseRawSafe :: Lang -> Text -> IO DateFlow
parseRawSafe lang text = do
triedParseRaw <- tryParseRaw lang text
dateStr' <- case triedParseRaw of
Left (CE.SomeException err) -> do
envLang <- getEnv "LANG"
printDebug "[G.C.T.C.P.Date] Exception: " (err, envLang, lang, text)
pure $ DucklingFailure text
Right res -> pure $ DucklingSuccess res
pure dateStr'
tryParseRaw :: CE.Exception e => Lang -> Text -> IO (Either e Text)
tryParseRaw lang text = CE.try (parseRaw lang text)
parseRaw :: Lang -> Text -> IO Text parseRaw :: Lang -> Text -> IO Text
parseRaw lang text = do -- case result parseRaw lang text = do -- case result
maybeResult <- extractValue <$> getTimeValue maybeResult <- extractValue <$> getTimeValue
...@@ -146,5 +189,3 @@ parseDateWithDuckling lang input options = do ...@@ -146,5 +189,3 @@ parseDateWithDuckling lang input options = do
-- TODO check/test Options False or True -- TODO check/test Options False or True
pure $ analyze input contxt options $ HashSet.fromList [(Seal Time)] pure $ analyze input contxt options $ HashSet.fromList [(Seal Time)]
...@@ -16,6 +16,7 @@ commentary with @some markup@. ...@@ -16,6 +16,7 @@ commentary with @some markup@.
module Gargantext.Core.Types ( module Gargantext.Core.Types.Main module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
, module Gargantext.Database.Admin.Types.Node , module Gargantext.Database.Admin.Types.Node
, DebugMode(..), withDebugMode
, Term, Terms(..) , Term, Terms(..)
, TokenTag(..), POS(..), NER(..) , TokenTag(..), POS(..), NER(..)
, Label, Stems , Label, Stems
...@@ -29,6 +30,7 @@ module Gargantext.Core.Types ( module Gargantext.Core.Types.Main ...@@ -29,6 +30,7 @@ module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
import Control.Lens (Prism', (#), makeLenses, over) import Control.Lens (Prism', (#), makeLenses, over)
import Control.Monad.Except (MonadError(throwError)) import Control.Monad.Except (MonadError(throwError))
import Debug.Trace (trace)
import Data.Aeson import Data.Aeson
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Hashable (Hashable) import Data.Hashable (Hashable)
...@@ -47,6 +49,14 @@ import Gargantext.Database.Admin.Types.Node ...@@ -47,6 +49,14 @@ import Gargantext.Database.Admin.Types.Node
import Gargantext.Prelude import Gargantext.Prelude
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
------------------------------------------------------------------------
data DebugMode = DebugMode { activated :: Bool }
withDebugMode :: (Show a) => DebugMode -> Text -> a -> b -> b
withDebugMode (DebugMode True ) msg var a = trace (cs $ "DEBUG" <> msg <> (cs $ show var)) a
withDebugMode (DebugMode False) _ _ a = a
------------------------------------------------------------------------ ------------------------------------------------------------------------
data Ordering = Down | Up data Ordering = Down | Up
deriving (Enum, Show, Eq, Bounded) deriving (Enum, Show, Eq, Bounded)
......
...@@ -26,6 +26,7 @@ Portability : POSIX ...@@ -26,6 +26,7 @@ Portability : POSIX
module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list) module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
( DataText(..) ( DataText(..)
, getDataText , getDataText
, getDataText_Debug
, flowDataText , flowDataText
, flow , flow
...@@ -67,6 +68,8 @@ import System.FilePath (FilePath) ...@@ -67,6 +68,8 @@ import System.FilePath (FilePath)
import qualified Data.HashMap.Strict as HashMap import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Conduit.List as CL
import qualified Data.Conduit as C
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..)) import Gargantext.API.Admin.Orchestrator.Types (JobLog(..))
import Gargantext.Core (Lang(..), PosTagAlgo(..)) import Gargantext.Core (Lang(..), PosTagAlgo(..))
...@@ -134,6 +137,13 @@ data DataText = DataOld ![NodeId] ...@@ -134,6 +137,13 @@ data DataText = DataOld ![NodeId]
| DataNew !(Maybe Integer, ConduitT () HyperdataDocument IO ()) | DataNew !(Maybe Integer, ConduitT () HyperdataDocument IO ())
-- | DataNew ![[HyperdataDocument]] -- | DataNew ![[HyperdataDocument]]
-- Show instance is not possible because of IO
printDataText :: DataText -> IO ()
printDataText (DataOld xs) = putStrLn $ show xs
printDataText (DataNew (maybeInt, conduitData)) = do
res <- C.runConduit (conduitData .| CL.consume)
putStrLn $ show (maybeInt, res)
-- TODO use the split parameter in config file -- TODO use the split parameter in config file
getDataText :: FlowCmdM env err m getDataText :: FlowCmdM env err m
=> DataOrigin => DataOrigin
...@@ -153,6 +163,20 @@ getDataText (InternalOrigin _) _la q _li = do ...@@ -153,6 +163,20 @@ getDataText (InternalOrigin _) _la q _li = do
ids <- map fst <$> searchDocInDatabase cId (stemIt q) ids <- map fst <$> searchDocInDatabase cId (stemIt q)
pure $ Right $ DataOld ids pure $ Right $ DataOld ids
getDataText_Debug :: FlowCmdM env err m
=> DataOrigin
-> TermType Lang
-> API.Query
-> Maybe API.Limit
-> m ()
getDataText_Debug a l q li = do
result <- getDataText a l q li
case result of
Left err -> liftBase $ putStrLn $ show err
Right res -> liftBase $ printDataText res
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
flowDataText :: forall env err m. flowDataText :: forall env err m.
( FlowCmdM env err m ( FlowCmdM env err m
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment