[date] fix dateSplit function

(no default getCurrentTime anymore)
parent 37a16868
Pipeline #5289 canceled with stages
...@@ -79,6 +79,7 @@ library ...@@ -79,6 +79,7 @@ library
Gargantext.Core.Text.Corpus.Query Gargantext.Core.Text.Corpus.Query
Gargantext.Core.Text.Corpus.Parsers Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers.CSV Gargantext.Core.Text.Corpus.Parsers.CSV
Gargantext.Core.Text.Corpus.Parsers.Date
Gargantext.Core.Text.Corpus.Parsers.Date.Parsec Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
Gargantext.Core.Text.List.Formats.CSV Gargantext.Core.Text.List.Formats.CSV
Gargantext.Core.Text.Metrics Gargantext.Core.Text.Metrics
...@@ -222,7 +223,6 @@ library ...@@ -222,7 +223,6 @@ library
Gargantext.Core.Text.Corpus.API.Isidore Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex Gargantext.Core.Text.Corpus.API.Istex
Gargantext.Core.Text.Corpus.Parsers.Book Gargantext.Core.Text.Corpus.Parsers.Book
Gargantext.Core.Text.Corpus.Parsers.Date
Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec
Gargantext.Core.Text.Corpus.Parsers.FrameWrite Gargantext.Core.Text.Corpus.Parsers.FrameWrite
Gargantext.Core.Text.Corpus.Parsers.Gitlab Gargantext.Core.Text.Corpus.Parsers.Gitlab
......
...@@ -25,7 +25,7 @@ import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs) ...@@ -25,7 +25,7 @@ import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Prelude import Gargantext.API.Prelude
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.NLP (nlpServerGet) import Gargantext.Core.NLP (nlpServerGet)
import Gargantext.Core.Text.Corpus.Parsers.Date (dateSplit) import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
import Gargantext.Core.Text.Terms (TermType(..)) import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix) import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus) import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus)
...@@ -102,9 +102,8 @@ documentUpload nId doc = do ...@@ -102,9 +102,8 @@ documentUpload nId doc = do
Just c -> c Just c -> c
Nothing -> panic $ T.pack $ "[G.A.N.DU] Node has no corpus parent: " <> show nId Nothing -> panic $ T.pack $ "[G.A.N.DU] Node has no corpus parent: " <> show nId
(theFullDate, (year, month, day)) <- liftBase $ dateSplit let mDateS = Just $ view du_date doc
$ Just let (theFullDate, (year, month, day)) = mDateSplit mDateS
$ view du_date doc
let hd = HyperdataDocument { _hd_bdd = Nothing let hd = HyperdataDocument { _hd_bdd = Nothing
, _hd_doi = Nothing , _hd_doi = Nothing
......
...@@ -43,8 +43,8 @@ getC la q ml = do ...@@ -43,8 +43,8 @@ getC la q ml = do
toDoc' :: Maybe ISO639.ISO639_1 -> HAL.Corpus -> IO HyperdataDocument toDoc' :: Maybe ISO639.ISO639_1 -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus { .. }) = do toDoc' la (HAL.Corpus { .. }) = do
-- printDebug "[toDoc corpus] h" h -- printDebug "[toDoc corpus] h" h
(utctime, (pub_year, pub_month, pub_day)) <- let mDateS = maybe (Just $ pack $ show Defaults.year) Just _corpus_date
Date.dateSplit (maybe (Just $ pack $ show Defaults.year) Just _corpus_date) let (utctime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
let abstractDefault = intercalate " " _corpus_abstract let abstractDefault = intercalate " " _corpus_abstract
let abstract = case la of let abstract = case la of
Nothing -> abstractDefault Nothing -> abstractDefault
......
...@@ -66,7 +66,8 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do ...@@ -66,7 +66,8 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText (OnlyText t2 ) = t2 langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit (maybe (Just $ Text.pack $ show Defaults.year) (Just) d) let mDateS = maybe (Just $ Text.pack $ show Defaults.year) (Just) d
let (utcTime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
pure HyperdataDocument pure HyperdataDocument
{ _hd_bdd = Just "Isidore" { _hd_bdd = Just "Isidore"
......
...@@ -227,7 +227,7 @@ toDoc ff d = do ...@@ -227,7 +227,7 @@ toDoc ff d = do
let dateToParse = DT.replace " " "" <$> lookup "PY" d -- <> Just " " <> lookup "publication_date" d let dateToParse = DT.replace " " "" <$> lookup "PY" d -- <> Just " " <> lookup "publication_date" d
-- printDebug "[G.C.T.C.Parsers] dateToParse" dateToParse -- printDebug "[G.C.T.C.Parsers] dateToParse" dateToParse
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit dateToParse let (utcTime, (pub_year, pub_month, pub_day)) = Date.mDateSplit dateToParse
let hd = HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff let hd = HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff
, _hd_doi = lookup "doi" d , _hd_doi = lookup "doi" d
......
...@@ -31,7 +31,7 @@ import Data.Text (unpack, splitOn, replace) ...@@ -31,7 +31,7 @@ import Data.Text (unpack, splitOn, replace)
import Data.Time (defaultTimeLocale, iso8601DateFormat, parseTimeM, toGregorian) import Data.Time (defaultTimeLocale, iso8601DateFormat, parseTimeM, toGregorian)
import Data.Time.Calendar qualified as DTC import Data.Time.Calendar qualified as DTC
import Data.Time.Clock ( secondsToDiffTime) import Data.Time.Clock ( secondsToDiffTime)
import Data.Time.Clock (UTCTime(..), getCurrentTime) import Data.Time.Clock (UTCTime(..)) -- , getCurrentTime)
import Data.Time.LocalTime (utc) import Data.Time.LocalTime (utc)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime) import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
import Duckling.Api (analyze) import Duckling.Api (analyze)
...@@ -41,18 +41,26 @@ import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), Duckl ...@@ -41,18 +41,26 @@ import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), Duckl
import Duckling.Types (ResolvedToken(..), ResolvedVal(..)) import Duckling.Types (ResolvedToken(..), ResolvedVal(..))
import Duckling.Types (Seal(..)) import Duckling.Types (Seal(..))
import Gargantext.Core (Lang(FR,EN)) import Gargantext.Core (Lang(FR,EN))
import Gargantext.Core.Types (DebugMode(..), withDebugMode) -- import Gargantext.Core.Types (DebugMode(..), withDebugMode)
import Gargantext.Prelude hiding (replace) import Gargantext.Prelude hiding (replace)
import System.Environment (getEnv) import System.Environment (getEnv)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Parse date to Ints -- | Parse date to Ints
-- TODO add hours, minutes and seconds -- TODO add hours, minutes and seconds
dateSplit :: Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day)) dateSplit :: Text -> Either Text (UTCTime, (Year, Month, Day))
dateSplit Nothing = pure (Nothing, (Nothing, Nothing, Nothing)) dateSplit txt = mkSplit <$> parse txt
dateSplit (Just txt) = do where
utcTime <- parse txt mkSplit utcTime =
let (y, m, d) = split' utcTime let (y, m, d) = split' utcTime in
pure (Just utcTime, (Just y, Just m, Just d)) (utcTime, (y, m, d))
mDateSplit :: Maybe Text -> (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
mDateSplit Nothing = (Nothing, (Nothing, Nothing, Nothing))
mDateSplit (Just md) =
case dateSplit md of
Left _err -> (Nothing, (Nothing, Nothing, Nothing))
Right (ut, (y, m, d)) -> (Just ut, (Just y, Just m, Just d))
split' :: UTCTime -> (Year, Month, Day) split' :: UTCTime -> (Year, Month, Day)
split' (UTCTime day _) = (fromIntegral y, m, d) split' (UTCTime day _) = (fromIntegral y, m, d)
...@@ -70,17 +78,18 @@ type Day = Int ...@@ -70,17 +78,18 @@ type Day = Int
-- 1900-04-01 19:00:00 UTC -- 1900-04-01 19:00:00 UTC
-- >>> parse EN (pack "April 1 1900") -- >>> parse EN (pack "April 1 1900")
-- 1900-04-01 00:00:00 UTC -- 1900-04-01 00:00:00 UTC
parse :: Text -> IO UTCTime parse :: Text -> Either Text UTCTime
parse s = do parse s = do
-- printDebug "Date: " s -- printDebug "Date: " s
let result = dateFlow (DucklingFailure s) let result = dateFlow (DucklingFailure s)
--printDebug "Date': " dateStr' --printDebug "Date': " dateStr'
case result of case result of
DateFlowSuccess ok -> pure ok DateFlowSuccess ok -> Right ok
DateFlowFailure -> (withDebugMode (DebugMode True) DateFlowFailure -> Left "[G.C.T.C.Parsers.Date] DateFlowFailure"
"[G.C.T.P.T.Date parse]" s -- DateFlowFailure -> (withDebugMode (DebugMode True)
$ getCurrentTime) -- "[G.C.T.P.T.Date parse]" s
_ -> panic "[G.C.T.C.Parsers.Date] parse: Should not happen" -- $ getCurrentTime)
_ -> Left "[G.C.T.C.Parsers.Date] parse: Should not happen"
defaultDate :: Text defaultDate :: Text
defaultDate = "0-0-0T0:0:0" defaultDate = "0-0-0T0:0:0"
......
...@@ -31,8 +31,8 @@ import Protolude ...@@ -31,8 +31,8 @@ import Protolude
toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc la (ISTEX.Document i t a ab d s) = do toDoc la (ISTEX.Document i t a ab d s) = do
--printDebug "ISTEX date" d --printDebug "ISTEX date" d
(utctime, (pub_year, pub_month, pub_day)) <- let mDateS = maybe (Just $ T.pack $ show Defaults.year) (Just . T.pack . show) d
Date.dateSplit (maybe (Just $ T.pack $ show Defaults.year) (Just . T.pack . show) d) let (utctime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
--printDebug "toDoc Istex" (utctime, (pub_year, pub_month, pub_day)) --printDebug "toDoc Istex" (utctime, (pub_year, pub_month, pub_day))
pure $ HyperdataDocument { _hd_bdd = Just "Istex" pure $ HyperdataDocument { _hd_bdd = Just "Istex"
, _hd_doi = Just i , _hd_doi = Just i
......
...@@ -23,7 +23,7 @@ import Data.List qualified as List ...@@ -23,7 +23,7 @@ import Data.List qualified as List
import Data.Text (concat) import Data.Text (concat)
import Database.HSparql.Connection import Database.HSparql.Connection
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.Date (dateSplit) import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
import Gargantext.Core.Text.Corpus.Parsers.Isidore (unbound) import Gargantext.Core.Text.Corpus.Parsers.Isidore (unbound)
import Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler import Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
...@@ -67,22 +67,38 @@ wikiPageToDocument m wr = do ...@@ -67,22 +67,38 @@ wikiPageToDocument m wr = do
source = Nothing source = Nothing
abstract = Just $ concat $ take m sections abstract = Just $ concat $ take m sections
(date, (year, month, day)) <- dateSplit $ head let mDateS = head $ catMaybes
$ catMaybes [ wr ^. wr_yearStart
[ wr ^. wr_yearStart , wr ^. wr_yearEnd
, wr ^. wr_yearEnd , wr ^. wr_yearFlorish
, wr ^. wr_yearFlorish , head sections
, head sections ]
] let (date, (year, month, day)) = mDateSplit mDateS
let hour = Nothing let hour = Nothing
minute = Nothing minute = Nothing
sec = Nothing sec = Nothing
iso2 = Just $ show EN iso2 = Just $ show EN
pure $ HyperdataDocument bdd doi url uniqId uniqIdBdd pure $ HyperdataDocument { _hd_bdd = bdd
page title authors institutes source , _hd_doi = doi
abstract (show <$> date) year month day hour minute sec iso2 , _hd_url = url
, _hd_uniqId = uniqId
, _hd_uniqIdBdd = uniqIdBdd
, _hd_page = page
, _hd_title = title
, _hd_authors = authors
, _hd_institutes = institutes
, _hd_source = source
, _hd_abstract = abstract
, _hd_publication_date = show <$> date
, _hd_publication_year = year
, _hd_publication_month = month
, _hd_publication_day = day
, _hd_publication_hour = hour
, _hd_publication_minute = minute
, _hd_publication_second = sec
, _hd_language_iso2 = iso2 }
wikidataSelect :: Int -> IO [WikiResult] wikidataSelect :: Int -> IO [WikiResult]
......
...@@ -19,6 +19,8 @@ import Test.Hspec ...@@ -19,6 +19,8 @@ import Test.Hspec
import Test.QuickCheck import Test.QuickCheck
import Data.Time (ZonedTime(..)) import Data.Time (ZonedTime(..))
import Data.Time.Clock (UTCTime(..), secondsToDiffTime)
import Data.Time.Calendar.OrdinalDate (fromOrdinalDate)
import Data.Text (pack) import Data.Text (pack)
import Text.Parsec.Error (ParseError) import Text.Parsec.Error (ParseError)
...@@ -26,6 +28,7 @@ import Duckling.Time.Types (toRFC3339) ...@@ -26,6 +28,7 @@ import Duckling.Time.Types (toRFC3339)
----------------------------------------------------------- -----------------------------------------------------------
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core.Text.Corpus.Parsers.Date (dateSplit)
import Gargantext.Core.Text.Corpus.Parsers.Date.Parsec (fromRFC3339) import Gargantext.Core.Text.Corpus.Parsers.Date.Parsec (fromRFC3339)
import Test.Parsers.Types import Test.Parsers.Types
...@@ -45,3 +48,14 @@ testFromRFC3339 = do ...@@ -45,3 +48,14 @@ testFromRFC3339 = do
-- \x -> let e = Right x :: Either ParseError ZonedTime -- \x -> let e = Right x :: Either ParseError ZonedTime
-- in fmap looseZonedTimePrecision e == (fromRFC3339 . fromRFC3339Inv ) (fmap looseZonedTimePrecision e) -- in fmap looseZonedTimePrecision e == (fromRFC3339 . fromRFC3339Inv ) (fmap looseZonedTimePrecision e)
testDateSplit :: Spec
testDateSplit = do
describe "Test date split" $ do
it "works for simple date parsing" $ do
let utc = UTCTime { utctDay = fromOrdinalDate 2010 4
, utctDayTime = secondsToDiffTime 0 }
dateSplit "2010-01-04" `shouldBe` Right (utc, (2010, 1, 4))
it "throws error for year-month" $ do
dateSplit "2010-01" `shouldSatisfy` isLeft
...@@ -30,6 +30,7 @@ main = do ...@@ -30,6 +30,7 @@ main = do
utilSpec <- testSpec "Utils" Utils.test utilSpec <- testSpec "Utils" Utils.test
clusteringSpec <- testSpec "Graph Clustering" Graph.test clusteringSpec <- testSpec "Graph Clustering" Graph.test
dateParserSpec <- testSpec "Date Parsing" PD.testFromRFC3339 dateParserSpec <- testSpec "Date Parsing" PD.testFromRFC3339
dateSplitSpec <- testSpec "Date split" PD.testDateSplit
cryptoSpec <- testSpec "Crypto" Crypto.test cryptoSpec <- testSpec "Crypto" Crypto.test
nlpSpec <- testSpec "NLP" NLP.test nlpSpec <- testSpec "NLP" NLP.test
jobsSpec <- testSpec "Jobs" Jobs.test jobsSpec <- testSpec "Jobs" Jobs.test
...@@ -38,6 +39,7 @@ main = do ...@@ -38,6 +39,7 @@ main = do
[ utilSpec [ utilSpec
, clusteringSpec , clusteringSpec
, dateParserSpec , dateParserSpec
, dateSplitSpec
, cryptoSpec , cryptoSpec
, nlpSpec , nlpSpec
, jobsSpec , jobsSpec
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment