[date] fix dateSplit function

(no default getCurrentTime anymore)
parent 37a16868
Pipeline #5289 canceled with stages
......@@ -79,6 +79,7 @@ library
Gargantext.Core.Text.Corpus.Query
Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers.CSV
Gargantext.Core.Text.Corpus.Parsers.Date
Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
Gargantext.Core.Text.List.Formats.CSV
Gargantext.Core.Text.Metrics
......@@ -222,7 +223,6 @@ library
Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex
Gargantext.Core.Text.Corpus.Parsers.Book
Gargantext.Core.Text.Corpus.Parsers.Date
Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec
Gargantext.Core.Text.Corpus.Parsers.FrameWrite
Gargantext.Core.Text.Corpus.Parsers.Gitlab
......
......@@ -25,7 +25,7 @@ import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Core.NLP (nlpServerGet)
import Gargantext.Core.Text.Corpus.Parsers.Date (dateSplit)
import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus)
......@@ -102,9 +102,8 @@ documentUpload nId doc = do
Just c -> c
Nothing -> panic $ T.pack $ "[G.A.N.DU] Node has no corpus parent: " <> show nId
(theFullDate, (year, month, day)) <- liftBase $ dateSplit
$ Just
$ view du_date doc
let mDateS = Just $ view du_date doc
let (theFullDate, (year, month, day)) = mDateSplit mDateS
let hd = HyperdataDocument { _hd_bdd = Nothing
, _hd_doi = Nothing
......
......@@ -43,8 +43,8 @@ getC la q ml = do
toDoc' :: Maybe ISO639.ISO639_1 -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus { .. }) = do
-- printDebug "[toDoc corpus] h" h
(utctime, (pub_year, pub_month, pub_day)) <-
Date.dateSplit (maybe (Just $ pack $ show Defaults.year) Just _corpus_date)
let mDateS = maybe (Just $ pack $ show Defaults.year) Just _corpus_date
let (utctime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
let abstractDefault = intercalate " " _corpus_abstract
let abstract = case la of
Nothing -> abstractDefault
......
......@@ -66,7 +66,8 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit (maybe (Just $ Text.pack $ show Defaults.year) (Just) d)
let mDateS = maybe (Just $ Text.pack $ show Defaults.year) (Just) d
let (utcTime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
pure HyperdataDocument
{ _hd_bdd = Just "Isidore"
......
......@@ -227,7 +227,7 @@ toDoc ff d = do
let dateToParse = DT.replace " " "" <$> lookup "PY" d -- <> Just " " <> lookup "publication_date" d
-- printDebug "[G.C.T.C.Parsers] dateToParse" dateToParse
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit dateToParse
let (utcTime, (pub_year, pub_month, pub_day)) = Date.mDateSplit dateToParse
let hd = HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff
, _hd_doi = lookup "doi" d
......
......@@ -31,7 +31,7 @@ import Data.Text (unpack, splitOn, replace)
import Data.Time (defaultTimeLocale, iso8601DateFormat, parseTimeM, toGregorian)
import Data.Time.Calendar qualified as DTC
import Data.Time.Clock ( secondsToDiffTime)
import Data.Time.Clock (UTCTime(..), getCurrentTime)
import Data.Time.Clock (UTCTime(..)) -- , getCurrentTime)
import Data.Time.LocalTime (utc)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
import Duckling.Api (analyze)
......@@ -41,18 +41,26 @@ import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), Duckl
import Duckling.Types (ResolvedToken(..), ResolvedVal(..))
import Duckling.Types (Seal(..))
import Gargantext.Core (Lang(FR,EN))
import Gargantext.Core.Types (DebugMode(..), withDebugMode)
-- import Gargantext.Core.Types (DebugMode(..), withDebugMode)
import Gargantext.Prelude hiding (replace)
import System.Environment (getEnv)
------------------------------------------------------------------------
-- | Parse date to Ints
-- TODO add hours, minutes and seconds
dateSplit :: Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
dateSplit Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
dateSplit (Just txt) = do
utcTime <- parse txt
let (y, m, d) = split' utcTime
pure (Just utcTime, (Just y, Just m, Just d))
dateSplit :: Text -> Either Text (UTCTime, (Year, Month, Day))
dateSplit txt = mkSplit <$> parse txt
where
mkSplit utcTime =
let (y, m, d) = split' utcTime in
(utcTime, (y, m, d))
mDateSplit :: Maybe Text -> (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
mDateSplit Nothing = (Nothing, (Nothing, Nothing, Nothing))
mDateSplit (Just md) =
case dateSplit md of
Left _err -> (Nothing, (Nothing, Nothing, Nothing))
Right (ut, (y, m, d)) -> (Just ut, (Just y, Just m, Just d))
split' :: UTCTime -> (Year, Month, Day)
split' (UTCTime day _) = (fromIntegral y, m, d)
......@@ -70,17 +78,18 @@ type Day = Int
-- 1900-04-01 19:00:00 UTC
-- >>> parse EN (pack "April 1 1900")
-- 1900-04-01 00:00:00 UTC
parse :: Text -> IO UTCTime
parse :: Text -> Either Text UTCTime
parse s = do
-- printDebug "Date: " s
let result = dateFlow (DucklingFailure s)
--printDebug "Date': " dateStr'
case result of
DateFlowSuccess ok -> pure ok
DateFlowFailure -> (withDebugMode (DebugMode True)
"[G.C.T.P.T.Date parse]" s
$ getCurrentTime)
_ -> panic "[G.C.T.C.Parsers.Date] parse: Should not happen"
DateFlowSuccess ok -> Right ok
DateFlowFailure -> Left "[G.C.T.C.Parsers.Date] DateFlowFailure"
-- DateFlowFailure -> (withDebugMode (DebugMode True)
-- "[G.C.T.P.T.Date parse]" s
-- $ getCurrentTime)
_ -> Left "[G.C.T.C.Parsers.Date] parse: Should not happen"
defaultDate :: Text
defaultDate = "0-0-0T0:0:0"
......
......@@ -31,8 +31,8 @@ import Protolude
toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc la (ISTEX.Document i t a ab d s) = do
--printDebug "ISTEX date" d
(utctime, (pub_year, pub_month, pub_day)) <-
Date.dateSplit (maybe (Just $ T.pack $ show Defaults.year) (Just . T.pack . show) d)
let mDateS = maybe (Just $ T.pack $ show Defaults.year) (Just . T.pack . show) d
let (utctime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
--printDebug "toDoc Istex" (utctime, (pub_year, pub_month, pub_day))
pure $ HyperdataDocument { _hd_bdd = Just "Istex"
, _hd_doi = Just i
......
......@@ -23,7 +23,7 @@ import Data.List qualified as List
import Data.Text (concat)
import Database.HSparql.Connection
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.Date (dateSplit)
import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
import Gargantext.Core.Text.Corpus.Parsers.Isidore (unbound)
import Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
......@@ -67,22 +67,38 @@ wikiPageToDocument m wr = do
source = Nothing
abstract = Just $ concat $ take m sections
(date, (year, month, day)) <- dateSplit $ head
$ catMaybes
[ wr ^. wr_yearStart
, wr ^. wr_yearEnd
, wr ^. wr_yearFlorish
, head sections
]
let mDateS = head $ catMaybes
[ wr ^. wr_yearStart
, wr ^. wr_yearEnd
, wr ^. wr_yearFlorish
, head sections
]
let (date, (year, month, day)) = mDateSplit mDateS
let hour = Nothing
minute = Nothing
sec = Nothing
iso2 = Just $ show EN
pure $ HyperdataDocument bdd doi url uniqId uniqIdBdd
page title authors institutes source
abstract (show <$> date) year month day hour minute sec iso2
pure $ HyperdataDocument { _hd_bdd = bdd
, _hd_doi = doi
, _hd_url = url
, _hd_uniqId = uniqId
, _hd_uniqIdBdd = uniqIdBdd
, _hd_page = page
, _hd_title = title
, _hd_authors = authors
, _hd_institutes = institutes
, _hd_source = source
, _hd_abstract = abstract
, _hd_publication_date = show <$> date
, _hd_publication_year = year
, _hd_publication_month = month
, _hd_publication_day = day
, _hd_publication_hour = hour
, _hd_publication_minute = minute
, _hd_publication_second = sec
, _hd_language_iso2 = iso2 }
wikidataSelect :: Int -> IO [WikiResult]
......
......@@ -19,6 +19,8 @@ import Test.Hspec
import Test.QuickCheck
import Data.Time (ZonedTime(..))
import Data.Time.Clock (UTCTime(..), secondsToDiffTime)
import Data.Time.Calendar.OrdinalDate (fromOrdinalDate)
import Data.Text (pack)
import Text.Parsec.Error (ParseError)
......@@ -26,6 +28,7 @@ import Duckling.Time.Types (toRFC3339)
-----------------------------------------------------------
import Gargantext.Prelude
import Gargantext.Core.Text.Corpus.Parsers.Date (dateSplit)
import Gargantext.Core.Text.Corpus.Parsers.Date.Parsec (fromRFC3339)
import Test.Parsers.Types
......@@ -45,3 +48,14 @@ testFromRFC3339 = do
-- \x -> let e = Right x :: Either ParseError ZonedTime
-- in fmap looseZonedTimePrecision e == (fromRFC3339 . fromRFC3339Inv ) (fmap looseZonedTimePrecision e)
testDateSplit :: Spec
testDateSplit = do
describe "Test date split" $ do
it "works for simple date parsing" $ do
let utc = UTCTime { utctDay = fromOrdinalDate 2010 4
, utctDayTime = secondsToDiffTime 0 }
dateSplit "2010-01-04" `shouldBe` Right (utc, (2010, 1, 4))
it "throws error for year-month" $ do
dateSplit "2010-01" `shouldSatisfy` isLeft
......@@ -30,6 +30,7 @@ main = do
utilSpec <- testSpec "Utils" Utils.test
clusteringSpec <- testSpec "Graph Clustering" Graph.test
dateParserSpec <- testSpec "Date Parsing" PD.testFromRFC3339
dateSplitSpec <- testSpec "Date split" PD.testDateSplit
cryptoSpec <- testSpec "Crypto" Crypto.test
nlpSpec <- testSpec "NLP" NLP.test
jobsSpec <- testSpec "Jobs" Jobs.test
......@@ -38,6 +39,7 @@ main = do
[ utilSpec
, clusteringSpec
, dateParserSpec
, dateSplitSpec
, cryptoSpec
, nlpSpec
, jobsSpec
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment