Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
160
Issues
160
List
Board
Labels
Milestones
Merge Requests
14
Merge Requests
14
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
06d2a999
Verified
Commit
06d2a999
authored
Oct 20, 2023
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[date] fix dateSplit function
(no default getCurrentTime anymore)
parent
37a16868
Pipeline
#5289
canceled with stages
Changes
10
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
77 additions
and
36 deletions
+77
-36
gargantext.cabal
gargantext.cabal
+1
-1
DocumentUpload.hs
src/Gargantext/API/Node/DocumentUpload.hs
+3
-4
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+2
-2
Isidore.hs
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
+2
-1
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+1
-1
Date.hs
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
+23
-14
Istex.hs
src/Gargantext/Core/Text/Corpus/Parsers/JSON/Istex.hs
+2
-2
Wikidata.hs
src/Gargantext/Core/Text/Corpus/Parsers/Wikidata.hs
+27
-11
Date.hs
test/Test/Parsers/Date.hs
+14
-0
Main.hs
test/drivers/tasty/Main.hs
+2
-0
No files found.
gargantext.cabal
View file @
06d2a999
...
...
@@ -79,6 +79,7 @@ library
Gargantext.Core.Text.Corpus.Query
Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers.CSV
Gargantext.Core.Text.Corpus.Parsers.Date
Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
Gargantext.Core.Text.List.Formats.CSV
Gargantext.Core.Text.Metrics
...
...
@@ -222,7 +223,6 @@ library
Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex
Gargantext.Core.Text.Corpus.Parsers.Book
Gargantext.Core.Text.Corpus.Parsers.Date
Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec
Gargantext.Core.Text.Corpus.Parsers.FrameWrite
Gargantext.Core.Text.Corpus.Parsers.Gitlab
...
...
src/Gargantext/API/Node/DocumentUpload.hs
View file @
06d2a999
...
...
@@ -25,7 +25,7 @@ import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import
Gargantext.API.Prelude
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.NLP
(
nlpServerGet
)
import
Gargantext.Core.Text.Corpus.Parsers.Date
(
d
ateSplit
)
import
Gargantext.Core.Text.Corpus.Parsers.Date
(
mD
ateSplit
)
import
Gargantext.Core.Text.Terms
(
TermType
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unCapitalize
,
dropPrefix
)
import
Gargantext.Database.Action.Flow
(
addDocumentsToHyperCorpus
)
...
...
@@ -102,9 +102,8 @@ documentUpload nId doc = do
Just
c
->
c
Nothing
->
panic
$
T
.
pack
$
"[G.A.N.DU] Node has no corpus parent: "
<>
show
nId
(
theFullDate
,
(
year
,
month
,
day
))
<-
liftBase
$
dateSplit
$
Just
$
view
du_date
doc
let
mDateS
=
Just
$
view
du_date
doc
let
(
theFullDate
,
(
year
,
month
,
day
))
=
mDateSplit
mDateS
let
hd
=
HyperdataDocument
{
_hd_bdd
=
Nothing
,
_hd_doi
=
Nothing
...
...
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
06d2a999
...
...
@@ -43,8 +43,8 @@ getC la q ml = do
toDoc'
::
Maybe
ISO639
.
ISO639_1
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
la
(
HAL
.
Corpus
{
..
})
=
do
-- printDebug "[toDoc corpus] h" h
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
(
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
Just
_corpus_date
)
let
mDateS
=
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
Just
_corpus_date
let
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
=
Date
.
mDateSplit
mDateS
let
abstractDefault
=
intercalate
" "
_corpus_abstract
let
abstract
=
case
la
of
Nothing
->
abstractDefault
...
...
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
View file @
06d2a999
...
...
@@ -66,7 +66,8 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText
(
OnlyText
t2
)
=
t2
langText
(
ArrayText
ts
)
=
Text
.
intercalate
" "
$
map
langText
ts
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
(
maybe
(
Just
$
Text
.
pack
$
show
Defaults
.
year
)
(
Just
)
d
)
let
mDateS
=
maybe
(
Just
$
Text
.
pack
$
show
Defaults
.
year
)
(
Just
)
d
let
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
=
Date
.
mDateSplit
mDateS
pure
HyperdataDocument
{
_hd_bdd
=
Just
"Isidore"
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
06d2a999
...
...
@@ -227,7 +227,7 @@ toDoc ff d = do
let
dateToParse
=
DT
.
replace
" "
""
<$>
lookup
"PY"
d
-- <> Just " " <> lookup "publication_date" d
-- printDebug "[G.C.T.C.Parsers] dateToParse" dateToParse
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
d
ateSplit
dateToParse
let
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
=
Date
.
mD
ateSplit
dateToParse
let
hd
=
HyperdataDocument
{
_hd_bdd
=
Just
$
DT
.
pack
$
show
ff
,
_hd_doi
=
lookup
"doi"
d
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
View file @
06d2a999
...
...
@@ -31,7 +31,7 @@ import Data.Text (unpack, splitOn, replace)
import
Data.Time
(
defaultTimeLocale
,
iso8601DateFormat
,
parseTimeM
,
toGregorian
)
import
Data.Time.Calendar
qualified
as
DTC
import
Data.Time.Clock
(
secondsToDiffTime
)
import
Data.Time.Clock
(
UTCTime
(
..
),
getCurrentTime
)
import
Data.Time.Clock
(
UTCTime
(
..
)
)
--
, getCurrentTime)
import
Data.Time.LocalTime
(
utc
)
import
Data.Time.LocalTime.TimeZone.Series
(
zonedTimeToZoneSeriesTime
)
import
Duckling.Api
(
analyze
)
...
...
@@ -41,18 +41,26 @@ import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), Duckl
import
Duckling.Types
(
ResolvedToken
(
..
),
ResolvedVal
(
..
))
import
Duckling.Types
(
Seal
(
..
))
import
Gargantext.Core
(
Lang
(
FR
,
EN
))
import
Gargantext.Core.Types
(
DebugMode
(
..
),
withDebugMode
)
--
import Gargantext.Core.Types (DebugMode(..), withDebugMode)
import
Gargantext.Prelude
hiding
(
replace
)
import
System.Environment
(
getEnv
)
------------------------------------------------------------------------
-- | Parse date to Ints
-- TODO add hours, minutes and seconds
dateSplit
::
Maybe
Text
->
IO
(
Maybe
UTCTime
,
(
Maybe
Year
,
Maybe
Month
,
Maybe
Day
))
dateSplit
Nothing
=
pure
(
Nothing
,
(
Nothing
,
Nothing
,
Nothing
))
dateSplit
(
Just
txt
)
=
do
utcTime
<-
parse
txt
let
(
y
,
m
,
d
)
=
split'
utcTime
pure
(
Just
utcTime
,
(
Just
y
,
Just
m
,
Just
d
))
dateSplit
::
Text
->
Either
Text
(
UTCTime
,
(
Year
,
Month
,
Day
))
dateSplit
txt
=
mkSplit
<$>
parse
txt
where
mkSplit
utcTime
=
let
(
y
,
m
,
d
)
=
split'
utcTime
in
(
utcTime
,
(
y
,
m
,
d
))
mDateSplit
::
Maybe
Text
->
(
Maybe
UTCTime
,
(
Maybe
Year
,
Maybe
Month
,
Maybe
Day
))
mDateSplit
Nothing
=
(
Nothing
,
(
Nothing
,
Nothing
,
Nothing
))
mDateSplit
(
Just
md
)
=
case
dateSplit
md
of
Left
_err
->
(
Nothing
,
(
Nothing
,
Nothing
,
Nothing
))
Right
(
ut
,
(
y
,
m
,
d
))
->
(
Just
ut
,
(
Just
y
,
Just
m
,
Just
d
))
split'
::
UTCTime
->
(
Year
,
Month
,
Day
)
split'
(
UTCTime
day
_
)
=
(
fromIntegral
y
,
m
,
d
)
...
...
@@ -70,17 +78,18 @@ type Day = Int
-- 1900-04-01 19:00:00 UTC
-- >>> parse EN (pack "April 1 1900")
-- 1900-04-01 00:00:00 UTC
parse
::
Text
->
IO
UTCTime
parse
::
Text
->
Either
Text
UTCTime
parse
s
=
do
-- printDebug "Date: " s
let
result
=
dateFlow
(
DucklingFailure
s
)
--printDebug "Date': " dateStr'
case
result
of
DateFlowSuccess
ok
->
pure
ok
DateFlowFailure
->
(
withDebugMode
(
DebugMode
True
)
"[G.C.T.P.T.Date parse]"
s
$
getCurrentTime
)
_
->
panic
"[G.C.T.C.Parsers.Date] parse: Should not happen"
DateFlowSuccess
ok
->
Right
ok
DateFlowFailure
->
Left
"[G.C.T.C.Parsers.Date] DateFlowFailure"
-- DateFlowFailure -> (withDebugMode (DebugMode True)
-- "[G.C.T.P.T.Date parse]" s
-- $ getCurrentTime)
_
->
Left
"[G.C.T.C.Parsers.Date] parse: Should not happen"
defaultDate
::
Text
defaultDate
=
"0-0-0T0:0:0"
...
...
src/Gargantext/Core/Text/Corpus/Parsers/JSON/Istex.hs
View file @
06d2a999
...
...
@@ -31,8 +31,8 @@ import Protolude
toDoc
::
Lang
->
ISTEX
.
Document
->
IO
HyperdataDocument
toDoc
la
(
ISTEX
.
Document
i
t
a
ab
d
s
)
=
do
--printDebug "ISTEX date" d
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
(
maybe
(
Just
$
T
.
pack
$
show
Defaults
.
year
)
(
Just
.
T
.
pack
.
show
)
d
)
let
mDateS
=
maybe
(
Just
$
T
.
pack
$
show
Defaults
.
year
)
(
Just
.
T
.
pack
.
show
)
d
let
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
=
Date
.
mDateSplit
mDateS
--printDebug "toDoc Istex" (utctime, (pub_year, pub_month, pub_day))
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
"Istex"
,
_hd_doi
=
Just
i
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Wikidata.hs
View file @
06d2a999
...
...
@@ -23,7 +23,7 @@ import Data.List qualified as List
import
Data.Text
(
concat
)
import
Database.HSparql.Connection
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers.Date
(
d
ateSplit
)
import
Gargantext.Core.Text.Corpus.Parsers.Date
(
mD
ateSplit
)
import
Gargantext.Core.Text.Corpus.Parsers.Isidore
(
unbound
)
import
Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
))
...
...
@@ -67,22 +67,38 @@ wikiPageToDocument m wr = do
source
=
Nothing
abstract
=
Just
$
concat
$
take
m
sections
(
date
,
(
year
,
month
,
day
))
<-
dateSplit
$
head
$
catMaybes
[
wr
^.
wr_yearStart
,
wr
^.
wr_yearEnd
,
wr
^.
wr_yearFlorish
,
head
sections
]
let
mDateS
=
head
$
catMaybes
[
wr
^.
wr_yearStart
,
wr
^.
wr_yearEnd
,
wr
^.
wr_yearFlorish
,
head
sections
]
let
(
date
,
(
year
,
month
,
day
))
=
mDateSplit
mDateS
let
hour
=
Nothing
minute
=
Nothing
sec
=
Nothing
iso2
=
Just
$
show
EN
pure
$
HyperdataDocument
bdd
doi
url
uniqId
uniqIdBdd
page
title
authors
institutes
source
abstract
(
show
<$>
date
)
year
month
day
hour
minute
sec
iso2
pure
$
HyperdataDocument
{
_hd_bdd
=
bdd
,
_hd_doi
=
doi
,
_hd_url
=
url
,
_hd_uniqId
=
uniqId
,
_hd_uniqIdBdd
=
uniqIdBdd
,
_hd_page
=
page
,
_hd_title
=
title
,
_hd_authors
=
authors
,
_hd_institutes
=
institutes
,
_hd_source
=
source
,
_hd_abstract
=
abstract
,
_hd_publication_date
=
show
<$>
date
,
_hd_publication_year
=
year
,
_hd_publication_month
=
month
,
_hd_publication_day
=
day
,
_hd_publication_hour
=
hour
,
_hd_publication_minute
=
minute
,
_hd_publication_second
=
sec
,
_hd_language_iso2
=
iso2
}
wikidataSelect
::
Int
->
IO
[
WikiResult
]
...
...
test/Test/Parsers/Date.hs
View file @
06d2a999
...
...
@@ -19,6 +19,8 @@ import Test.Hspec
import
Test.QuickCheck
import
Data.Time
(
ZonedTime
(
..
))
import
Data.Time.Clock
(
UTCTime
(
..
),
secondsToDiffTime
)
import
Data.Time.Calendar.OrdinalDate
(
fromOrdinalDate
)
import
Data.Text
(
pack
)
import
Text.Parsec.Error
(
ParseError
)
...
...
@@ -26,6 +28,7 @@ import Duckling.Time.Types (toRFC3339)
-----------------------------------------------------------
import
Gargantext.Prelude
import
Gargantext.Core.Text.Corpus.Parsers.Date
(
dateSplit
)
import
Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
(
fromRFC3339
)
import
Test.Parsers.Types
...
...
@@ -45,3 +48,14 @@ testFromRFC3339 = do
-- \x -> let e = Right x :: Either ParseError ZonedTime
-- in fmap looseZonedTimePrecision e == (fromRFC3339 . fromRFC3339Inv ) (fmap looseZonedTimePrecision e)
testDateSplit
::
Spec
testDateSplit
=
do
describe
"Test date split"
$
do
it
"works for simple date parsing"
$
do
let
utc
=
UTCTime
{
utctDay
=
fromOrdinalDate
2010
4
,
utctDayTime
=
secondsToDiffTime
0
}
dateSplit
"2010-01-04"
`
shouldBe
`
Right
(
utc
,
(
2010
,
1
,
4
))
it
"throws error for year-month"
$
do
dateSplit
"2010-01"
`
shouldSatisfy
`
isLeft
test/drivers/tasty/Main.hs
View file @
06d2a999
...
...
@@ -30,6 +30,7 @@ main = do
utilSpec
<-
testSpec
"Utils"
Utils
.
test
clusteringSpec
<-
testSpec
"Graph Clustering"
Graph
.
test
dateParserSpec
<-
testSpec
"Date Parsing"
PD
.
testFromRFC3339
dateSplitSpec
<-
testSpec
"Date split"
PD
.
testDateSplit
cryptoSpec
<-
testSpec
"Crypto"
Crypto
.
test
nlpSpec
<-
testSpec
"NLP"
NLP
.
test
jobsSpec
<-
testSpec
"Jobs"
Jobs
.
test
...
...
@@ -38,6 +39,7 @@ main = do
[
utilSpec
,
clusteringSpec
,
dateParserSpec
,
dateSplitSpec
,
cryptoSpec
,
nlpSpec
,
jobsSpec
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment