Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
157
Issues
157
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
06d2a999
Verified
Commit
06d2a999
authored
Oct 20, 2023
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[date] fix dateSplit function
(no default getCurrentTime anymore)
parent
37a16868
Pipeline
#5289
canceled with stages
Changes
10
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
77 additions
and
36 deletions
+77
-36
gargantext.cabal
gargantext.cabal
+1
-1
DocumentUpload.hs
src/Gargantext/API/Node/DocumentUpload.hs
+3
-4
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+2
-2
Isidore.hs
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
+2
-1
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+1
-1
Date.hs
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
+23
-14
Istex.hs
src/Gargantext/Core/Text/Corpus/Parsers/JSON/Istex.hs
+2
-2
Wikidata.hs
src/Gargantext/Core/Text/Corpus/Parsers/Wikidata.hs
+27
-11
Date.hs
test/Test/Parsers/Date.hs
+14
-0
Main.hs
test/drivers/tasty/Main.hs
+2
-0
No files found.
gargantext.cabal
View file @
06d2a999
...
...
@@ -79,6 +79,7 @@ library
Gargantext.Core.Text.Corpus.Query
Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers.CSV
Gargantext.Core.Text.Corpus.Parsers.Date
Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
Gargantext.Core.Text.List.Formats.CSV
Gargantext.Core.Text.Metrics
...
...
@@ -222,7 +223,6 @@ library
Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex
Gargantext.Core.Text.Corpus.Parsers.Book
Gargantext.Core.Text.Corpus.Parsers.Date
Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec
Gargantext.Core.Text.Corpus.Parsers.FrameWrite
Gargantext.Core.Text.Corpus.Parsers.Gitlab
...
...
src/Gargantext/API/Node/DocumentUpload.hs
View file @
06d2a999
...
...
@@ -25,7 +25,7 @@ import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import
Gargantext.API.Prelude
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.NLP
(
nlpServerGet
)
import
Gargantext.Core.Text.Corpus.Parsers.Date
(
d
ateSplit
)
import
Gargantext.Core.Text.Corpus.Parsers.Date
(
mD
ateSplit
)
import
Gargantext.Core.Text.Terms
(
TermType
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unCapitalize
,
dropPrefix
)
import
Gargantext.Database.Action.Flow
(
addDocumentsToHyperCorpus
)
...
...
@@ -102,9 +102,8 @@ documentUpload nId doc = do
Just
c
->
c
Nothing
->
panic
$
T
.
pack
$
"[G.A.N.DU] Node has no corpus parent: "
<>
show
nId
(
theFullDate
,
(
year
,
month
,
day
))
<-
liftBase
$
dateSplit
$
Just
$
view
du_date
doc
let
mDateS
=
Just
$
view
du_date
doc
let
(
theFullDate
,
(
year
,
month
,
day
))
=
mDateSplit
mDateS
let
hd
=
HyperdataDocument
{
_hd_bdd
=
Nothing
,
_hd_doi
=
Nothing
...
...
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
06d2a999
...
...
@@ -43,8 +43,8 @@ getC la q ml = do
toDoc'
::
Maybe
ISO639
.
ISO639_1
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
la
(
HAL
.
Corpus
{
..
})
=
do
-- printDebug "[toDoc corpus] h" h
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
(
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
Just
_corpus_date
)
let
mDateS
=
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
Just
_corpus_date
let
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
=
Date
.
mDateSplit
mDateS
let
abstractDefault
=
intercalate
" "
_corpus_abstract
let
abstract
=
case
la
of
Nothing
->
abstractDefault
...
...
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
View file @
06d2a999
...
...
@@ -66,7 +66,8 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText
(
OnlyText
t2
)
=
t2
langText
(
ArrayText
ts
)
=
Text
.
intercalate
" "
$
map
langText
ts
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
(
maybe
(
Just
$
Text
.
pack
$
show
Defaults
.
year
)
(
Just
)
d
)
let
mDateS
=
maybe
(
Just
$
Text
.
pack
$
show
Defaults
.
year
)
(
Just
)
d
let
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
=
Date
.
mDateSplit
mDateS
pure
HyperdataDocument
{
_hd_bdd
=
Just
"Isidore"
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
06d2a999
...
...
@@ -227,7 +227,7 @@ toDoc ff d = do
let
dateToParse
=
DT
.
replace
" "
""
<$>
lookup
"PY"
d
-- <> Just " " <> lookup "publication_date" d
-- printDebug "[G.C.T.C.Parsers] dateToParse" dateToParse
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
d
ateSplit
dateToParse
let
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
=
Date
.
mD
ateSplit
dateToParse
let
hd
=
HyperdataDocument
{
_hd_bdd
=
Just
$
DT
.
pack
$
show
ff
,
_hd_doi
=
lookup
"doi"
d
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
View file @
06d2a999
...
...
@@ -31,7 +31,7 @@ import Data.Text (unpack, splitOn, replace)
import
Data.Time
(
defaultTimeLocale
,
iso8601DateFormat
,
parseTimeM
,
toGregorian
)
import
Data.Time.Calendar
qualified
as
DTC
import
Data.Time.Clock
(
secondsToDiffTime
)
import
Data.Time.Clock
(
UTCTime
(
..
),
getCurrentTime
)
import
Data.Time.Clock
(
UTCTime
(
..
)
)
--
, getCurrentTime)
import
Data.Time.LocalTime
(
utc
)
import
Data.Time.LocalTime.TimeZone.Series
(
zonedTimeToZoneSeriesTime
)
import
Duckling.Api
(
analyze
)
...
...
@@ -41,18 +41,26 @@ import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), Duckl
import
Duckling.Types
(
ResolvedToken
(
..
),
ResolvedVal
(
..
))
import
Duckling.Types
(
Seal
(
..
))
import
Gargantext.Core
(
Lang
(
FR
,
EN
))
import
Gargantext.Core.Types
(
DebugMode
(
..
),
withDebugMode
)
--
import Gargantext.Core.Types (DebugMode(..), withDebugMode)
import
Gargantext.Prelude
hiding
(
replace
)
import
System.Environment
(
getEnv
)
------------------------------------------------------------------------
-- | Parse date to Ints
-- TODO add hours, minutes and seconds
dateSplit
::
Maybe
Text
->
IO
(
Maybe
UTCTime
,
(
Maybe
Year
,
Maybe
Month
,
Maybe
Day
))
dateSplit
Nothing
=
pure
(
Nothing
,
(
Nothing
,
Nothing
,
Nothing
))
dateSplit
(
Just
txt
)
=
do
utcTime
<-
parse
txt
let
(
y
,
m
,
d
)
=
split'
utcTime
pure
(
Just
utcTime
,
(
Just
y
,
Just
m
,
Just
d
))
dateSplit
::
Text
->
Either
Text
(
UTCTime
,
(
Year
,
Month
,
Day
))
dateSplit
txt
=
mkSplit
<$>
parse
txt
where
mkSplit
utcTime
=
let
(
y
,
m
,
d
)
=
split'
utcTime
in
(
utcTime
,
(
y
,
m
,
d
))
mDateSplit
::
Maybe
Text
->
(
Maybe
UTCTime
,
(
Maybe
Year
,
Maybe
Month
,
Maybe
Day
))
mDateSplit
Nothing
=
(
Nothing
,
(
Nothing
,
Nothing
,
Nothing
))
mDateSplit
(
Just
md
)
=
case
dateSplit
md
of
Left
_err
->
(
Nothing
,
(
Nothing
,
Nothing
,
Nothing
))
Right
(
ut
,
(
y
,
m
,
d
))
->
(
Just
ut
,
(
Just
y
,
Just
m
,
Just
d
))
split'
::
UTCTime
->
(
Year
,
Month
,
Day
)
split'
(
UTCTime
day
_
)
=
(
fromIntegral
y
,
m
,
d
)
...
...
@@ -70,17 +78,18 @@ type Day = Int
-- 1900-04-01 19:00:00 UTC
-- >>> parse EN (pack "April 1 1900")
-- 1900-04-01 00:00:00 UTC
parse
::
Text
->
IO
UTCTime
parse
::
Text
->
Either
Text
UTCTime
parse
s
=
do
-- printDebug "Date: " s
let
result
=
dateFlow
(
DucklingFailure
s
)
--printDebug "Date': " dateStr'
case
result
of
DateFlowSuccess
ok
->
pure
ok
DateFlowFailure
->
(
withDebugMode
(
DebugMode
True
)
"[G.C.T.P.T.Date parse]"
s
$
getCurrentTime
)
_
->
panic
"[G.C.T.C.Parsers.Date] parse: Should not happen"
DateFlowSuccess
ok
->
Right
ok
DateFlowFailure
->
Left
"[G.C.T.C.Parsers.Date] DateFlowFailure"
-- DateFlowFailure -> (withDebugMode (DebugMode True)
-- "[G.C.T.P.T.Date parse]" s
-- $ getCurrentTime)
_
->
Left
"[G.C.T.C.Parsers.Date] parse: Should not happen"
defaultDate
::
Text
defaultDate
=
"0-0-0T0:0:0"
...
...
src/Gargantext/Core/Text/Corpus/Parsers/JSON/Istex.hs
View file @
06d2a999
...
...
@@ -31,8 +31,8 @@ import Protolude
toDoc
::
Lang
->
ISTEX
.
Document
->
IO
HyperdataDocument
toDoc
la
(
ISTEX
.
Document
i
t
a
ab
d
s
)
=
do
--printDebug "ISTEX date" d
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
(
maybe
(
Just
$
T
.
pack
$
show
Defaults
.
year
)
(
Just
.
T
.
pack
.
show
)
d
)
let
mDateS
=
maybe
(
Just
$
T
.
pack
$
show
Defaults
.
year
)
(
Just
.
T
.
pack
.
show
)
d
let
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
=
Date
.
mDateSplit
mDateS
--printDebug "toDoc Istex" (utctime, (pub_year, pub_month, pub_day))
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
"Istex"
,
_hd_doi
=
Just
i
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Wikidata.hs
View file @
06d2a999
...
...
@@ -23,7 +23,7 @@ import Data.List qualified as List
import
Data.Text
(
concat
)
import
Database.HSparql.Connection
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers.Date
(
d
ateSplit
)
import
Gargantext.Core.Text.Corpus.Parsers.Date
(
mD
ateSplit
)
import
Gargantext.Core.Text.Corpus.Parsers.Isidore
(
unbound
)
import
Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
))
...
...
@@ -67,22 +67,38 @@ wikiPageToDocument m wr = do
source
=
Nothing
abstract
=
Just
$
concat
$
take
m
sections
(
date
,
(
year
,
month
,
day
))
<-
dateSplit
$
head
$
catMaybes
[
wr
^.
wr_yearStart
,
wr
^.
wr_yearEnd
,
wr
^.
wr_yearFlorish
,
head
sections
]
let
mDateS
=
head
$
catMaybes
[
wr
^.
wr_yearStart
,
wr
^.
wr_yearEnd
,
wr
^.
wr_yearFlorish
,
head
sections
]
let
(
date
,
(
year
,
month
,
day
))
=
mDateSplit
mDateS
let
hour
=
Nothing
minute
=
Nothing
sec
=
Nothing
iso2
=
Just
$
show
EN
pure
$
HyperdataDocument
bdd
doi
url
uniqId
uniqIdBdd
page
title
authors
institutes
source
abstract
(
show
<$>
date
)
year
month
day
hour
minute
sec
iso2
pure
$
HyperdataDocument
{
_hd_bdd
=
bdd
,
_hd_doi
=
doi
,
_hd_url
=
url
,
_hd_uniqId
=
uniqId
,
_hd_uniqIdBdd
=
uniqIdBdd
,
_hd_page
=
page
,
_hd_title
=
title
,
_hd_authors
=
authors
,
_hd_institutes
=
institutes
,
_hd_source
=
source
,
_hd_abstract
=
abstract
,
_hd_publication_date
=
show
<$>
date
,
_hd_publication_year
=
year
,
_hd_publication_month
=
month
,
_hd_publication_day
=
day
,
_hd_publication_hour
=
hour
,
_hd_publication_minute
=
minute
,
_hd_publication_second
=
sec
,
_hd_language_iso2
=
iso2
}
wikidataSelect
::
Int
->
IO
[
WikiResult
]
...
...
test/Test/Parsers/Date.hs
View file @
06d2a999
...
...
@@ -19,6 +19,8 @@ import Test.Hspec
import
Test.QuickCheck
import
Data.Time
(
ZonedTime
(
..
))
import
Data.Time.Clock
(
UTCTime
(
..
),
secondsToDiffTime
)
import
Data.Time.Calendar.OrdinalDate
(
fromOrdinalDate
)
import
Data.Text
(
pack
)
import
Text.Parsec.Error
(
ParseError
)
...
...
@@ -26,6 +28,7 @@ import Duckling.Time.Types (toRFC3339)
-----------------------------------------------------------
import
Gargantext.Prelude
import
Gargantext.Core.Text.Corpus.Parsers.Date
(
dateSplit
)
import
Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
(
fromRFC3339
)
import
Test.Parsers.Types
...
...
@@ -45,3 +48,14 @@ testFromRFC3339 = do
-- \x -> let e = Right x :: Either ParseError ZonedTime
-- in fmap looseZonedTimePrecision e == (fromRFC3339 . fromRFC3339Inv ) (fmap looseZonedTimePrecision e)
testDateSplit
::
Spec
testDateSplit
=
do
describe
"Test date split"
$
do
it
"works for simple date parsing"
$
do
let
utc
=
UTCTime
{
utctDay
=
fromOrdinalDate
2010
4
,
utctDayTime
=
secondsToDiffTime
0
}
dateSplit
"2010-01-04"
`
shouldBe
`
Right
(
utc
,
(
2010
,
1
,
4
))
it
"throws error for year-month"
$
do
dateSplit
"2010-01"
`
shouldSatisfy
`
isLeft
test/drivers/tasty/Main.hs
View file @
06d2a999
...
...
@@ -30,6 +30,7 @@ main = do
utilSpec
<-
testSpec
"Utils"
Utils
.
test
clusteringSpec
<-
testSpec
"Graph Clustering"
Graph
.
test
dateParserSpec
<-
testSpec
"Date Parsing"
PD
.
testFromRFC3339
dateSplitSpec
<-
testSpec
"Date split"
PD
.
testDateSplit
cryptoSpec
<-
testSpec
"Crypto"
Crypto
.
test
nlpSpec
<-
testSpec
"NLP"
NLP
.
test
jobsSpec
<-
testSpec
"Jobs"
Jobs
.
test
...
...
@@ -38,6 +39,7 @@ main = do
[
utilSpec
,
clusteringSpec
,
dateParserSpec
,
dateSplitSpec
,
cryptoSpec
,
nlpSpec
,
jobsSpec
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment