Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
195
Issues
195
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
bf98b6c8
Verified
Commit
bf98b6c8
authored
Aug 08, 2023
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[refactor] simplify dateSplit (no lang needed)
Also, use ISO639 languages in some places.
parent
9b4bef67
Pipeline
#4496
failed with stages
in 10 minutes and 16 seconds
Changes
10
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
48 additions
and
46 deletions
+48
-46
gargantext.cabal
gargantext.cabal
+1
-0
DocumentUpload.hs
src/Gargantext/API/Node/DocumentUpload.hs
+1
-1
Core.hs
src/Gargantext/Core.hs
+19
-0
API.hs
src/Gargantext/Core/Text/Corpus/API.hs
+2
-2
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+8
-22
Isidore.hs
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
+6
-8
Istex.hs
src/Gargantext/Core/Text/Corpus/API/Istex.hs
+1
-1
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+1
-1
Date.hs
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
+7
-8
Wikidata.hs
src/Gargantext/Core/Text/Corpus/Parsers/Wikidata.hs
+2
-3
No files found.
gargantext.cabal
View file @
bf98b6c8
...
...
@@ -954,3 +954,4 @@ test-suite garg-test
, unordered-containers ^>= 0.2.16.0
, validity ^>= 0.11.0.1
default-language: Haskell2010
src/Gargantext/API/Node/DocumentUpload.hs
View file @
bf98b6c8
...
...
@@ -93,7 +93,7 @@ documentUpload nId doc = do
Just
c
->
c
Nothing
->
panic
$
T
.
pack
$
"[G.A.N.DU] Node has no corpus parent: "
<>
show
nId
(
theFullDate
,
(
year
,
month
,
day
))
<-
liftBase
$
dateSplit
EN
(
theFullDate
,
(
year
,
month
,
day
))
<-
liftBase
$
dateSplit
$
Just
$
view
du_date
doc
...
...
src/Gargantext/Core.hs
View file @
bf98b6c8
...
...
@@ -17,6 +17,7 @@ module Gargantext.Core
import
Data.Aeson
import
Data.Either
(
Either
(
Left
))
import
Data.Hashable
(
Hashable
)
import
Data.LanguageCodes
qualified
as
ISO639
import
Data.Maybe
(
fromMaybe
)
import
Data.Morpheus.Types
(
GQLType
)
import
Data.Swagger
...
...
@@ -45,6 +46,7 @@ import qualified Data.Map as Map
-- | All languages supported
-- NOTE: Use international country codes
-- https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
-- TODO This should be deprecated in favor of iso-639 library
data
Lang
=
All
|
DE
|
EL
...
...
@@ -93,6 +95,23 @@ instance Hashable Lang
instance
Arbitrary
Lang
where
arbitrary
=
arbitraryBoundedEnum
toISO639
::
Lang
->
Maybe
ISO639
.
ISO639_1
toISO639
DE
=
Just
ISO639
.
DE
toISO639
EL
=
Just
ISO639
.
EL
toISO639
EN
=
Just
ISO639
.
EN
toISO639
ES
=
Just
ISO639
.
ES
toISO639
FR
=
Just
ISO639
.
FR
toISO639
IT
=
Just
ISO639
.
IT
toISO639
PL
=
Just
ISO639
.
PL
toISO639
PT
=
Just
ISO639
.
PT
toISO639
RU
=
Just
ISO639
.
RU
toISO639
UK
=
Just
ISO639
.
UK
toISO639
ZH
=
Just
ISO639
.
ZH
toISO639
All
=
Nothing
toISO639EN
::
Lang
->
ISO639
.
ISO639_1
toISO639EN
l
=
fromMaybe
ISO639
.
EN
$
toISO639
l
-- | https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
toISO639Lang
::
Lang
->
Maybe
Text
toISO639Lang
All
=
Nothing
...
...
src/Gargantext/Core/Text/Corpus/API.hs
View file @
bf98b6c8
...
...
@@ -24,7 +24,7 @@ import Data.Either (Either(..))
import
Data.Maybe
import
qualified
Data.Text
as
T
import
Gargantext.API.Admin.Orchestrator.Types
(
ExternalAPIs
(
..
),
externalAPIs
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
)
,
toISO639
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
import
qualified
Gargantext.Core.Text.Corpus.API.Arxiv
as
Arxiv
...
...
@@ -63,7 +63,7 @@ get externalAPI la q mPubmedAPIKey limit = do
--docs <- PUBMED.get q default_limit -- EN only by default
--pure (Just $ fromIntegral $ length docs, yieldMany docs)
Arxiv
->
Right
<$>
Arxiv
.
get
la
corpusQuery
limit
HAL
->
first
ExternalAPIError
<$>
HAL
.
getC
la
(
Corpus
.
getRawQuery
q
)
(
Corpus
.
getLimit
<$>
limit
)
HAL
->
first
ExternalAPIError
<$>
HAL
.
getC
(
toISO639
la
)
(
Corpus
.
getRawQuery
q
)
(
Corpus
.
getLimit
<$>
limit
)
IsTex
->
do
docs
<-
ISTEX
.
get
la
(
Corpus
.
getRawQuery
q
)
(
Corpus
.
getLimit
<$>
limit
)
pure
$
Right
(
Just
$
fromIntegral
$
length
docs
,
yieldMany
docs
)
Isidore
->
do
docs
<-
ISIDORE
.
get
la
(
Corpus
.
getLimit
<$>
limit
)
(
Just
$
Corpus
.
getRawQuery
q
)
Nothing
...
...
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
bf98b6c8
...
...
@@ -18,7 +18,7 @@ import Data.LanguageCodes qualified as ISO639
import
Data.Map.Strict
qualified
as
Map
import
Data.Maybe
import
Data.Text
(
Text
,
pack
,
intercalate
)
import
Gargantext.Core
(
Lang
(
..
)
)
-- import Gargantext.Core (Lang(..), toISO639Lang
)
import
Gargantext.Core.Text.Corpus.Parsers.Date
qualified
as
Date
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Defaults
qualified
as
Defaults
...
...
@@ -28,40 +28,26 @@ import HAL.Client qualified as HAL
import
HAL.Doc.Corpus
qualified
as
HAL
import
Servant.Client
(
ClientError
)
toLang
::
Lang
->
Maybe
ISO639
.
ISO639_1
toLang
DE
=
Just
ISO639
.
DE
toLang
EL
=
Just
ISO639
.
EL
toLang
EN
=
Just
ISO639
.
EN
toLang
ES
=
Just
ISO639
.
ES
toLang
FR
=
Just
ISO639
.
FR
toLang
IT
=
Just
ISO639
.
IT
toLang
PL
=
Just
ISO639
.
PL
toLang
PT
=
Just
ISO639
.
PT
toLang
RU
=
Just
ISO639
.
RU
toLang
UK
=
Just
ISO639
.
UK
toLang
ZH
=
Just
ISO639
.
ZH
toLang
All
=
Nothing
get
::
Lang
->
Text
->
Maybe
Int
->
IO
[
HyperdataDocument
]
get
::
Maybe
ISO639
.
ISO639_1
->
Text
->
Maybe
Int
->
IO
[
HyperdataDocument
]
get
la
q
ml
=
do
eDocs
<-
HAL
.
getMetadataWith
[
q
]
(
Just
0
)
(
fromIntegral
<$>
ml
)
(
toLang
la
)
eDocs
<-
HAL
.
getMetadataWith
[
q
]
(
Just
0
)
(
fromIntegral
<$>
ml
)
la
either
(
panic
.
pack
.
show
)
(
\
d
->
mapM
(
toDoc'
la
)
$
HAL
.
_docs
d
)
eDocs
getC
::
Lang
->
Text
->
Maybe
Int
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
getC
::
Maybe
ISO639
.
ISO639_1
->
Text
->
Maybe
Int
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
getC
la
q
ml
=
do
eRes
<-
HAL
.
getMetadataWithC
[
q
]
(
Just
0
)
(
fromIntegral
<$>
ml
)
(
toLang
la
)
eRes
<-
HAL
.
getMetadataWithC
[
q
]
(
Just
0
)
(
fromIntegral
<$>
ml
)
la
pure
$
(
\
(
len
,
docsC
)
->
(
len
,
docsC
.|
mapMC
(
toDoc'
la
)))
<$>
eRes
-- case eRes of
-- Left err -> panic $ pack $ show err
-- Right (len, docsC) -> pure (len, docsC .| mapMC (toDoc' la))
toDoc'
::
Lang
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
::
Maybe
ISO639
.
ISO639_1
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
la
(
HAL
.
Corpus
{
..
})
=
do
-- printDebug "[toDoc corpus] h" h
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
Just
_corpus_date
)
Date
.
dateSplit
(
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
Just
_corpus_date
)
let
abstractDefault
=
intercalate
" "
_corpus_abstract
let
abstract
=
case
toLang
la
of
let
abstract
=
case
la
of
Nothing
->
abstractDefault
Just
l
->
fromMaybe
abstractDefault
(
intercalate
" "
<$>
Map
.
lookup
l
_corpus_abstract_lang_map
)
pure
HyperdataDocument
{
_hd_bdd
=
Just
"Hal"
...
...
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
View file @
bf98b6c8
...
...
@@ -35,13 +35,13 @@ get la l q a = do
let
printErr
(
DecodeFailure
e
_
)
=
panic
e
printErr
e
=
panic
(
cs
$
show
e
)
toIsidoreDocs
::
Reply
->
[
IsidoreDoc
]
toIsidoreDocs
(
ReplyOnly
r
)
=
[
r
]
toIsidoreDocs
(
Replies
rs
)
=
rs
iDocs
<-
either
printErr
_content
<$>
Isidore
.
get
l
q
a
hDocs
<-
mapM
(
\
d
->
isidoreToDoc
la
d
)
(
toIsidoreDocs
iDocs
)
pure
hDocs
...
...
@@ -58,7 +58,7 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
author
::
Author
->
Text
author
(
Author
fn
ln
)
=
(
_name
fn
)
<>
", "
<>
(
_name
ln
)
author
(
Authors
aus
)
=
Text
.
intercalate
". "
$
map
author
aus
creator2text
::
Creator
->
Text
creator2text
(
Creator
au
)
=
author
au
creator2text
(
Creators
aus'
)
=
Text
.
intercalate
". "
$
map
author
aus'
...
...
@@ -67,9 +67,9 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText
(
LangText
_l
t1
)
=
t1
langText
(
OnlyText
t2
)
=
t2
langText
(
ArrayText
ts
)
=
Text
.
intercalate
" "
$
map
langText
ts
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
l
(
maybe
(
Just
$
Text
.
pack
$
show
Defaults
.
year
)
(
Just
)
d
)
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
(
maybe
(
Just
$
Text
.
pack
$
show
Defaults
.
year
)
(
Just
)
d
)
pure
HyperdataDocument
{
_hd_bdd
=
Just
"Isidore"
,
_hd_doi
=
Nothing
...
...
@@ -91,5 +91,3 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
Text
.
pack
.
show
)
l
}
src/Gargantext/Core/Text/Corpus/API/Istex.hs
View file @
bf98b6c8
...
...
@@ -82,7 +82,7 @@ toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc
la
(
ISTEX
.
Document
i
t
a
ab
d
s
)
=
do
--printDebug "ISTEX date" d
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
(
Just
.
pack
.
show
)
d
)
Date
.
dateSplit
(
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
(
Just
.
pack
.
show
)
d
)
--printDebug "toDoc Istex" (utctime, (pub_year, pub_month, pub_day))
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
"Istex"
,
_hd_doi
=
Just
i
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
bf98b6c8
...
...
@@ -236,7 +236,7 @@ toDoc ff d = do
let
dateToParse
=
DT
.
replace
" "
""
<$>
lookup
"PY"
d
-- <> Just " " <> lookup "publication_date" d
-- printDebug "[G.C.T.C.Parsers] dateToParse" dateToParse
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
lang
dateToParse
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
dateToParse
let
hd
=
HyperdataDocument
{
_hd_bdd
=
Just
$
DT
.
pack
$
show
ff
,
_hd_doi
=
lookup
"doi"
d
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
View file @
bf98b6c8
...
...
@@ -48,10 +48,10 @@ import qualified Data.List as List
------------------------------------------------------------------------
-- | Parse date to Ints
-- TODO add hours, minutes and seconds
dateSplit
::
Lang
->
Maybe
Text
->
IO
(
Maybe
UTCTime
,
(
Maybe
Year
,
Maybe
Month
,
Maybe
Day
))
dateSplit
_
Nothing
=
pure
(
Nothing
,
(
Nothing
,
Nothing
,
Nothing
))
dateSplit
l
(
Just
txt
)
=
do
utcTime
<-
parse
l
txt
dateSplit
::
Maybe
Text
->
IO
(
Maybe
UTCTime
,
(
Maybe
Year
,
Maybe
Month
,
Maybe
Day
))
dateSplit
Nothing
=
pure
(
Nothing
,
(
Nothing
,
Nothing
,
Nothing
))
dateSplit
(
Just
txt
)
=
do
utcTime
<-
parse
txt
let
(
y
,
m
,
d
)
=
split'
utcTime
pure
(
Just
utcTime
,
(
Just
y
,
Just
m
,
Just
d
))
...
...
@@ -72,15 +72,15 @@ type Day = Int
-- 1900-04-01 19:00:00 UTC
-- >>> parse EN (pack "April 1 1900")
-- 1900-04-01 00:00:00 UTC
parse
::
Lang
->
Text
->
IO
UTCTime
parse
lang
s
=
do
parse
::
Text
->
IO
UTCTime
parse
s
=
do
-- printDebug "Date: " s
let
result
=
dateFlow
(
DucklingFailure
s
)
--printDebug "Date': " dateStr'
case
result
of
DateFlowSuccess
ok
->
pure
ok
DateFlowFailure
->
(
withDebugMode
(
DebugMode
True
)
"[G.C.T.P.T.Date parse]"
(
lang
,
s
)
"[G.C.T.P.T.Date parse]"
s
$
getCurrentTime
)
_
->
panic
"[G.C.T.C.Parsers.Date] parse: Should not happen"
...
...
@@ -206,4 +206,3 @@ parseDateWithDuckling lang input options = do
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
-- TODO check/test Options False or True
analyze
input
contxt
options
$
HashSet
.
fromList
[(
Seal
Time
)]
src/Gargantext/Core/Text/Corpus/Parsers/Wikidata.hs
View file @
bf98b6c8
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.Wikidata
<<<<<<< HEAD
Description : To query Wikidata
Description : To query Wikidata
=======
Description : To query Wikidata
>>>>>>> dev-clustering
...
...
@@ -68,7 +68,7 @@ wikiPageToDocument m wr = do
source
=
Nothing
abstract
=
Just
$
concat
$
take
m
sections
(
date
,
(
year
,
month
,
day
))
<-
dateSplit
EN
$
head
(
date
,
(
year
,
month
,
day
))
<-
dateSplit
$
head
$
catMaybes
[
wr
^.
wr_yearStart
,
wr
^.
wr_yearEnd
...
...
@@ -130,4 +130,3 @@ wikidataQuery n = List.unlines
,
" }"
,
" LIMIT "
<>
(
cs
$
show
n
)
]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment