Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
bf98b6c8
Verified
Commit
bf98b6c8
authored
Aug 08, 2023
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[refactor] simplify dateSplit (no lang needed)
Also, use ISO639 languages in some places.
parent
9b4bef67
Pipeline
#4496
failed with stages
in 10 minutes and 16 seconds
Changes
10
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
48 additions
and
46 deletions
+48
-46
gargantext.cabal
gargantext.cabal
+1
-0
DocumentUpload.hs
src/Gargantext/API/Node/DocumentUpload.hs
+1
-1
Core.hs
src/Gargantext/Core.hs
+19
-0
API.hs
src/Gargantext/Core/Text/Corpus/API.hs
+2
-2
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+8
-22
Isidore.hs
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
+6
-8
Istex.hs
src/Gargantext/Core/Text/Corpus/API/Istex.hs
+1
-1
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+1
-1
Date.hs
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
+7
-8
Wikidata.hs
src/Gargantext/Core/Text/Corpus/Parsers/Wikidata.hs
+2
-3
No files found.
gargantext.cabal
View file @
bf98b6c8
...
...
@@ -954,3 +954,4 @@ test-suite garg-test
, unordered-containers ^>= 0.2.16.0
, validity ^>= 0.11.0.1
default-language: Haskell2010
src/Gargantext/API/Node/DocumentUpload.hs
View file @
bf98b6c8
...
...
@@ -93,7 +93,7 @@ documentUpload nId doc = do
Just
c
->
c
Nothing
->
panic
$
T
.
pack
$
"[G.A.N.DU] Node has no corpus parent: "
<>
show
nId
(
theFullDate
,
(
year
,
month
,
day
))
<-
liftBase
$
dateSplit
EN
(
theFullDate
,
(
year
,
month
,
day
))
<-
liftBase
$
dateSplit
$
Just
$
view
du_date
doc
...
...
src/Gargantext/Core.hs
View file @
bf98b6c8
...
...
@@ -17,6 +17,7 @@ module Gargantext.Core
import
Data.Aeson
import
Data.Either
(
Either
(
Left
))
import
Data.Hashable
(
Hashable
)
import
Data.LanguageCodes
qualified
as
ISO639
import
Data.Maybe
(
fromMaybe
)
import
Data.Morpheus.Types
(
GQLType
)
import
Data.Swagger
...
...
@@ -45,6 +46,7 @@ import qualified Data.Map as Map
-- | All languages supported
-- NOTE: Use international country codes
-- https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
-- TODO This should be deprecated in favor of iso-639 library
data
Lang
=
All
|
DE
|
EL
...
...
@@ -93,6 +95,23 @@ instance Hashable Lang
instance
Arbitrary
Lang
where
arbitrary
=
arbitraryBoundedEnum
toISO639
::
Lang
->
Maybe
ISO639
.
ISO639_1
toISO639
DE
=
Just
ISO639
.
DE
toISO639
EL
=
Just
ISO639
.
EL
toISO639
EN
=
Just
ISO639
.
EN
toISO639
ES
=
Just
ISO639
.
ES
toISO639
FR
=
Just
ISO639
.
FR
toISO639
IT
=
Just
ISO639
.
IT
toISO639
PL
=
Just
ISO639
.
PL
toISO639
PT
=
Just
ISO639
.
PT
toISO639
RU
=
Just
ISO639
.
RU
toISO639
UK
=
Just
ISO639
.
UK
toISO639
ZH
=
Just
ISO639
.
ZH
toISO639
All
=
Nothing
toISO639EN
::
Lang
->
ISO639
.
ISO639_1
toISO639EN
l
=
fromMaybe
ISO639
.
EN
$
toISO639
l
-- | https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
toISO639Lang
::
Lang
->
Maybe
Text
toISO639Lang
All
=
Nothing
...
...
src/Gargantext/Core/Text/Corpus/API.hs
View file @
bf98b6c8
...
...
@@ -24,7 +24,7 @@ import Data.Either (Either(..))
import
Data.Maybe
import
qualified
Data.Text
as
T
import
Gargantext.API.Admin.Orchestrator.Types
(
ExternalAPIs
(
..
),
externalAPIs
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
)
,
toISO639
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
import
qualified
Gargantext.Core.Text.Corpus.API.Arxiv
as
Arxiv
...
...
@@ -63,7 +63,7 @@ get externalAPI la q mPubmedAPIKey limit = do
--docs <- PUBMED.get q default_limit -- EN only by default
--pure (Just $ fromIntegral $ length docs, yieldMany docs)
Arxiv
->
Right
<$>
Arxiv
.
get
la
corpusQuery
limit
HAL
->
first
ExternalAPIError
<$>
HAL
.
getC
la
(
Corpus
.
getRawQuery
q
)
(
Corpus
.
getLimit
<$>
limit
)
HAL
->
first
ExternalAPIError
<$>
HAL
.
getC
(
toISO639
la
)
(
Corpus
.
getRawQuery
q
)
(
Corpus
.
getLimit
<$>
limit
)
IsTex
->
do
docs
<-
ISTEX
.
get
la
(
Corpus
.
getRawQuery
q
)
(
Corpus
.
getLimit
<$>
limit
)
pure
$
Right
(
Just
$
fromIntegral
$
length
docs
,
yieldMany
docs
)
Isidore
->
do
docs
<-
ISIDORE
.
get
la
(
Corpus
.
getLimit
<$>
limit
)
(
Just
$
Corpus
.
getRawQuery
q
)
Nothing
...
...
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
bf98b6c8
...
...
@@ -18,7 +18,7 @@ import Data.LanguageCodes qualified as ISO639
import
Data.Map.Strict
qualified
as
Map
import
Data.Maybe
import
Data.Text
(
Text
,
pack
,
intercalate
)
import
Gargantext.Core
(
Lang
(
..
)
)
-- import Gargantext.Core (Lang(..), toISO639Lang
)
import
Gargantext.Core.Text.Corpus.Parsers.Date
qualified
as
Date
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Defaults
qualified
as
Defaults
...
...
@@ -28,40 +28,26 @@ import HAL.Client qualified as HAL
import
HAL.Doc.Corpus
qualified
as
HAL
import
Servant.Client
(
ClientError
)
toLang
::
Lang
->
Maybe
ISO639
.
ISO639_1
toLang
DE
=
Just
ISO639
.
DE
toLang
EL
=
Just
ISO639
.
EL
toLang
EN
=
Just
ISO639
.
EN
toLang
ES
=
Just
ISO639
.
ES
toLang
FR
=
Just
ISO639
.
FR
toLang
IT
=
Just
ISO639
.
IT
toLang
PL
=
Just
ISO639
.
PL
toLang
PT
=
Just
ISO639
.
PT
toLang
RU
=
Just
ISO639
.
RU
toLang
UK
=
Just
ISO639
.
UK
toLang
ZH
=
Just
ISO639
.
ZH
toLang
All
=
Nothing
get
::
Lang
->
Text
->
Maybe
Int
->
IO
[
HyperdataDocument
]
get
::
Maybe
ISO639
.
ISO639_1
->
Text
->
Maybe
Int
->
IO
[
HyperdataDocument
]
get
la
q
ml
=
do
eDocs
<-
HAL
.
getMetadataWith
[
q
]
(
Just
0
)
(
fromIntegral
<$>
ml
)
(
toLang
la
)
eDocs
<-
HAL
.
getMetadataWith
[
q
]
(
Just
0
)
(
fromIntegral
<$>
ml
)
la
either
(
panic
.
pack
.
show
)
(
\
d
->
mapM
(
toDoc'
la
)
$
HAL
.
_docs
d
)
eDocs
getC
::
Lang
->
Text
->
Maybe
Int
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
getC
::
Maybe
ISO639
.
ISO639_1
->
Text
->
Maybe
Int
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
getC
la
q
ml
=
do
eRes
<-
HAL
.
getMetadataWithC
[
q
]
(
Just
0
)
(
fromIntegral
<$>
ml
)
(
toLang
la
)
eRes
<-
HAL
.
getMetadataWithC
[
q
]
(
Just
0
)
(
fromIntegral
<$>
ml
)
la
pure
$
(
\
(
len
,
docsC
)
->
(
len
,
docsC
.|
mapMC
(
toDoc'
la
)))
<$>
eRes
-- case eRes of
-- Left err -> panic $ pack $ show err
-- Right (len, docsC) -> pure (len, docsC .| mapMC (toDoc' la))
toDoc'
::
Lang
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
::
Maybe
ISO639
.
ISO639_1
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
la
(
HAL
.
Corpus
{
..
})
=
do
-- printDebug "[toDoc corpus] h" h
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
Just
_corpus_date
)
Date
.
dateSplit
(
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
Just
_corpus_date
)
let
abstractDefault
=
intercalate
" "
_corpus_abstract
let
abstract
=
case
toLang
la
of
let
abstract
=
case
la
of
Nothing
->
abstractDefault
Just
l
->
fromMaybe
abstractDefault
(
intercalate
" "
<$>
Map
.
lookup
l
_corpus_abstract_lang_map
)
pure
HyperdataDocument
{
_hd_bdd
=
Just
"Hal"
...
...
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
View file @
bf98b6c8
...
...
@@ -35,13 +35,13 @@ get la l q a = do
let
printErr
(
DecodeFailure
e
_
)
=
panic
e
printErr
e
=
panic
(
cs
$
show
e
)
toIsidoreDocs
::
Reply
->
[
IsidoreDoc
]
toIsidoreDocs
(
ReplyOnly
r
)
=
[
r
]
toIsidoreDocs
(
Replies
rs
)
=
rs
iDocs
<-
either
printErr
_content
<$>
Isidore
.
get
l
q
a
hDocs
<-
mapM
(
\
d
->
isidoreToDoc
la
d
)
(
toIsidoreDocs
iDocs
)
pure
hDocs
...
...
@@ -58,7 +58,7 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
author
::
Author
->
Text
author
(
Author
fn
ln
)
=
(
_name
fn
)
<>
", "
<>
(
_name
ln
)
author
(
Authors
aus
)
=
Text
.
intercalate
". "
$
map
author
aus
creator2text
::
Creator
->
Text
creator2text
(
Creator
au
)
=
author
au
creator2text
(
Creators
aus'
)
=
Text
.
intercalate
". "
$
map
author
aus'
...
...
@@ -67,9 +67,9 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText
(
LangText
_l
t1
)
=
t1
langText
(
OnlyText
t2
)
=
t2
langText
(
ArrayText
ts
)
=
Text
.
intercalate
" "
$
map
langText
ts
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
l
(
maybe
(
Just
$
Text
.
pack
$
show
Defaults
.
year
)
(
Just
)
d
)
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
(
maybe
(
Just
$
Text
.
pack
$
show
Defaults
.
year
)
(
Just
)
d
)
pure
HyperdataDocument
{
_hd_bdd
=
Just
"Isidore"
,
_hd_doi
=
Nothing
...
...
@@ -91,5 +91,3 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
Text
.
pack
.
show
)
l
}
src/Gargantext/Core/Text/Corpus/API/Istex.hs
View file @
bf98b6c8
...
...
@@ -82,7 +82,7 @@ toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc
la
(
ISTEX
.
Document
i
t
a
ab
d
s
)
=
do
--printDebug "ISTEX date" d
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
(
Just
.
pack
.
show
)
d
)
Date
.
dateSplit
(
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
(
Just
.
pack
.
show
)
d
)
--printDebug "toDoc Istex" (utctime, (pub_year, pub_month, pub_day))
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
"Istex"
,
_hd_doi
=
Just
i
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
bf98b6c8
...
...
@@ -236,7 +236,7 @@ toDoc ff d = do
let
dateToParse
=
DT
.
replace
" "
""
<$>
lookup
"PY"
d
-- <> Just " " <> lookup "publication_date" d
-- printDebug "[G.C.T.C.Parsers] dateToParse" dateToParse
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
lang
dateToParse
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
dateToParse
let
hd
=
HyperdataDocument
{
_hd_bdd
=
Just
$
DT
.
pack
$
show
ff
,
_hd_doi
=
lookup
"doi"
d
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
View file @
bf98b6c8
...
...
@@ -48,10 +48,10 @@ import qualified Data.List as List
------------------------------------------------------------------------
-- | Parse date to Ints
-- TODO add hours, minutes and seconds
dateSplit
::
Lang
->
Maybe
Text
->
IO
(
Maybe
UTCTime
,
(
Maybe
Year
,
Maybe
Month
,
Maybe
Day
))
dateSplit
_
Nothing
=
pure
(
Nothing
,
(
Nothing
,
Nothing
,
Nothing
))
dateSplit
l
(
Just
txt
)
=
do
utcTime
<-
parse
l
txt
dateSplit
::
Maybe
Text
->
IO
(
Maybe
UTCTime
,
(
Maybe
Year
,
Maybe
Month
,
Maybe
Day
))
dateSplit
Nothing
=
pure
(
Nothing
,
(
Nothing
,
Nothing
,
Nothing
))
dateSplit
(
Just
txt
)
=
do
utcTime
<-
parse
txt
let
(
y
,
m
,
d
)
=
split'
utcTime
pure
(
Just
utcTime
,
(
Just
y
,
Just
m
,
Just
d
))
...
...
@@ -72,15 +72,15 @@ type Day = Int
-- 1900-04-01 19:00:00 UTC
-- >>> parse EN (pack "April 1 1900")
-- 1900-04-01 00:00:00 UTC
parse
::
Lang
->
Text
->
IO
UTCTime
parse
lang
s
=
do
parse
::
Text
->
IO
UTCTime
parse
s
=
do
-- printDebug "Date: " s
let
result
=
dateFlow
(
DucklingFailure
s
)
--printDebug "Date': " dateStr'
case
result
of
DateFlowSuccess
ok
->
pure
ok
DateFlowFailure
->
(
withDebugMode
(
DebugMode
True
)
"[G.C.T.P.T.Date parse]"
(
lang
,
s
)
"[G.C.T.P.T.Date parse]"
s
$
getCurrentTime
)
_
->
panic
"[G.C.T.C.Parsers.Date] parse: Should not happen"
...
...
@@ -206,4 +206,3 @@ parseDateWithDuckling lang input options = do
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
-- TODO check/test Options False or True
analyze
input
contxt
options
$
HashSet
.
fromList
[(
Seal
Time
)]
src/Gargantext/Core/Text/Corpus/Parsers/Wikidata.hs
View file @
bf98b6c8
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.Wikidata
<<<<<<< HEAD
Description : To query Wikidata
Description : To query Wikidata
=======
Description : To query Wikidata
>>>>>>> dev-clustering
...
...
@@ -68,7 +68,7 @@ wikiPageToDocument m wr = do
source
=
Nothing
abstract
=
Just
$
concat
$
take
m
sections
(
date
,
(
year
,
month
,
day
))
<-
dateSplit
EN
$
head
(
date
,
(
year
,
month
,
day
))
<-
dateSplit
$
head
$
catMaybes
[
wr
^.
wr_yearStart
,
wr
^.
wr_yearEnd
...
...
@@ -130,4 +130,3 @@ wikidataQuery n = List.unlines
,
" }"
,
" LIMIT "
<>
(
cs
$
show
n
)
]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment