Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Grégoire Locqueville
haskell-gargantext
Commits
b87c1360
Commit
b87c1360
authored
May 25, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'origin/97-dev-istex-search' into dev
parents
90e9cdf2
fe959c1d
Changes
16
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
170 additions
and
108 deletions
+170
-108
gargantext.cabal
gargantext.cabal
+1
-0
package.yaml
package.yaml
+1
-0
DocumentUpload.hs
src/Gargantext/API/Node/DocumentUpload.hs
+3
-4
DocumentsFromWriteNodes.hs
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
+4
-3
Search.hs
src/Gargantext/API/Search.hs
+7
-6
API.hs
src/Gargantext/Core/Text/Corpus/API.hs
+1
-2
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+22
-20
Isidore.hs
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
+23
-20
Istex.hs
src/Gargantext/Core/Text/Corpus/API/Istex.hs
+25
-5
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+22
-21
Date.hs
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
+29
-19
Json2Csv.hs
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
+3
-2
Wikidata.hs
src/Gargantext/Core/Text/Corpus/Parsers/Wikidata.hs
+1
-2
Insert.hs
src/Gargantext/Database/Query/Table/Node/Document/Insert.hs
+4
-3
Defaults.hs
src/Gargantext/Defaults.hs
+23
-0
stack.yaml
stack.yaml
+1
-1
No files found.
gargantext.cabal
View file @
b87c1360
...
@@ -64,6 +64,7 @@ library
...
@@ -64,6 +64,7 @@ library
Gargantext.Database.Admin.Config
Gargantext.Database.Admin.Config
Gargantext.Database.Admin.Types.Hyperdata
Gargantext.Database.Admin.Types.Hyperdata
Gargantext.Database.Admin.Types.Node
Gargantext.Database.Admin.Types.Node
Gargantext.Defaults
Gargantext.Core.Text
Gargantext.Core.Text
Gargantext.Core.Text.Context
Gargantext.Core.Text.Context
Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers
...
...
package.yaml
View file @
b87c1360
...
@@ -88,6 +88,7 @@ library:
...
@@ -88,6 +88,7 @@ library:
-
Gargantext.Database.Admin.Config
-
Gargantext.Database.Admin.Config
-
Gargantext.Database.Admin.Types.Hyperdata
-
Gargantext.Database.Admin.Types.Hyperdata
-
Gargantext.Database.Admin.Types.Node
-
Gargantext.Database.Admin.Types.Node
-
Gargantext.Defaults
-
Gargantext.Core.Text
-
Gargantext.Core.Text
-
Gargantext.Core.Text.Context
-
Gargantext.Core.Text.Context
-
Gargantext.Core.Text.Corpus.Parsers
-
Gargantext.Core.Text.Corpus.Parsers
...
...
src/Gargantext/API/Node/DocumentUpload.hs
View file @
b87c1360
...
@@ -100,10 +100,9 @@ documentUpload nId doc = do
...
@@ -100,10 +100,9 @@ documentUpload nId doc = do
Just
c
->
c
Just
c
->
c
Nothing
->
panic
$
T
.
pack
$
"[G.A.N.DU] Node has no corpus parent: "
<>
show
nId
Nothing
->
panic
$
T
.
pack
$
"[G.A.N.DU] Node has no corpus parent: "
<>
show
nId
(
theFullDate
,
(
year
,
month
,
day
))
<-
liftBase
(
theFullDate
,
(
year
,
month
,
day
))
<-
liftBase
$
dateSplit
EN
$
dateSplit
EN
$
Just
$
Just
$
view
du_date
doc
<>
"T:0:0:0"
$
view
du_date
doc
<>
"T:0:0:0"
let
hd
=
HyperdataDocument
{
_hd_bdd
=
Nothing
let
hd
=
HyperdataDocument
{
_hd_bdd
=
Nothing
,
_hd_doi
=
Nothing
,
_hd_doi
=
Nothing
...
...
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
View file @
b87c1360
...
@@ -37,6 +37,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Frame
...
@@ -37,6 +37,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Frame
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Node
(
getChildrenByType
,
getClosestParentIdByType'
,
getNodeWith
)
import
Gargantext.Database.Query.Table.Node
(
getChildrenByType
,
getClosestParentIdByType'
,
getNodeWith
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
qualified
Gargantext.Defaults
as
Defaults
import
Gargantext.Prelude
import
Gargantext.Prelude
import
GHC.Generics
(
Generic
)
import
GHC.Generics
(
Generic
)
import
Servant
import
Servant
...
@@ -114,9 +115,9 @@ hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, cont
...
@@ -114,9 +115,9 @@ hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, cont
date'
=
(
\
(
Date
{
year
,
month
,
day
})
->
T
.
concat
[
T
.
pack
$
show
year
,
"-"
date'
=
(
\
(
Date
{
year
,
month
,
day
})
->
T
.
concat
[
T
.
pack
$
show
year
,
"-"
,
T
.
pack
$
show
month
,
"-"
,
T
.
pack
$
show
month
,
"-"
,
T
.
pack
$
show
day
])
<$>
date
,
T
.
pack
$
show
day
])
<$>
date
year'
=
fromIntegral
$
maybe
2021
(
\
(
Date
{
year
})
->
year
)
date
year'
=
fromIntegral
$
maybe
Defaults
.
year
(
\
(
Date
{
year
})
->
year
)
date
month'
=
fromIntegral
$
maybe
10
(
\
(
Date
{
month
})
->
month
)
date
month'
=
maybe
Defaults
.
month
(
\
(
Date
{
month
})
->
fromIntegral
month
)
date
day'
=
fromIntegral
$
maybe
4
(
\
(
Date
{
day
})
->
day
)
date
in
day'
=
maybe
Defaults
.
day
(
\
(
Date
{
day
})
->
fromIntegral
day
)
date
in
Right
HyperdataDocument
{
_hd_bdd
=
Just
"FrameWrite"
Right
HyperdataDocument
{
_hd_bdd
=
Just
"FrameWrite"
,
_hd_doi
=
Nothing
,
_hd_doi
=
Nothing
,
_hd_url
=
Nothing
,
_hd_url
=
Nothing
...
...
src/Gargantext/API/Search.hs
View file @
b87c1360
...
@@ -31,6 +31,7 @@ import Gargantext.Database.Admin.Types.Hyperdata (HyperdataContact(..), Hyperdat
...
@@ -31,6 +31,7 @@ import Gargantext.Database.Admin.Types.Hyperdata (HyperdataContact(..), Hyperdat
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
_cw_organization
)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
_cw_organization
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Facet
import
Gargantext.Database.Query.Facet
import
qualified
Gargantext.Defaults
as
Defaults
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Utils.Aeson
(
defaultTaggedObject
)
import
Gargantext.Utils.Aeson
(
defaultTaggedObject
)
import
Servant
import
Servant
...
@@ -258,12 +259,12 @@ instance ToHyperdataRow HyperdataDocument where
...
@@ -258,12 +259,12 @@ instance ToHyperdataRow HyperdataDocument where
,
_hr_language_iso2
=
fromMaybe
"EN"
_hd_language_iso2
,
_hr_language_iso2
=
fromMaybe
"EN"
_hd_language_iso2
,
_hr_page
=
fromMaybe
0
_hd_page
,
_hr_page
=
fromMaybe
0
_hd_page
,
_hr_publication_date
=
fromMaybe
""
_hd_publication_date
,
_hr_publication_date
=
fromMaybe
""
_hd_publication_date
,
_hr_publication_
day
=
fromMaybe
1
_hd_publication_day
,
_hr_publication_
year
=
fromMaybe
(
fromIntegral
Defaults
.
year
)
_hd_publication_year
,
_hr_publication_
hour
=
fromMaybe
1
_hd_publication_hour
,
_hr_publication_
month
=
fromMaybe
Defaults
.
month
_hd_publication_month
,
_hr_publication_
minute
=
fromMaybe
1
_hd_publication_minute
,
_hr_publication_
day
=
fromMaybe
Defaults
.
day
_hd_publication_day
,
_hr_publication_
month
=
fromMaybe
1
_hd_publication_month
,
_hr_publication_
hour
=
fromMaybe
0
_hd_publication_hour
,
_hr_publication_
second
=
fromMaybe
1
_hd_publication_second
,
_hr_publication_
minute
=
fromMaybe
0
_hd_publication_minute
,
_hr_publication_
year
=
fromMaybe
2020
_hd_publication_year
,
_hr_publication_
second
=
fromMaybe
0
_hd_publication_second
,
_hr_source
=
fromMaybe
""
_hd_source
,
_hr_source
=
fromMaybe
""
_hd_source
,
_hr_title
=
fromMaybe
"Title"
_hd_title
,
_hr_title
=
fromMaybe
"Title"
_hd_title
,
_hr_url
=
fromMaybe
""
_hd_url
,
_hr_url
=
fromMaybe
""
_hd_url
...
...
src/Gargantext/Core/Text/Corpus/API.hs
View file @
b87c1360
...
@@ -15,8 +15,7 @@ module Gargantext.Core.Text.Corpus.API
...
@@ -15,8 +15,7 @@ module Gargantext.Core.Text.Corpus.API
,
Limit
,
Limit
,
get
,
get
,
externalAPIs
,
externalAPIs
)
)
where
where
import
Conduit
import
Conduit
import
Data.Either
(
Either
(
..
))
import
Data.Either
(
Either
(
..
))
...
...
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
b87c1360
...
@@ -20,6 +20,7 @@ import Servant.Client (ClientError)
...
@@ -20,6 +20,7 @@ import Servant.Client (ClientError)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
qualified
Gargantext.Defaults
as
Defaults
import
Gargantext.Prelude
import
Gargantext.Prelude
import
qualified
Gargantext.Core.Text.Corpus.Parsers.Date
as
Date
import
qualified
Gargantext.Core.Text.Corpus.Parsers.Date
as
Date
import
qualified
HAL
as
HAL
import
qualified
HAL
as
HAL
...
@@ -41,24 +42,25 @@ getC la q ml = do
...
@@ -41,24 +42,25 @@ getC la q ml = do
toDoc'
::
Lang
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
::
Lang
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
la
(
HAL
.
Corpus
i
t
ab
d
s
aus
affs
struct_id
)
=
do
toDoc'
la
(
HAL
.
Corpus
i
t
ab
d
s
aus
affs
struct_id
)
=
do
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
"2019"
)
Just
d
)
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
"Hal"
Date
.
dateSplit
la
(
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
Just
d
)
,
_hd_doi
=
Just
$
pack
$
show
i
pure
HyperdataDocument
{
_hd_bdd
=
Just
"Hal"
,
_hd_url
=
Nothing
,
_hd_doi
=
Just
$
pack
$
show
i
,
_hd_uniqId
=
Nothing
,
_hd_url
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_page
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_title
=
Just
$
intercalate
" "
t
,
_hd_page
=
Nothing
,
_hd_authors
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
aus
,
_hd_title
=
Just
$
intercalate
" "
t
,
_hd_institutes
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
$
affs
<>
map
(
cs
.
show
)
struct_id
,
_hd_authors
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
aus
,
_hd_source
=
Just
$
maybe
"Nothing"
identity
s
,
_hd_institutes
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
$
affs
<>
map
(
cs
.
show
)
struct_id
,
_hd_abstract
=
Just
$
intercalate
" "
ab
,
_hd_source
=
Just
$
maybe
"Nothing"
identity
s
,
_hd_publication_date
=
fmap
(
pack
.
show
)
utctime
,
_hd_abstract
=
Just
$
intercalate
" "
ab
,
_hd_publication_year
=
pub_year
,
_hd_publication_date
=
fmap
(
pack
.
show
)
utctime
,
_hd_publication_month
=
pub_month
,
_hd_publication_year
=
pub_year
,
_hd_publication_day
=
pub_day
,
_hd_publication_month
=
pub_month
,
_hd_publication_hour
=
Nothing
,
_hd_publication_day
=
pub_day
,
_hd_publication_minute
=
Nothing
,
_hd_publication_hour
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_language_iso2
=
Just
$
(
pack
.
show
)
la
}
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
pack
.
show
)
la
}
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
View file @
b87c1360
...
@@ -18,6 +18,7 @@ import Data.Text (Text)
...
@@ -18,6 +18,7 @@ import Data.Text (Text)
import
qualified
Data.Text
as
Text
import
qualified
Data.Text
as
Text
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
qualified
Gargantext.Defaults
as
Defaults
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Isidore.Client
import
Isidore.Client
import
Servant.Client
import
Servant.Client
...
@@ -67,26 +68,28 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
...
@@ -67,26 +68,28 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText
(
OnlyText
t2
)
=
t2
langText
(
OnlyText
t2
)
=
t2
langText
(
ArrayText
ts
)
=
Text
.
intercalate
" "
$
map
langText
ts
langText
(
ArrayText
ts
)
=
Text
.
intercalate
" "
$
map
langText
ts
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
l
(
maybe
(
Just
"2019"
)
(
Just
)
d
)
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
l
(
maybe
(
Just
$
Text
.
pack
$
show
Defaults
.
year
)
(
Just
)
d
)
pure
$
HyperdataDocument
(
Just
"Isidore"
)
pure
HyperdataDocument
Nothing
{
_hd_bdd
=
Just
"Isidore"
u
,
_hd_doi
=
Nothing
Nothing
,
_hd_url
=
u
Nothing
,
_hd_uniqId
=
Nothing
Nothing
,
_hd_uniqIdBdd
=
Nothing
(
Just
$
cleanText
$
langText
t
)
,
_hd_page
=
Nothing
(
creator2text
<$>
as
)
,
_hd_title
=
Just
$
cleanText
$
langText
t
Nothing
,
_hd_authors
=
creator2text
<$>
as
(
Just
$
maybe
"Nothing"
identity
$
_sourceName
<$>
s
)
,
_hd_institutes
=
Nothing
(
cleanText
<$>
langText
<$>
a
)
,
_hd_source
=
Just
$
maybe
"Nothing"
identity
$
_sourceName
<$>
s
(
fmap
(
Text
.
pack
.
show
)
utcTime
)
,
_hd_abstract
=
cleanText
<$>
langText
<$>
a
(
pub_year
)
,
_hd_publication_date
=
fmap
(
Text
.
pack
.
show
)
utcTime
(
pub_month
)
,
_hd_publication_year
=
pub_year
(
pub_day
)
,
_hd_publication_month
=
pub_month
Nothing
,
_hd_publication_day
=
pub_day
Nothing
,
_hd_publication_hour
=
Nothing
Nothing
,
_hd_publication_minute
=
Nothing
(
Just
$
(
Text
.
pack
.
show
)
l
)
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
Text
.
pack
.
show
)
l
}
src/Gargantext/Core/Text/Corpus/API/Istex.hs
View file @
b87c1360
...
@@ -13,12 +13,14 @@ Portability : POSIX
...
@@ -13,12 +13,14 @@ Portability : POSIX
module
Gargantext.Core.Text.Corpus.API.Istex
module
Gargantext.Core.Text.Corpus.API.Istex
where
where
import
Data.Either
(
Either
(
..
))
import
Data.List
(
concat
)
import
Data.List
(
concat
)
import
Data.Maybe
import
Data.Maybe
import
Data.Text
(
Text
,
pack
)
import
Data.Text
(
Text
,
pack
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
qualified
Gargantext.Defaults
as
Defaults
import
Gargantext.Prelude
import
Gargantext.Prelude
import
qualified
Gargantext.Core.Text.Corpus.Parsers.Date
as
Date
import
qualified
Gargantext.Core.Text.Corpus.Parsers.Date
as
Date
import
qualified
ISTEX
as
ISTEX
import
qualified
ISTEX
as
ISTEX
...
@@ -26,19 +28,37 @@ import qualified ISTEX.Client as ISTEX
...
@@ -26,19 +28,37 @@ import qualified ISTEX.Client as ISTEX
get
::
Lang
->
Text
->
Maybe
Integer
->
IO
[
HyperdataDocument
]
get
::
Lang
->
Text
->
Maybe
Integer
->
IO
[
HyperdataDocument
]
get
la
q
ml
=
do
get
la
q
ml
=
do
docs
<-
ISTEX
.
getMetadataWith
q
(
fromIntegral
<$>
ml
)
--docs <- ISTEX.getMetadataWith q (fromIntegral <$> ml)
either
(
panic
.
pack
.
show
)
(
toDoc'
la
)
docs
printDebug
"[Istex.get] calling getMetadataScrollProgress for la"
la
printDebug
"[Istex.get] calling getMetadataScrollProgress for q"
q
printDebug
"[Istex.get] calling getMetadataScrollProgress for ml"
ml
-- The "scroll" expects "d/h/m/s/ms" time interval. Let's set it to "1 month"
--eDocs <- ISTEX.getMetadataScroll q ((\_n -> pack $ "1m") <$> ml) Nothing 0 --(fromIntegral <$> ml)
eDocs
<-
ISTEX
.
getMetadataScroll
q
"1m"
Nothing
0
--(fromIntegral <$> ml)
printDebug
"[Istex.get] will print length"
(
0
::
Int
)
case
eDocs
of
Left
_
->
pure
()
Right
(
ISTEX
.
Documents
{
_documents_hits
})
->
printDebug
"[Istex.get] length docs"
$
length
_documents_hits
--ISTEX.getMetadataScrollProgress q ((\_ -> pack $ "1m") <$> ml) Nothing progress errorHandler
case
eDocs
of
Left
err
->
panic
.
pack
.
show
$
err
Right
docs
->
toDoc'
la
docs
--pure $ either (panic . pack . show) (toDoc' la) eDocs
-- where
-- progress (ISTEX.ScrollResponse { _scroll_documents = ISTEX.Documents { _documents_hits }}) =
-- printDebug "[Istex.get] got docs: " $ length _documents_hits
-- errorHandler err = printDebug "[Istex.get] error" $ show err
toDoc'
::
Lang
->
ISTEX
.
Documents
->
IO
[
HyperdataDocument
]
toDoc'
::
Lang
->
ISTEX
.
Documents
->
IO
[
HyperdataDocument
]
toDoc'
la
docs'
=
do
toDoc'
la
docs'
=
mapM
(
toDoc
la
)
(
ISTEX
.
_documents_hits
docs'
)
--printDebug "ISTEX" (ISTEX._documents_total docs')
--printDebug "ISTEX" (ISTEX._documents_total docs')
mapM
(
toDoc
la
)
(
ISTEX
.
_documents_hits
docs'
)
-- | TODO remove dateSplit here
-- | TODO remove dateSplit here
-- TODO current year as default
-- TODO current year as default
toDoc
::
Lang
->
ISTEX
.
Document
->
IO
HyperdataDocument
toDoc
::
Lang
->
ISTEX
.
Document
->
IO
HyperdataDocument
toDoc
la
(
ISTEX
.
Document
i
t
a
ab
d
s
)
=
do
toDoc
la
(
ISTEX
.
Document
i
t
a
ab
d
s
)
=
do
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
"2019"
)
(
Just
.
pack
.
show
)
d
)
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
(
Just
.
pack
.
show
)
d
)
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
"Istex"
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
"Istex"
,
_hd_doi
=
Just
i
,
_hd_doi
=
Just
i
,
_hd_url
=
Nothing
,
_hd_url
=
Nothing
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
b87c1360
...
@@ -164,6 +164,7 @@ parseFormatC _ _ _ = undefined
...
@@ -164,6 +164,7 @@ parseFormatC _ _ _ = undefined
-- | Parse file into documents
-- | Parse file into documents
-- TODO manage errors here
-- TODO manage errors here
-- TODO: to debug maybe add the filepath in error message
-- TODO: to debug maybe add the filepath in error message
parseFile
::
FileType
->
FileFormat
->
FilePath
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseFile
::
FileType
->
FileFormat
->
FilePath
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseFile
CsvHal
Plain
p
=
parseHal
p
parseFile
CsvHal
Plain
p
=
parseHal
p
parseFile
CsvGargV3
Plain
p
=
parseCsv
p
parseFile
CsvGargV3
Plain
p
=
parseCsv
p
...
@@ -185,27 +186,27 @@ toDoc ff d = do
...
@@ -185,27 +186,27 @@ toDoc ff d = do
let
dateToParse
=
DT
.
replace
"-"
" "
<$>
lookup
"PY"
d
<>
Just
" "
<>
lookup
"publication_date"
d
let
dateToParse
=
DT
.
replace
"-"
" "
<$>
lookup
"PY"
d
<>
Just
" "
<>
lookup
"publication_date"
d
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
lang
dateToParse
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
lang
dateToParse
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
$
DT
.
pack
$
show
ff
pure
HyperdataDocument
{
_hd_bdd
=
Just
$
DT
.
pack
$
show
ff
,
_hd_doi
=
lookup
"doi"
d
,
_hd_doi
=
lookup
"doi"
d
,
_hd_url
=
lookup
"URL"
d
,
_hd_url
=
lookup
"URL"
d
,
_hd_uniqId
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
lookup
"title"
d
,
_hd_title
=
lookup
"title"
d
,
_hd_authors
=
Nothing
,
_hd_authors
=
Nothing
,
_hd_institutes
=
lookup
"authors"
d
,
_hd_institutes
=
lookup
"authors"
d
,
_hd_source
=
lookup
"source"
d
,
_hd_source
=
lookup
"source"
d
,
_hd_abstract
=
lookup
"abstract"
d
,
_hd_abstract
=
lookup
"abstract"
d
,
_hd_publication_date
=
fmap
(
DT
.
pack
.
show
)
utcTime
,
_hd_publication_date
=
fmap
(
DT
.
pack
.
show
)
utcTime
,
_hd_publication_year
=
pub_year
,
_hd_publication_year
=
pub_year
,
_hd_publication_month
=
pub_month
,
_hd_publication_month
=
pub_month
,
_hd_publication_day
=
pub_day
,
_hd_publication_day
=
pub_day
,
_hd_publication_hour
=
Nothing
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
DT
.
pack
.
show
)
lang
}
,
_hd_language_iso2
=
Just
$
(
DT
.
pack
.
show
)
lang
}
enrichWith
::
FileType
enrichWith
::
FileType
->
(
a
,
[[[(
DB
.
ByteString
,
DB
.
ByteString
)]]])
->
(
a
,
[[(
Text
,
Text
)]])
->
(
a
,
[[[(
DB
.
ByteString
,
DB
.
ByteString
)]]])
->
(
a
,
[[(
Text
,
Text
)]])
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
View file @
b87c1360
...
@@ -25,8 +25,10 @@ import Data.Aeson (toJSON, Value)
...
@@ -25,8 +25,10 @@ import Data.Aeson (toJSON, Value)
import
Data.Either
(
Either
(
..
))
import
Data.Either
(
Either
(
..
))
import
Data.HashMap.Strict
as
HM
hiding
(
map
)
import
Data.HashMap.Strict
as
HM
hiding
(
map
)
import
Data.Text
(
Text
,
unpack
,
splitOn
,
replace
)
import
Data.Text
(
Text
,
unpack
,
splitOn
,
replace
)
import
Data.Time
(
defaultTimeLocale
,
toGregorian
,
iso8601DateFormat
,
parseTimeM
)
import
Data.Time
(
defaultTimeLocale
,
iso8601DateFormat
,
parseTimeM
,
toGregorian
)
import
qualified
Data.Time.Calendar
as
DTC
import
Data.Time.Clock
(
UTCTime
(
..
),
getCurrentTime
)
import
Data.Time.Clock
(
UTCTime
(
..
),
getCurrentTime
)
import
Data.Time.Clock
(
secondsToDiffTime
)
import
Data.Time.LocalTime
(
utc
)
import
Data.Time.LocalTime
(
utc
)
import
Data.Time.LocalTime.TimeZone.Series
(
zonedTimeToZoneSeriesTime
)
import
Data.Time.LocalTime.TimeZone.Series
(
zonedTimeToZoneSeriesTime
)
import
Duckling.Api
(
analyze
)
import
Duckling.Api
(
analyze
)
...
@@ -37,7 +39,7 @@ import Duckling.Types (ResolvedToken(..), ResolvedVal(..))
...
@@ -37,7 +39,7 @@ import Duckling.Types (ResolvedToken(..), ResolvedVal(..))
import
Gargantext.Core
(
Lang
(
FR
,
EN
))
import
Gargantext.Core
(
Lang
(
FR
,
EN
))
import
Gargantext.Core.Types
(
DebugMode
(
..
),
withDebugMode
)
import
Gargantext.Core.Types
(
DebugMode
(
..
),
withDebugMode
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
qualified
Control.Exception
as
CE
--
import qualified Control.Exception as CE
import
qualified
Data.Aeson
as
Json
import
qualified
Data.Aeson
as
Json
import
qualified
Data.HashSet
as
HashSet
import
qualified
Data.HashSet
as
HashSet
import
qualified
Duckling.Core
as
DC
import
qualified
Duckling.Core
as
DC
...
@@ -136,28 +138,28 @@ parserLang lang = panic $ "[G.C.T.C.P.Date] Lang not implemented" <> (cs $ show
...
@@ -136,28 +138,28 @@ parserLang lang = panic $ "[G.C.T.C.P.Date] Lang not implemented" <> (cs $ show
parseRawSafe
::
Lang
->
Text
->
IO
DateFlow
parseRawSafe
::
Lang
->
Text
->
IO
DateFlow
parseRawSafe
lang
text
=
do
parseRawSafe
lang
text
=
do
triedParseRaw
<-
tryP
arseRaw
lang
text
let
triedParseRaw
=
p
arseRaw
lang
text
dateStr'
<-
case
triedParseRaw
of
dateStr'
<-
case
triedParseRaw
of
Left
(
CE
.
SomeException
err
)
->
do
--Left (CE.SomeException err) -> do
Left
err
->
do
envLang
<-
getEnv
"LANG"
envLang
<-
getEnv
"LANG"
printDebug
"[G.C.T.C.P.Date] Exception: "
(
err
,
envLang
,
lang
,
text
)
printDebug
"[G.C.T.C.P.Date] Exception: "
(
err
,
envLang
,
lang
,
text
)
pure
$
DucklingFailure
text
pure
$
DucklingFailure
text
Right
res
->
pure
$
DucklingSuccess
res
Right
res
->
pure
$
DucklingSuccess
res
pure
dateStr'
pure
dateStr'
tryParseRaw
::
CE
.
Exception
e
=>
Lang
->
Text
->
IO
(
Either
e
Text
)
--
tryParseRaw :: CE.Exception e => Lang -> Text -> IO (Either e Text)
tryParseRaw
lang
text
=
CE
.
try
(
parseRaw
lang
text
)
--
tryParseRaw lang text = CE.try (parseRaw lang text)
parseRaw
::
Lang
->
Text
->
IO
Text
parseRaw
::
Lang
->
Text
->
Either
Text
Text
parseRaw
lang
text
=
do
-- case result
parseRaw
lang
text
=
do
-- case result
maybeResult
<-
extractValue
<$>
getTimeValue
let
maybeResult
=
extractValue
$
getTimeValue
<$>
parseDateWithDuckling
lang
text
(
Options
True
)
$
parseDateWithDuckling
lang
text
(
Options
True
)
case
maybeResult
of
case
maybeResult
of
Just
result
->
pure
result
Just
result
->
Right
result
Nothing
->
do
Nothing
->
do
printDebug
(
"[G.C.T.C.P.D.parseRaw] ERROR "
<>
(
cs
.
show
)
lang
)
-- printDebug ("[G.C.T.C.P.D.parseRaw] ERROR " <> (cs . show) lang) text
text
Left
$
"[G.C.T.C.P.D.parseRaw ERROR] "
<>
(
cs
.
show
)
lang
<>
" :: "
<>
text
pure
""
getTimeValue
::
[
ResolvedToken
]
->
Maybe
Value
getTimeValue
::
[
ResolvedToken
]
->
Maybe
Value
getTimeValue
rt
=
case
head
rt
of
getTimeValue
rt
=
case
head
rt
of
...
@@ -182,13 +184,21 @@ utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time
...
@@ -182,13 +184,21 @@ utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time
-- | Local Context which depends on Lang and Time
-- | Local Context which depends on Lang and Time
localContext
::
Lang
->
DucklingTime
->
Context
localContext
::
Lang
->
DucklingTime
->
Context
localContext
lang
dt
=
Context
{
referenceTime
=
dt
,
locale
=
makeLocale
(
parserLang
lang
)
Nothing
}
localContext
lang
dt
=
Context
{
referenceTime
=
dt
,
locale
=
makeLocale
(
parserLang
lang
)
Nothing
}
-- | Fixed calendar day used as the reference date for Duckling parsing:
-- year 1, month 1, day 1 of the proleptic Gregorian calendar.
defaultDay :: DTC.Day
defaultDay = DTC.fromGregorian 1 1 1
-- | Fixed reference instant: midnight (0 seconds into the day) on
-- 'defaultDay'. Used in place of the wall-clock time so that building the
-- Duckling context needs no IO.
defaultUTCTime :: UTCTime
defaultUTCTime = UTCTime defaultDay (secondsToDiffTime 0)
-- | Date parser with Duckling
-- | Date parser with Duckling
parseDateWithDuckling
::
Lang
->
Text
->
Options
->
IO
[
ResolvedToken
]
parseDateWithDuckling
::
Lang
->
Text
->
Options
->
[
ResolvedToken
]
parseDateWithDuckling
lang
input
options
=
do
parseDateWithDuckling
lang
input
options
=
do
contxt
<-
localContext
lang
<$>
utcToDucklingTime
<$>
getCurrent
Time
let
contxt
=
localContext
lang
$
utcToDucklingTime
defaultUTC
Time
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
-- TODO check/test Options False or True
-- TODO check/test Options False or True
pure
$
analyze
input
contxt
options
$
HashSet
.
fromList
[(
Seal
Time
)]
analyze
input
contxt
options
$
HashSet
.
fromList
[(
Seal
Time
)]
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
View file @
b87c1360
...
@@ -23,6 +23,7 @@ import Data.Aeson.TH (deriveJSON)
...
@@ -23,6 +23,7 @@ import Data.Aeson.TH (deriveJSON)
import
Data.ByteString.Lazy
(
readFile
)
import
Data.ByteString.Lazy
(
readFile
)
import
Data.Text
(
Text
,
unpack
)
import
Data.Text
(
Text
,
unpack
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
qualified
Gargantext.Defaults
as
Defaults
import
Gargantext.Prelude
import
Gargantext.Prelude
import
System.IO
(
FilePath
)
import
System.IO
(
FilePath
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
CsvDoc
(
..
),
writeFile
,
headerCsvGargV3
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
CsvDoc
(
..
),
writeFile
,
headerCsvGargV3
)
...
@@ -52,8 +53,8 @@ patent2csvDoc (Patent { .. }) =
...
@@ -52,8 +53,8 @@ patent2csvDoc (Patent { .. }) =
CsvDoc
{
csv_title
=
_patent_title
CsvDoc
{
csv_title
=
_patent_title
,
csv_source
=
"Source"
,
csv_source
=
"Source"
,
csv_publication_year
=
Just
$
read
(
unpack
_patent_year
)
,
csv_publication_year
=
Just
$
read
(
unpack
_patent_year
)
,
csv_publication_month
=
Just
1
,
csv_publication_month
=
Just
$
Defaults
.
month
,
csv_publication_day
=
Just
1
,
csv_publication_day
=
Just
$
Defaults
.
day
,
csv_abstract
=
_patent_abstract
,
csv_abstract
=
_patent_abstract
,
csv_authors
=
"Authors"
}
,
csv_authors
=
"Authors"
}
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Wikidata.hs
View file @
b87c1360
...
@@ -68,8 +68,7 @@ wikiPageToDocument m wr = do
...
@@ -68,8 +68,7 @@ wikiPageToDocument m wr = do
source
=
Nothing
source
=
Nothing
abstract
=
Just
$
concat
$
take
m
sections
abstract
=
Just
$
concat
$
take
m
sections
(
date
,
(
year
,
month
,
day
))
(
date
,
(
year
,
month
,
day
))
<-
dateSplit
EN
$
head
<-
dateSplit
EN
$
head
$
catMaybes
$
catMaybes
[
wr
^.
wr_yearStart
[
wr
^.
wr_yearStart
,
wr
^.
wr_yearEnd
,
wr
^.
wr_yearEnd
...
...
src/Gargantext/Database/Query/Table/Node/Document/Insert.hs
View file @
b87c1360
...
@@ -74,6 +74,7 @@ import Gargantext.Database.Admin.Types.Hyperdata
...
@@ -74,6 +74,7 @@ import Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Prelude
(
Cmd
,
runPGSQuery
{-, formatPGSQuery-}
)
import
Gargantext.Database.Prelude
(
Cmd
,
runPGSQuery
{-, formatPGSQuery-}
)
import
Gargantext.Database.Schema.Node
(
NodePoly
(
..
))
import
Gargantext.Database.Schema.Node
(
NodePoly
(
..
))
import
qualified
Gargantext.Defaults
as
Defaults
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Prelude.Crypto.Hash
(
hash
)
import
Gargantext.Prelude.Crypto.Hash
(
hash
)
import
qualified
Data.Text
as
DT
(
pack
,
concat
,
take
)
import
qualified
Data.Text
as
DT
(
pack
,
concat
,
take
)
...
@@ -282,9 +283,9 @@ instance ToNode HyperdataDocument where
...
@@ -282,9 +283,9 @@ instance ToNode HyperdataDocument where
-- NOTE: There is no year '0' in postgres, there is year 1 AD and before that year 1 BC:
-- NOTE: There is no year '0' in postgres, there is year 1 AD and before that year 1 BC:
-- select '0001-01-01'::date, '0001-01-01'::date - '1 day'::interval;
-- select '0001-01-01'::date, '0001-01-01'::date - '1 day'::interval;
-- 0001-01-01 0001-12-31 00:00:00 BC
-- 0001-01-01 0001-12-31 00:00:00 BC
y
=
maybe
1
fromIntegral
$
_hd_publication_year
h
-- Publication year, falling back to the default *year* (not the default
-- day) when the document carries none. 'Defaults.year' is an Integer, so
-- it is converted to the field's type before the outer conversion.
y = fromIntegral $ fromMaybe (fromIntegral Defaults.year) $ _hd_publication_year h
m
=
fromMaybe
1
$
_hd_publication_month
h
m
=
fromMaybe
Defaults
.
month
$
_hd_publication_month
h
d
=
fromMaybe
1
$
_hd_publication_day
h
-- Publication day, falling back to the default *day* (not the default
-- year) when the document carries none.
d = fromMaybe Defaults.day $ _hd_publication_day h
-- TODO better Node
-- TODO better Node
instance
ToNode
HyperdataContact
where
instance
ToNode
HyperdataContact
where
...
...
src/Gargantext/Defaults.hs
0 → 100644
View file @
b87c1360
{-|
Module : Gargantext.Defaults
Description : Gargantext default values
Copyright : (c) CNRS, 2021-present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module
Gargantext.Defaults
where
import
Gargantext.Prelude
-- | Fallback publication year for documents that provide none.
-- NOTE(review): presumably 1 rather than 0 because PostgreSQL dates have
-- no year 0 — confirm against the insert code.
year :: Integer
year = 1
-- | Fallback publication month (January) for documents that provide none.
month :: Int
month = 1
-- | Fallback publication day of month (the 1st) for documents that
-- provide none.
day :: Int
day = 1
stack.yaml
View file @
b87c1360
...
@@ -73,7 +73,7 @@ extra-deps:
...
@@ -73,7 +73,7 @@ extra-deps:
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit
:
02e03d9b856bd35d391f43da8525330f9d184615
commit
:
02e03d9b856bd35d391f43da8525330f9d184615
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit
:
daeae80365250c4bd539f0a65e271f9aa37f731f
commit
:
a4a6fb6a578255c9e5b52aab2afccf874976a3f5
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit
:
9a43470241690a19c1c381c42a62c5dd4e28dff2
commit
:
9a43470241690a19c1c381c42a62c5dd4e28dff2
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment