Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Grégoire Locqueville
haskell-gargantext
Commits
d0039f33
Commit
d0039f33
authored
Jun 16, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] IsTex basic working for simple queries without quotes
parent
db6214d3
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
22 additions
and
18 deletions
+22
-18
gargantext.cabal
gargantext.cabal
+1
-1
Istex.hs
src/Gargantext/Core/Text/Corpus/API/Istex.hs
+12
-9
Date.hs
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
+8
-7
stack.yaml
stack.yaml
+1
-1
No files found.
gargantext.cabal
View file @
d0039f33
...
...
@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.5.8.9.5
version:
0.0.5.8.9.5
synopsis: Search, map, share
description: Please see README.md
category: Data
...
...
src/Gargantext/Core/Text/Corpus/API/Istex.hs
View file @
d0039f33
...
...
@@ -26,6 +26,7 @@ import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
import
qualified
ISTEX
as
ISTEX
import
qualified
ISTEX.Client
as
ISTEX
get
::
Lang
->
Text
->
Maybe
Integer
->
IO
[
HyperdataDocument
]
get
la
q
_ml
=
do
--docs <- ISTEX.getMetadataWith q (fromIntegral <$> ml)
...
...
@@ -34,7 +35,7 @@ get la q _ml = do
--printDebug "[Istex.get] calling getMetadataScrollProgress for ml" ml
-- The "scroll" expects a "d/h/m/s/ms" time interval. Let's set it to "1m", i.e. 1 minute (in this unit set "m" is minutes, not months)
--eDocs <- ISTEX.getMetadataScroll q ((\_n -> pack $ "1m") <$> ml) Nothing 0 --(fromIntegral <$> ml)
eDocs
<-
ISTEX
.
getMetadataScroll
q
"1m"
Nothing
0
--(fromIntegral <$> ml)
eDocs
<-
ISTEX
.
getMetadataScroll
(
q
<>
" abstract:*"
)
"1m"
Nothing
0
--(fromIntegral <$> ml)
printDebug
"[Istex.get] will print length"
(
0
::
Int
)
case
eDocs
of
Left
_
->
pure
()
...
...
@@ -57,15 +58,17 @@ toDoc' la docs' = mapM (toDoc la) (ISTEX._documents_hits docs')
-- TODO current year as default
toDoc
::
Lang
->
ISTEX
.
Document
->
IO
HyperdataDocument
toDoc
la
(
ISTEX
.
Document
i
t
a
ab
d
s
)
=
do
--printDebug "ISTEX date" d
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
(
Just
.
pack
.
show
)
d
)
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
"Istex"
,
_hd_doi
=
Just
i
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
--printDebug "toDoc Istex" (utctime, (pub_year, pub_month, pub_day))
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
"Istex"
,
_hd_doi
=
Just
i
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
t
,
_hd_page
=
Nothing
,
_hd_title
=
t
,
_hd_authors
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
map
ISTEX
.
_author_name
a
)
,
_hd_institutes
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
concat
$
(
map
ISTEX
.
_author_affiliations
)
a
)
,
_hd_source
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
catMaybes
$
map
ISTEX
.
_source_title
s
)
...
...
@@ -77,5 +80,5 @@ toDoc la (ISTEX.Document i t a ab d s) = do
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
pack
.
show
)
la
}
,
_hd_language_iso2
=
Just
$
(
pack
.
show
)
la
}
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
View file @
d0039f33
...
...
@@ -67,14 +67,15 @@ type Day = Int
-- | Date Parser
-- Parses date mentions in full text, given the language.
-- >>> parse FR (pack "1
0
avril 1900 à 19H")
-- 1900-04-
10
19:00:00 UTC
-- >>> parse EN (pack "April 1
0
1900")
-- 1900-04-
10
00:00:00 UTC
-- >>> parse FR (pack "1 avril 1900 à 19H")
-- 1900-04-
01
19:00:00 UTC
-- >>> parse EN (pack "April 1 1900")
-- 1900-04-
01
00:00:00 UTC
parse
::
Lang
->
Text
->
IO
UTCTime
parse
lang
s
=
do
printDebug
"Date: "
s
--
printDebug "Date: " s
dateStr'
<-
pure
$
dateFlow
(
DucklingFailure
s
)
-- parseRawSafe lang s
--printDebug "Date': " dateStr'
case
dateFlow
dateStr'
of
DateFlowSuccess
ok
->
pure
ok
_
->
withDebugMode
(
DebugMode
True
)
...
...
@@ -93,7 +94,7 @@ data DateFlow = DucklingSuccess { ds_result :: Text }
|
DucklingFailure
{
df_result
::
Text
}
|
ReadFailure1
{
rf1_result
::
Text
}
|
ReadFailure2
{
rf2_result
::
Text
}
|
DateFlowSuccess
{
success
::
UTCTime
}
|
DateFlowSuccess
{
success
::
UTCTime
}
|
DateFlowFailure
deriving
Show
...
...
@@ -126,7 +127,7 @@ readDate txt = do
-- | To get homogeneity of the languages
-- TODO : put this in a more generic place in the source code
parserLang
::
Lang
->
DC
.
Lang
parserLang
FR
=
DC
.
FR
parserLang
FR
=
DC
.
FR
parserLang
EN
=
DC
.
EN
parserLang
lang
=
panic
$
"[G.C.T.C.P.Date] Lang not implemented"
<>
(
cs
$
show
lang
)
...
...
stack.yaml
View file @
d0039f33
...
...
@@ -73,7 +73,7 @@ extra-deps:
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit
:
02e03d9b856bd35d391f43da8525330f9d184615
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit
:
6821f41655e298fa1fac5021c5776a6aed2d18fd
commit
:
a34bb341236d82cf3d488210bc1d8448a98f5808
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit
:
9a43470241690a19c1c381c42a62c5dd4e28dff2
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment