Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
39ea62eb
Commit
39ea62eb
authored
Mar 21, 2024
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'origin/327-dev-rewrite-hal-crawler' into dev
parents
25a14cb2
629d7af7
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
18 additions
and
17 deletions
+18
-17
update-project-dependencies
bin/update-project-dependencies
+1
-1
cabal.project
cabal.project
+1
-1
API.hs
src/Gargantext/Core/Text/Corpus/API.hs
+4
-3
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+6
-6
Istex.hs
src/Gargantext/Core/Text/Corpus/API/Istex.hs
+5
-5
stack.yaml
stack.yaml
+1
-1
No files found.
bin/update-project-dependencies
View file @
39ea62eb
...
@@ -18,7 +18,7 @@ fi
...
@@ -18,7 +18,7 @@ fi
# with the `sha256sum` result calculated on the `cabal.project` and
# with the `sha256sum` result calculated on the `cabal.project` and
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in.
# cache can kick in.
expected_cabal_project_hash
=
"1cbb47fd3f929a01b3b968cc2e148dcbf5ef4e662e14ed9832d32471a68f6766"
expected_cabal_project_hash
=
"3bfa2552464823ff4f1d892e9dc2778a9cbf1a153a6639ec9caf87e6d9c75a7b"
expected_cabal_project_freeze_hash
=
"2c8960ffcf1b94aa11a3543e3b5facd2db5af19569fecaec4bc0ab4c1edd22a5"
expected_cabal_project_freeze_hash
=
"2c8960ffcf1b94aa11a3543e3b5facd2db5af19569fecaec4bc0ab4c1edd22a5"
cabal
--store-dir
=
$STORE_DIR
v2-build
--dry-run
cabal
--store-dir
=
$STORE_DIR
v2-build
--dry-run
...
...
cabal.project
View file @
39ea62eb
...
@@ -106,7 +106,7 @@ source-repository-package
...
@@ -106,7 +106,7 @@ source-repository-package
source
-
repository
-
package
source
-
repository
-
package
type
:
git
type
:
git
location
:
https
://
gitlab
.
iscpif
.
fr
/
gargantext
/
crawlers
/
hal
.
git
location
:
https
://
gitlab
.
iscpif
.
fr
/
gargantext
/
crawlers
/
hal
.
git
tag
:
bfa9069b4ff70f341ca3244e8aff9e83eb4b8b73
tag
:
b99b9e568c8bdc73af2b8016ed03ba5ee83c2030
source
-
repository
-
package
source
-
repository
-
package
type
:
git
type
:
git
...
...
src/Gargantext/Core/Text/Corpus/API.hs
View file @
39ea62eb
...
@@ -18,8 +18,7 @@ module Gargantext.Core.Text.Corpus.API
...
@@ -18,8 +18,7 @@ module Gargantext.Core.Text.Corpus.API
,
externalAPIs
,
externalAPIs
)
where
)
where
import
Conduit
import
Conduit
(
ConduitT
,
yieldMany
)
import
Control.Monad.Except
import
Data.Text
qualified
as
T
import
Data.Text
qualified
as
T
import
EPO.API.Client.Types
qualified
as
EPO
import
EPO.API.Client.Types
qualified
as
EPO
import
Gargantext.API.Admin.Orchestrator.Types
(
ExternalAPIs
(
..
),
externalAPIs
)
import
Gargantext.API.Admin.Orchestrator.Types
(
ExternalAPIs
(
..
),
externalAPIs
)
...
@@ -32,7 +31,7 @@ import Gargantext.Core.Text.Corpus.API.Istex qualified as ISTEX
...
@@ -32,7 +31,7 @@ import Gargantext.Core.Text.Corpus.API.Istex qualified as ISTEX
import
Gargantext.Core.Text.Corpus.API.OpenAlex
qualified
as
OpenAlex
import
Gargantext.Core.Text.Corpus.API.OpenAlex
qualified
as
OpenAlex
import
Gargantext.Core.Text.Corpus.API.Pubmed
qualified
as
PUBMED
import
Gargantext.Core.Text.Corpus.API.Pubmed
qualified
as
PUBMED
import
Gargantext.Core.Text.Corpus.Query
qualified
as
Corpus
import
Gargantext.Core.Text.Corpus.Query
qualified
as
Corpus
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
hiding
(
get
)
import
Gargantext.Prelude
hiding
(
get
)
import
PUBMED.Types
qualified
as
PUBMED
import
PUBMED.Types
qualified
as
PUBMED
import
Servant.Client
(
ClientError
)
import
Servant.Client
(
ClientError
)
...
@@ -80,3 +79,5 @@ get externalAPI lang q mPubmedAPIKey epoAuthKey epoAPIUrl limit = do
...
@@ -80,3 +79,5 @@ get externalAPI lang q mPubmedAPIKey epoAuthKey epoAPIUrl limit = do
first
ExternalAPIError
<$>
EPO
.
get
epoAuthKey
epoAPIUrl
q
(
toISO639
lang
)
limit
first
ExternalAPIError
<$>
EPO
.
get
epoAuthKey
epoAPIUrl
q
(
toISO639
lang
)
limit
where
where
parse_query
=
first
(
InvalidInputQuery
q
.
T
.
pack
)
$
Corpus
.
parseQuery
q
parse_query
=
first
(
InvalidInputQuery
q
.
T
.
pack
)
$
Corpus
.
parseQuery
q
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
39ea62eb
...
@@ -20,9 +20,9 @@ import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
...
@@ -20,9 +20,9 @@ import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
)
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
)
)
import
Gargantext.Defaults
qualified
as
Defaults
import
Gargantext.Defaults
qualified
as
Defaults
import
Gargantext.Prelude
hiding
(
intercalate
)
import
Gargantext.Prelude
hiding
(
intercalate
)
import
HAL
qualified
as
HAL
import
HAL
qualified
import
HAL.Client
qualified
as
HAL
import
HAL.Doc.Corpus
qualified
as
HAL
import
HAL.Doc.Corpus
qualified
as
HAL
import
HAL.Types
qualified
as
HAL
import
Servant.Client
(
ClientError
)
import
Servant.Client
(
ClientError
)
get
::
Maybe
ISO639
.
ISO639_1
->
Text
->
Maybe
Int
->
IO
[
HyperdataDocument
]
get
::
Maybe
ISO639
.
ISO639_1
->
Text
->
Maybe
Int
->
IO
[
HyperdataDocument
]
...
@@ -32,7 +32,7 @@ get la q ml = do
...
@@ -32,7 +32,7 @@ get la q ml = do
getC
::
Maybe
ISO639
.
ISO639_1
->
Text
->
Maybe
Int
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
getC
::
Maybe
ISO639
.
ISO639_1
->
Text
->
Maybe
Int
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
getC
la
q
ml
=
do
getC
la
q
ml
=
do
eRes
<-
HAL
.
getMetadataWithC
[
q
]
(
Just
0
)
(
fromIntegral
<$>
ml
)
la
eRes
<-
HAL
.
getMetadataWithCursorC
q
(
fromIntegral
<$>
ml
)
la
pure
$
(
\
(
len
,
docsC
)
->
(
len
,
docsC
.|
mapMC
(
toDoc'
la
)))
<$>
eRes
pure
$
(
\
(
len
,
docsC
)
->
(
len
,
docsC
.|
mapMC
(
toDoc'
la
)))
<$>
eRes
-- case eRes of
-- case eRes of
-- Left err -> panic $ pack $ show err
-- Left err -> panic $ pack $ show err
...
@@ -41,7 +41,7 @@ getC la q ml = do
...
@@ -41,7 +41,7 @@ getC la q ml = do
toDoc'
::
Maybe
ISO639
.
ISO639_1
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
::
Maybe
ISO639
.
ISO639_1
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
la
(
HAL
.
Corpus
{
..
})
=
do
toDoc'
la
(
HAL
.
Corpus
{
..
})
=
do
-- printDebug "[toDoc corpus] h" h
-- printDebug "[toDoc corpus] h" h
let
mDateS
=
maybe
(
Just
$
pack
$
show
Defaults
.
year
)
Just
_corpus_date
let
mDateS
=
_corpus_date
<|>
Just
(
pack
$
show
Defaults
.
year
)
let
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
=
Date
.
mDateSplit
mDateS
let
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
=
Date
.
mDateSplit
mDateS
let
abstractDefault
=
unwords
_corpus_abstract
let
abstractDefault
=
unwords
_corpus_abstract
let
abstract
=
case
la
of
let
abstract
=
case
la
of
...
@@ -52,8 +52,8 @@ toDoc' la (HAL.Corpus { .. }) = do
...
@@ -52,8 +52,8 @@ toDoc' la (HAL.Corpus { .. }) = do
,
_hd_url
=
Nothing
,
_hd_url
=
Nothing
,
_hd_page
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
Just
$
unwords
_corpus_title
,
_hd_title
=
Just
$
unwords
_corpus_title
,
_hd_authors
=
Just
$
foldl
(
\
x
y
->
if
x
==
""
then
y
else
x
<>
", "
<>
y
)
""
_corpus_authors_names
,
_hd_authors
=
Just
$
foldl'
(
\
x
y
->
if
x
==
""
then
y
else
x
<>
", "
<>
y
)
""
_corpus_authors_names
,
_hd_institutes
=
Just
$
foldl
(
\
x
y
->
if
x
==
""
then
y
else
x
<>
", "
<>
y
)
""
$
_corpus_authors_affiliations
<>
map
show
_corpus_struct_id
,
_hd_institutes
=
Just
$
foldl'
(
\
x
y
->
if
x
==
""
then
y
else
x
<>
", "
<>
y
)
""
$
_corpus_authors_affiliations
<>
map
show
_corpus_struct_id
,
_hd_source
=
Just
$
maybe
"Nothing"
identity
_corpus_source
,
_hd_source
=
Just
$
maybe
"Nothing"
identity
_corpus_source
,
_hd_abstract
=
Just
abstract
,
_hd_abstract
=
Just
abstract
,
_hd_publication_date
=
fmap
show
utctime
,
_hd_publication_date
=
fmap
show
utctime
...
...
src/Gargantext/Core/Text/Corpus/API/Istex.hs
View file @
39ea62eb
...
@@ -20,9 +20,9 @@ import Data.List qualified as List
...
@@ -20,9 +20,9 @@ import Data.List qualified as List
import
Data.Text
qualified
as
Text
import
Data.Text
qualified
as
Text
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers.JSON.Istex
(
toDoc
)
import
Gargantext.Core.Text.Corpus.Parsers.JSON.Istex
(
toDoc
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
hiding
(
get
)
import
Gargantext.Prelude
hiding
(
get
)
import
ISTEX
qualified
as
ISTEX
import
ISTEX
qualified
import
ISTEX.Client
qualified
as
ISTEX
import
ISTEX.Client
qualified
as
ISTEX
type
Query
=
Text
type
Query
=
Text
...
@@ -40,14 +40,14 @@ get la query' maxResults = do
...
@@ -40,14 +40,14 @@ get la query' maxResults = do
-- eDocs <- ISTEX.getMetadataScroll (q <> " abstract:*") "1m" Nothing 0 --(fromIntegral <$> ml)
-- eDocs <- ISTEX.getMetadataScroll (q <> " abstract:*") "1m" Nothing 0 --(fromIntegral <$> ml)
-- eDocs <- ISTEX.getMetadataScroll q "1m" Nothing 0 --(fromIntegral <$> ml)
-- eDocs <- ISTEX.getMetadataScroll q "1m" Nothing 0 --(fromIntegral <$> ml)
let
query
=
case
(
List
.
length
$
Text
.
splitOn
":"
query'
)
==
1
of
let
query
=
if
List
.
length
(
Text
.
splitOn
":"
query'
)
==
1
then
-- True case means users is entering default search of IsTex
-- True case means users is entering default search of IsTex
-- In that case we need to enrich his query with 2 parameters
-- In that case we need to enrich his query with 2 parameters
-- First expected language: user has to define it in GTXT
-- First expected language: user has to define it in GTXT
-- Second : query in abstract
-- Second : query in abstract
True
->
(
"language:"
<>
toISTEXLanguageCode
la
)
<>
" AND abstract:"
<>
query'
(
"language:"
<>
toISTEXLanguageCode
la
)
<>
" AND abstract:"
<>
query'
False
->
query'
else
query'
-- Complex queries of IsTex needs parameters using ":" so we leave the query as it is
-- Complex queries of IsTex needs parameters using ":" so we leave the query as it is
-- in that case we suppose user is knowing what s.he is doing
-- in that case we suppose user is knowing what s.he is doing
...
...
stack.yaml
View file @
39ea62eb
...
@@ -135,7 +135,7 @@
...
@@ -135,7 +135,7 @@
git
:
"https://gitlab.iscpif.fr/gargantext/crawlers/epo-proxy-api.git"
git
:
"https://gitlab.iscpif.fr/gargantext/crawlers/epo-proxy-api.git"
subdirs
:
subdirs
:
-
.
-
.
-
commit
:
bfa9069b4ff70f341ca3244e8aff9e83eb4b8b73
-
commit
:
b99b9e568c8bdc73af2b8016ed03ba5ee83c2030
git
:
"https://gitlab.iscpif.fr/gargantext/crawlers/hal.git"
git
:
"https://gitlab.iscpif.fr/gargantext/crawlers/hal.git"
subdirs
:
subdirs
:
-
.
-
.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment