Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
d7571b77
Commit
d7571b77
authored
Aug 28, 2023
by
Alfredo Di Napoli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Send PUBMED the raw query from the frontend
parent
78bc52e0
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
38 additions
and
30 deletions
+38
-30
API.hs
src/Gargantext/Core/Text/Corpus/API.hs
+23
-17
Pubmed.hs
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
+13
-11
Query.hs
test/Core/Text/Corpus/Query.hs
+2
-2
No files found.
src/Gargantext/Core/Text/Corpus/API.hs
View file @
d7571b77
...
...
@@ -19,14 +19,16 @@ module Gargantext.Core.Text.Corpus.API
)
where
import
Conduit
import
Control.Monad.Except
import
Data.Bifunctor
import
Data.Either
(
Either
(
..
))
import
Data.Maybe
import
qualified
Data.Text
as
T
import
Gargantext.API.Admin.Orchestrator.Types
(
ExternalAPIs
(
..
),
externalAPIs
)
import
Gargantext.Core
(
Lang
(
..
),
toISO639
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
import
Servant.Client
(
ClientError
)
import
qualified
Data.Text
as
T
import
qualified
Gargantext.Core.Text.Corpus.API.Arxiv
as
Arxiv
import
qualified
Gargantext.Core.Text.Corpus.API.Hal
as
HAL
import
qualified
Gargantext.Core.Text.Corpus.API.Isidore
as
ISIDORE
...
...
@@ -35,7 +37,6 @@ import qualified Gargantext.Core.Text.Corpus.API.OpenAlex as OpenAlex
import
qualified
Gargantext.Core.Text.Corpus.API.Pubmed
as
PUBMED
import
qualified
Gargantext.Core.Text.Corpus.Query
as
Corpus
import
qualified
PUBMED.Types
as
PUBMED
import
Servant.Client
(
ClientError
)
data
GetCorpusError
=
-- | We couldn't parse the user input query into something meaningful.
...
...
@@ -53,18 +54,23 @@ get :: ExternalAPIs
-- -> IO [HyperdataDocument]
->
IO
(
Either
GetCorpusError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
get
externalAPI
la
q
mPubmedAPIKey
limit
=
do
case
Corpus
.
parseQuery
q
of
Left
err
->
pure
$
Left
$
InvalidInputQuery
q
(
T
.
pack
err
)
Right
corpusQuery
->
case
externalAPI
of
OpenAlex
->
first
ExternalAPIError
<$>
OpenAlex
.
get
(
fromMaybe
""
Nothing
{- email -}
)
q
(
toISO639
la
)
limit
PubMed
->
first
ExternalAPIError
<$>
PUBMED
.
get
(
fromMaybe
""
mPubmedAPIKey
)
corpusQuery
limit
--docs <- PUBMED.get q default_limit -- EN only by default
--pure (Just $ fromIntegral $ length docs, yieldMany docs)
Arxiv
->
Right
<$>
Arxiv
.
get
la
corpusQuery
limit
HAL
->
first
ExternalAPIError
<$>
HAL
.
getC
(
toISO639
la
)
(
Corpus
.
getRawQuery
q
)
(
Corpus
.
getLimit
<$>
limit
)
IsTex
->
do
docs
<-
ISTEX
.
get
la
(
Corpus
.
getRawQuery
q
)
(
Corpus
.
getLimit
<$>
limit
)
-- For PUBMED, HAL, IsTex, Isidore and OpenAlex, we want to send the query as-it.
-- For Arxiv we parse the query into a structured boolean query we submit over.
case
externalAPI
of
PubMed
->
first
ExternalAPIError
<$>
PUBMED
.
get
(
fromMaybe
""
mPubmedAPIKey
)
q
limit
OpenAlex
->
first
ExternalAPIError
<$>
OpenAlex
.
get
(
fromMaybe
""
Nothing
{- email -}
)
q
(
toISO639
la
)
limit
Arxiv
->
runExceptT
$
do
corpusQuery
<-
ExceptT
(
pure
parse_query
)
ExceptT
$
fmap
Right
(
Arxiv
.
get
la
corpusQuery
limit
)
HAL
->
first
ExternalAPIError
<$>
HAL
.
getC
(
toISO639
la
)
(
Corpus
.
getRawQuery
q
)
(
Corpus
.
getLimit
<$>
limit
)
IsTex
->
do
docs
<-
ISTEX
.
get
la
(
Corpus
.
getRawQuery
q
)
(
Corpus
.
getLimit
<$>
limit
)
pure
$
Right
(
Just
$
fromIntegral
$
length
docs
,
yieldMany
docs
)
Isidore
->
do
docs
<-
ISIDORE
.
get
la
(
Corpus
.
getLimit
<$>
limit
)
(
Just
$
Corpus
.
getRawQuery
q
)
Nothing
Isidore
->
do
docs
<-
ISIDORE
.
get
la
(
Corpus
.
getLimit
<$>
limit
)
(
Just
$
Corpus
.
getRawQuery
q
)
Nothing
pure
$
Right
(
Just
$
fromIntegral
$
length
docs
,
yieldMany
docs
)
where
parse_query
=
first
(
InvalidInputQuery
q
.
T
.
pack
)
$
Corpus
.
parseQuery
q
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
View file @
d7571b77
...
...
@@ -45,6 +45,15 @@ import PUBMED.Types (Config(..))
-- | A pubmed query.
-- See: https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
-- The documentation for PUBMED says:
-- Values for query keys may also be provided in term if they are preceeded by a
-- '#' (%23 in the URL). While only one query_key parameter can be provided to ESearch,
-- any number of query keys can be combined in term. Also, if query keys are provided in term,
-- they can be combined with OR or NOT in addition to AND.
-- Example:
-- esearch.fcgi?db=pubmed&term=%231+AND+asthma&WebEnv=<webenv string>&usehistory=y
--
-- Therefore, we can pretty-print our 'Query' back into something that PubMed could understand.
newtype
ESearch
=
ESearch
{
_ESearch
::
[
EscapeItem
]
}
deriving
stock
(
Show
,
Eq
)
deriving
newtype
(
Semigroup
,
Monoid
)
...
...
@@ -87,21 +96,14 @@ convertQuery q = ESearch (interpretQuery q transformAST)
->
[
QN
"NOT+"
,
QE
(
TE
.
encodeUtf8
term
)]
get
::
Text
->
Corpus
.
Query
->
Corpus
.
Raw
Query
->
Maybe
Limit
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
get
apiKey
q
l
=
do
-- The documentation for PUBMED says:
-- Values for query keys may also be provided in term if they are preceeded by a
-- '#' (%23 in the URL). While only one query_key parameter can be provided to ESearch,
-- any number of query keys can be combined in term. Also, if query keys are provided in term,
-- they can be combined with OR or NOT in addition to AND.
-- Example:
-- esearch.fcgi?db=pubmed&term=%231+AND+asthma&WebEnv=<webenv string>&usehistory=y
--
-- Therefore, we can pretty-print our 'Query' back into something that PubMed could understand.
-- NOTE(adinapoli): For now we do not interpret the PUBMED query into something
-- more structured, like an 'ESearch' term, but we could, in the future.
eRes
<-
runReaderT
PubMed
.
getMetadataWithC
(
Config
{
apiKey
=
Just
apiKey
,
query
=
get
ESearch
$
convert
Query
q
,
query
=
get
Raw
Query
q
,
perPage
=
Just
200
,
mWebEnv
=
Nothing
})
let
takeLimit
=
case
l
of
...
...
test/Core/Text/Corpus/Query.hs
View file @
d7571b77
...
...
@@ -226,7 +226,7 @@ testPubMedCovid_01 getPubmedKey = do
case
mb_key
of
Nothing
->
pure
()
Just
k
->
withValidQuery
"
\"
Covid
\"
"
$
\
query
->
do
res
<-
Pubmed
.
get
(
_PubmedApiKey
k
)
query
(
Just
1
)
res
<-
Pubmed
.
get
(
_PubmedApiKey
k
)
(
renderQuery
query
)
(
Just
1
)
case
res
of
Left
err
->
fail
(
show
err
)
Right
(
_
,
cnd
)
->
do
...
...
@@ -241,7 +241,7 @@ testPubMedCovid_02 getPubmedKey = do
case
mb_key
of
Nothing
->
pure
()
Just
k
->
withValidQuery
"
\"
Covid
\"
AND
\"
Alzheimer
\"
"
$
\
query
->
do
res
<-
Pubmed
.
get
(
_PubmedApiKey
k
)
query
(
Just
1
)
res
<-
Pubmed
.
get
(
_PubmedApiKey
k
)
(
renderQuery
query
)
(
Just
1
)
case
res
of
Left
err
->
fail
(
show
err
)
Right
(
_
,
cnd
)
->
do
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment