Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
cc436c77
Commit
cc436c77
authored
Jun 12, 2023
by
Alfredo Di Napoli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Support HAL
parent
cfc1aec2
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
121 additions
and
23 deletions
+121
-23
gargantext.cabal
gargantext.cabal
+1
-1
API.hs
src/Gargantext/Core/Text/Corpus/API.hs
+3
-2
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+74
-10
Isidore.hs
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
+0
-5
Istex.hs
src/Gargantext/Core/Text/Corpus/API/Istex.hs
+0
-5
Query.hs
test/Test/Core/Text/Corpus/Query.hs
+43
-0
No files found.
gargantext.cabal
View file @
cc436c77
...
...
@@ -84,6 +84,7 @@ library
Gargantext.Core.Text.Corpus.API
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.EPO
Gargantext.Core.Text.Corpus.API.Hal
Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex
Gargantext.Core.Text.Corpus.API.OpenAlex
...
...
@@ -230,7 +231,6 @@ library
Gargantext.Core.Methods.Similarities.Distributional
Gargantext.Core.Statistics
Gargantext.Core.Text.Convert
Gargantext.Core.Text.Corpus.API.Hal
Gargantext.Core.Text.Corpus.Parsers.Book
Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec
Gargantext.Core.Text.Corpus.Parsers.FrameWrite
...
...
src/Gargantext/Core/Text/Corpus/API.hs
View file @
cc436c77
...
...
@@ -65,8 +65,9 @@ get externalAPI la q mPubmedAPIKey epoAuthKey epoAPIUrl limit = do
Arxiv
->
runExceptT
$
do
corpusQuery
<-
parse_query
ExceptT
$
first
ExternalAPIError
<$>
(
Arxiv
.
get
la
corpusQuery
limit
)
HAL
->
first
ExternalAPIError
<$>
HAL
.
getC
(
toISO639
la
)
(
Corpus
.
getRawQuery
q
)
(
Corpus
.
getLimit
<$>
limit
)
HAL
->
runExceptT
$
do
corpusQuery
<-
parse_query
ExceptT
$
first
ExternalAPIError
<$>
HAL
.
getC
(
toISO639
la
)
corpusQuery
limit
IsTex
->
runExceptT
$
do
corpusQuery
<-
parse_query
ExceptT
$
first
ExternalAPIError
<$>
ISTEX
.
get
la
corpusQuery
limit
...
...
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
cc436c77
...
...
@@ -9,7 +9,18 @@ Portability : POSIX
-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE ViewPatterns #-}
module
Gargantext.Core.Text.Corpus.API.Hal
(
getC
,
get
-- * Internal functions for testing
,
getHalQuery
,
HalQuery
(
..
)
,
convertQuery
)
where
import
Conduit
...
...
@@ -17,28 +28,81 @@ import Data.Either
import
Data.LanguageCodes
qualified
as
ISO639
import
Data.Map.Strict
qualified
as
Map
import
Data.Maybe
import
Data.Monoid
import
Data.Semigroup
import
Data.Text
(
pack
,
intercalate
)
import
Data.Text
qualified
as
Text
import
Data.Text.Encoding
qualified
as
TE
import
Gargantext.Core.Text.Corpus.Parsers.Date
qualified
as
Date
import
Gargantext.Core.Text.Corpus.Query
as
Corpus
import
Gargantext.Core.Types
(
Term
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Defaults
qualified
as
Defaults
import
Gargantext.Prelude
hiding
(
intercalate
)
import
Gargantext.Prelude
hiding
(
intercalate
,
get
)
import
HAL
qualified
as
HAL
import
HAL.Client
qualified
as
HAL
import
HAL.Doc.Corpus
qualified
as
HAL
import
Network.HTTP.Types.URI
(
EscapeItem
(
..
),
renderQueryPartialEscape
)
import
Servant.Client
(
ClientError
)
get
::
Maybe
ISO639
.
ISO639_1
->
Text
->
Maybe
Int
->
IO
[
HyperdataDocument
]
get
la
q
ml
=
do
eDocs
<-
HAL
.
getMetadataWith
[
q
]
(
Just
0
)
(
fromIntegral
<$>
ml
)
la
newtype
HalQuery
=
HalQuery
{
_HalQuery
::
[
EscapeItem
]
}
deriving
stock
(
Show
,
Eq
)
deriving
newtype
(
Semigroup
,
Monoid
)
-- | Returns an /url encoded/ query ready to be sent to Hal.
getHalQuery
::
HalQuery
->
HAL
.
Query
getHalQuery
(
HalQuery
items
)
=
Text
.
replace
"q="
""
.
TE
.
decodeUtf8
.
renderQueryPartialEscape
False
$
[
(
"q"
,
items
)
]
convertQuery
::
Corpus
.
Query
->
HalQuery
convertQuery
q
=
HalQuery
(
interpretQuery
q
transformAST
)
where
lParen
::
[
EscapeItem
]
lParen
=
[
QN
"("
]
rParen
::
[
EscapeItem
]
rParen
=
[
QN
")"
]
transformAST
::
BoolExpr
Term
->
[
EscapeItem
]
transformAST
ast
=
case
ast
of
BAnd
sub1
sub2
->
lParen
<>
transformAST
sub1
<>
[
QN
"+AND+"
]
<>
transformAST
sub2
<>
rParen
BOr
sub1
sub2
->
lParen
<>
transformAST
sub1
<>
[
QN
"+OR+"
]
<>
transformAST
sub2
<>
rParen
BNot
(
BConst
(
Negative
term
))
->
transformAST
(
BConst
(
Positive
term
))
-- double negation
BNot
sub
->
[
QN
"NOT+"
]
<>
lParen
<>
transformAST
sub
<>
rParen
-- BTrue cannot happen is the query parser doesn't support parsing 'TRUE' alone.
BTrue
->
mempty
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse
->
mempty
-- Maps the positive terms to contain 'abstract:'
BConst
(
Positive
(
Term
term
))
->
[
QE
(
TE
.
encodeUtf8
term
)]
BConst
(
Negative
sub
)
->
[
QN
"NOT+"
]
<>
transformAST
(
BConst
(
Positive
sub
))
get
::
Maybe
ISO639
.
ISO639_1
->
Corpus
.
Query
->
Maybe
Corpus
.
Limit
->
IO
[
HyperdataDocument
]
get
la
(
convertQuery
->
q
)
ml
=
do
eDocs
<-
HAL
.
getMetadataWith
[
getHalQuery
q
]
(
Just
0
)
(
fromIntegral
.
getLimit
<$>
ml
)
la
either
(
panic
.
pack
.
show
)
(
\
d
->
mapM
(
toDoc'
la
)
$
HAL
.
_docs
d
)
eDocs
getC
::
Maybe
ISO639
.
ISO639_1
->
Text
->
Maybe
Int
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
getC
la
q
ml
=
do
eRes
<-
HAL
.
getMetadataWithC
[
q
]
(
Just
0
)
(
fromIntegral
<$>
ml
)
la
getC
::
Maybe
ISO639
.
ISO639_1
->
Corpus
.
Query
->
Maybe
Corpus
.
Limit
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
getC
la
(
convertQuery
->
q
)
ml
=
do
eRes
<-
HAL
.
getMetadataWithC
[
getHalQuery
q
]
(
Just
0
)
(
fromIntegral
.
getLimit
<$>
ml
)
la
pure
$
(
\
(
len
,
docsC
)
->
(
len
,
docsC
.|
mapMC
(
toDoc'
la
)))
<$>
eRes
-- case eRes of
-- Left err -> panic $ pack $ show err
-- Right (len, docsC) -> pure (len, docsC .| mapMC (toDoc' la))
toDoc'
::
Maybe
ISO639
.
ISO639_1
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
la
(
HAL
.
Corpus
{
..
})
=
do
...
...
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
View file @
cc436c77
...
...
@@ -59,11 +59,6 @@ convertQuery q = IsidoreQuery (interpretQuery q transformAST)
-- It seems like Isidore supports a similar query language to Pubmed.
transformAST
::
BoolExpr
Term
->
[
EscapeItem
]
transformAST
ast
=
case
ast
of
BAnd
sub
(
BConst
(
Negative
term
))
-- The second term become positive, so that it can be translated.
->
(
transformAST
sub
)
<>
[
QN
"+AND+NOT+"
]
<>
transformAST
(
BConst
(
Positive
term
))
BAnd
term1
(
BNot
term2
)
->
transformAST
term1
<>
[
QN
"+AND+NOT+"
]
<>
transformAST
term2
BAnd
sub1
sub2
->
transformAST
sub1
<>
[
QN
"+AND+"
]
<>
transformAST
sub2
BOr
sub1
sub2
...
...
src/Gargantext/Core/Text/Corpus/API/Istex.hs
View file @
cc436c77
...
...
@@ -67,11 +67,6 @@ convertQuery q = IstexQuery (interpretQuery q transformAST)
where
transformAST
::
BoolExpr
Term
->
[
EscapeItem
]
transformAST
ast
=
case
ast
of
BAnd
sub
(
BConst
(
Negative
term
))
-- The second term become positive, so that it can be translated.
->
(
transformAST
sub
)
<>
[
QN
"+AND+NOT+"
]
<>
transformAST
(
BConst
(
Positive
term
))
BAnd
term1
(
BNot
term2
)
->
transformAST
term1
<>
[
QN
"+AND+NOT+"
]
<>
transformAST
term2
BAnd
sub1
sub2
->
transformAST
sub1
<>
[
QN
"+AND+"
]
<>
transformAST
sub2
BOr
sub1
sub2
...
...
test/Test/Core/Text/Corpus/Query.hs
View file @
cc436c77
...
...
@@ -19,6 +19,7 @@ import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
import
qualified
Gargantext.Core.Text.Corpus.API.Pubmed
as
Pubmed
import
qualified
Gargantext.Core.Text.Corpus.API.Isidore
as
Isidore
import
qualified
Gargantext.Core.Text.Corpus.API.Istex
as
Istex
import
qualified
Gargantext.Core.Text.Corpus.API.Hal
as
Hal
import
qualified
Network.Api.Arxiv
as
Arxiv
import
Test.Tasty
...
...
@@ -86,6 +87,15 @@ tests = withResource pubmedSettings (const (pure ())) $ \getPubmedKey ->
,
testCase
"It supports '
\"
Haskell
\"
AND
\"
Idris
\"
'"
testIstex03
,
testCase
"It supports 'A OR B'"
testIstex04
]
,
testGroup
"HAL expression converter"
[
testCase
"It supports 'A'"
testHal01
,
testCase
"It supports '-A'"
testHal02_01
,
testCase
"It supports 'NOT A'"
testHal02_02
,
testCase
"It supports 'NOT (NOT A)'"
testHal02_03
,
testCase
"It supports '
\"
Haskell
\"
AND
\"
Idris
\"
'"
testHal03
,
testCase
"It supports 'A OR B'"
testHal04
,
testCase
"It supports 'Paris AND France AND history NOT (Texas AND history)'"
testHal05
]
,
testGroup
"PUBMED real queries (skipped if PUBMED_API_KEY env var not set)"
[
testCase
"It searches for
\"
Covid
\"
"
(
testPubMedCovid_01
getPubmedKey
)
,
testCase
"It searches for
\"
Covid
\"
AND
\"
Alzheimer
\"
"
(
testPubMedCovid_02
getPubmedKey
)
...
...
@@ -326,6 +336,39 @@ testIstex04 :: Assertion
testIstex04
=
withValidQuery
"A OR B"
$
\
q
->
Istex
.
getIstexQuery
EN
(
Istex
.
convertQuery
q
)
@?=
"language:eng+AND+abstract:A+OR+abstract:B"
--
-- Hal tests
--
testHal01
::
Assertion
testHal01
=
withValidQuery
"A"
$
\
q
->
assertBool
(
"Query not converted into expression: "
<>
show
@
(
BoolExpr
Term
)
(
fromCNF
$
getQuery
q
))
(
Hal
.
getHalQuery
(
Hal
.
convertQuery
q
)
==
"A"
)
testHal02_01
::
Assertion
testHal02_01
=
withValidQuery
"-A"
$
\
q
->
Hal
.
getHalQuery
(
Hal
.
convertQuery
q
)
@?=
"NOT+A"
testHal02_02
::
Assertion
testHal02_02
=
withValidQuery
"NOT A"
$
\
q
->
Hal
.
getHalQuery
(
Hal
.
convertQuery
q
)
@?=
"NOT+A"
testHal02_03
::
Assertion
testHal02_03
=
withValidQuery
"NOT (NOT A)"
$
\
q
->
Hal
.
getHalQuery
(
Hal
.
convertQuery
q
)
@?=
"A"
testHal03
::
Assertion
testHal03
=
withValidQuery
"
\"
Haskell
\"
AND
\"
Idris
\"
"
$
\
q
->
Hal
.
getHalQuery
(
Hal
.
convertQuery
q
)
@?=
"(Haskell+AND+Idris)"
testHal04
::
Assertion
testHal04
=
withValidQuery
"A OR B"
$
\
q
->
Hal
.
getHalQuery
(
Hal
.
convertQuery
q
)
@?=
"(A+OR+B)"
testHal05
::
Assertion
testHal05
=
withValidQuery
"Paris AND France AND history NOT (Texas AND history)"
$
\
q
->
Hal
.
getHalQuery
(
Hal
.
convertQuery
q
)
@?=
"(Paris+AND+(France+AND+(history+AND+(NOT+Texas+OR+NOT+history))))"
--
-- Integration tests against the real services
--
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment