Commit cc436c77 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Support HAL

parent cfc1aec2
......@@ -84,6 +84,7 @@ library
Gargantext.Core.Text.Corpus.API
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.EPO
Gargantext.Core.Text.Corpus.API.Hal
Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex
Gargantext.Core.Text.Corpus.API.OpenAlex
......@@ -230,7 +231,6 @@ library
Gargantext.Core.Methods.Similarities.Distributional
Gargantext.Core.Statistics
Gargantext.Core.Text.Convert
Gargantext.Core.Text.Corpus.API.Hal
Gargantext.Core.Text.Corpus.Parsers.Book
Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec
Gargantext.Core.Text.Corpus.Parsers.FrameWrite
......
......@@ -65,8 +65,9 @@ get externalAPI la q mPubmedAPIKey epoAuthKey epoAPIUrl limit = do
Arxiv -> runExceptT $ do
corpusQuery <- parse_query
ExceptT $ first ExternalAPIError <$> (Arxiv.get la corpusQuery limit)
HAL ->
first ExternalAPIError <$> HAL.getC (toISO639 la) (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
HAL -> runExceptT $ do
corpusQuery <- parse_query
ExceptT $ first ExternalAPIError <$> HAL.getC (toISO639 la) corpusQuery limit
IsTex -> runExceptT $ do
corpusQuery <- parse_query
ExceptT $ first ExternalAPIError <$> ISTEX.get la corpusQuery limit
......
......@@ -9,7 +9,18 @@ Portability : POSIX
-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Text.Corpus.API.Hal
( getC
, get
-- * Internal functions for testing
, getHalQuery
, HalQuery(..)
, convertQuery
)
where
import Conduit
......@@ -17,28 +28,81 @@ import Data.Either
import Data.LanguageCodes qualified as ISO639
import Data.Map.Strict qualified as Map
import Data.Maybe
import Data.Monoid
import Data.Semigroup
import Data.Text (pack, intercalate)
import Data.Text qualified as Text
import Data.Text.Encoding qualified as TE
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (intercalate)
import Gargantext.Prelude hiding (intercalate, get)
import HAL qualified as HAL
import HAL.Client qualified as HAL
import HAL.Doc.Corpus qualified as HAL
import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape)
import Servant.Client (ClientError)
get :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO [HyperdataDocument]
get la q ml = do
eDocs <- HAL.getMetadataWith [q] (Just 0) (fromIntegral <$> ml) la
newtype HalQuery = HalQuery { _HalQuery :: [EscapeItem] }
deriving stock (Show, Eq)
deriving newtype (Semigroup, Monoid)
-- | Returns an /url encoded/ query ready to be sent to Hal.
getHalQuery :: HalQuery -> HAL.Query
getHalQuery (HalQuery items) =
Text.replace "q=" "" . TE.decodeUtf8 . renderQueryPartialEscape False $ [
("q", items)
]
convertQuery :: Corpus.Query -> HalQuery
convertQuery q = HalQuery (interpretQuery q transformAST)
where
lParen :: [EscapeItem]
lParen = [QN "("]
rParen :: [EscapeItem]
rParen = [QN ")"]
transformAST :: BoolExpr Term -> [EscapeItem]
transformAST ast = case ast of
BAnd sub1 sub2
-> lParen <> transformAST sub1 <> [QN "+AND+"] <> transformAST sub2 <> rParen
BOr sub1 sub2
-> lParen <> transformAST sub1 <> [QN "+OR+"] <> transformAST sub2 <> rParen
BNot (BConst (Negative term))
-> transformAST (BConst (Positive term)) -- double negation
BNot sub
-> [QN "NOT+"] <> lParen <> transformAST sub <> rParen
-- BTrue cannot happen is the query parser doesn't support parsing 'TRUE' alone.
BTrue
-> mempty
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse
-> mempty
-- Maps the positive terms to contain 'abstract:'
BConst (Positive (Term term))
-> [QE (TE.encodeUtf8 term)]
BConst (Negative sub)
-> [QN "NOT+"] <> transformAST (BConst (Positive sub))
get :: Maybe ISO639.ISO639_1
-> Corpus.Query
-> Maybe Corpus.Limit
-> IO [HyperdataDocument]
get la (convertQuery -> q) ml = do
eDocs <- HAL.getMetadataWith [getHalQuery q] (Just 0) (fromIntegral . getLimit <$> ml) la
either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) eDocs
getC :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
getC la q ml = do
eRes <- HAL.getMetadataWithC [q] (Just 0) (fromIntegral <$> ml) la
getC :: Maybe ISO639.ISO639_1
-> Corpus.Query
-> Maybe Corpus.Limit
-> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
getC la (convertQuery -> q) ml = do
eRes <- HAL.getMetadataWithC [getHalQuery q] (Just 0) (fromIntegral . getLimit <$> ml) la
pure $ (\(len, docsC) -> (len, docsC .| mapMC (toDoc' la))) <$> eRes
-- case eRes of
-- Left err -> panic $ pack $ show err
-- Right (len, docsC) -> pure (len, docsC .| mapMC (toDoc' la))
toDoc' :: Maybe ISO639.ISO639_1 -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus { .. }) = do
......
......@@ -59,11 +59,6 @@ convertQuery q = IsidoreQuery (interpretQuery q transformAST)
-- It seems like Isidore supports a similar query language to Pubmed.
transformAST :: BoolExpr Term -> [EscapeItem]
transformAST ast = case ast of
BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated.
-> (transformAST sub) <> [QN "+AND+NOT+"] <> transformAST (BConst (Positive term))
BAnd term1 (BNot term2)
-> transformAST term1 <> [QN "+AND+NOT+"] <> transformAST term2
BAnd sub1 sub2
-> transformAST sub1 <> [QN "+AND+"] <> transformAST sub2
BOr sub1 sub2
......
......@@ -67,11 +67,6 @@ convertQuery q = IstexQuery (interpretQuery q transformAST)
where
transformAST :: BoolExpr Term -> [EscapeItem]
transformAST ast = case ast of
BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated.
-> (transformAST sub) <> [QN "+AND+NOT+"] <> transformAST (BConst (Positive term))
BAnd term1 (BNot term2)
-> transformAST term1 <> [QN "+AND+NOT+"] <> transformAST term2
BAnd sub1 sub2
-> transformAST sub1 <> [QN "+AND+"] <> transformAST sub2
BOr sub1 sub2
......
......@@ -19,6 +19,7 @@ import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as Pubmed
import qualified Gargantext.Core.Text.Corpus.API.Isidore as Isidore
import qualified Gargantext.Core.Text.Corpus.API.Istex as Istex
import qualified Gargantext.Core.Text.Corpus.API.Hal as Hal
import qualified Network.Api.Arxiv as Arxiv
import Test.Tasty
......@@ -86,6 +87,15 @@ tests = withResource pubmedSettings (const (pure ())) $ \getPubmedKey ->
, testCase "It supports '\"Haskell\" AND \"Idris\"'" testIstex03
, testCase "It supports 'A OR B'" testIstex04
]
, testGroup "HAL expression converter" [
testCase "It supports 'A'" testHal01
, testCase "It supports '-A'" testHal02_01
, testCase "It supports 'NOT A'" testHal02_02
, testCase "It supports 'NOT (NOT A)'" testHal02_03
, testCase "It supports '\"Haskell\" AND \"Idris\"'" testHal03
, testCase "It supports 'A OR B'" testHal04
, testCase "It supports 'Paris AND France AND history NOT (Texas AND history)'" testHal05
]
, testGroup "PUBMED real queries (skipped if PUBMED_API_KEY env var not set)" [
testCase "It searches for \"Covid\"" (testPubMedCovid_01 getPubmedKey)
, testCase "It searches for \"Covid\" AND \"Alzheimer\"" (testPubMedCovid_02 getPubmedKey)
......@@ -326,6 +336,39 @@ testIstex04 :: Assertion
testIstex04 = withValidQuery "A OR B" $ \q ->
Istex.getIstexQuery EN (Istex.convertQuery q) @?= "language:eng+AND+abstract:A+OR+abstract:B"
--
-- Hal tests
--
testHal01 :: Assertion
testHal01 = withValidQuery "A" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Hal.getHalQuery (Hal.convertQuery q) == "A")
testHal02_01 :: Assertion
testHal02_01 = withValidQuery "-A" $ \q ->
Hal.getHalQuery (Hal.convertQuery q) @?= "NOT+A"
testHal02_02 :: Assertion
testHal02_02 = withValidQuery "NOT A" $ \q ->
Hal.getHalQuery (Hal.convertQuery q) @?= "NOT+A"
testHal02_03 :: Assertion
testHal02_03 = withValidQuery "NOT (NOT A)" $ \q ->
Hal.getHalQuery (Hal.convertQuery q) @?= "A"
testHal03 :: Assertion
testHal03 = withValidQuery "\"Haskell\" AND \"Idris\"" $ \q ->
Hal.getHalQuery (Hal.convertQuery q) @?= "(Haskell+AND+Idris)"
testHal04 :: Assertion
testHal04 = withValidQuery "A OR B" $ \q ->
Hal.getHalQuery (Hal.convertQuery q) @?= "(A+OR+B)"
testHal05 :: Assertion
testHal05 = withValidQuery "Paris AND France AND history NOT (Texas AND history)" $ \q ->
Hal.getHalQuery (Hal.convertQuery q) @?= "(Paris+AND+(France+AND+(history+AND+(NOT+Texas+OR+NOT+history))))"
--
-- Integration tests against the real services
--
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment