Commit 1798aab8 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Modify Query nodes to be [QueryTerm]

This paves the way to a more effective search in corpus by modifying the
internal "leaves" type of the `Query` type from a `Term` to a new type
called `QueryTerm`, which can either be an extact match or a partial
match, which can be rendered into a proper Postgres TS query via the
":*" syntax.
parent c0ec7622
Pipeline #5735 passed with stages
in 100 minutes and 13 seconds
...@@ -20,11 +20,9 @@ module Gargantext.Core.Text.Corpus.API.Arxiv ...@@ -20,11 +20,9 @@ module Gargantext.Core.Text.Corpus.API.Arxiv
import Arxiv qualified as Arxiv import Arxiv qualified as Arxiv
import Conduit import Conduit
import Data.Text (unpack)
import Data.Text qualified as Text import Data.Text qualified as Text
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query as Corpus import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (get) import Gargantext.Prelude hiding (get)
import Network.Api.Arxiv qualified as Ax import Network.Api.Arxiv qualified as Ax
...@@ -40,9 +38,12 @@ convertQuery q = mkQuery (interpretQuery q transformAST) ...@@ -40,9 +38,12 @@ convertQuery q = mkQuery (interpretQuery q transformAST)
, Ax.qStart = 0 , Ax.qStart = 0
, Ax.qItems = Arxiv.batchSize } , Ax.qItems = Arxiv.batchSize }
mergeTerms :: [QueryTerm] -> Maybe Ax.Expression
mergeTerms trms = Just $ Ax.Exp $ Ax.Abs [Text.unpack $ Text.unwords $ map renderQueryTerm trms]
-- Converts a 'BoolExpr' with 'Term's on the leaves into an Arxiv's expression. -- Converts a 'BoolExpr' with 'Term's on the leaves into an Arxiv's expression.
-- It yields 'Nothing' if the AST cannot be converted into a meaningful expression. -- It yields 'Nothing' if the AST cannot be converted into a meaningful expression.
transformAST :: BoolExpr Term -> Maybe Ax.Expression transformAST :: BoolExpr [QueryTerm] -> Maybe Ax.Expression
transformAST ast = case ast of transformAST ast = case ast of
BAnd sub (BConst (Negative term)) BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated. -- The second term become positive, so that it can be translated.
...@@ -64,11 +65,17 @@ convertQuery q = mkQuery (interpretQuery q transformAST) ...@@ -64,11 +65,17 @@ convertQuery q = mkQuery (interpretQuery q transformAST)
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone. -- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse BFalse
-> Nothing -> Nothing
BConst (Positive (Term term)) -- TODO(adinapoli) Apparently there is some fuzzy search going on under the hood
-> Just $ Ax.Exp $ Ax.Abs [unpack term] -- by Arxiv (see for example https://stackoverflow.com/questions/69003677/arxiv-api-problem-with-searching-for-two-keywords)
-- so it should be sufficient to search for the stemmed term. However, for simplicity and
-- backward compat, at the moment we don't stem.
BConst (Positive terms)
-> mergeTerms terms
-- We can handle negatives via `ANDNOT` with itself. -- We can handle negatives via `ANDNOT` with itself.
BConst (Negative (Term term)) -- TODO(adinapoli) Ditto as per the 'Positive' case (re partial matches)
-> Just $ Ax.AndNot (Ax.Exp $ Ax.Abs [unpack term]) (Ax.Exp $ Ax.Abs [unpack term]) BConst (Negative terms)
-> let term = Text.unpack $ Text.unwords (map renderQueryTerm terms)
in Just $ Ax.AndNot (Ax.Exp $ Ax.Abs [term]) (Ax.Exp $ Ax.Abs [term])
-- | TODO put default pubmed query in gargantext.ini -- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs -- by default: 10K docs
......
...@@ -10,6 +10,7 @@ Portability : POSIX ...@@ -10,6 +10,7 @@ Portability : POSIX
-} -}
{-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE LambdaCase #-}
module Gargantext.Core.Text.Corpus.API.Pubmed module Gargantext.Core.Text.Corpus.API.Pubmed
( get ( get
...@@ -25,7 +26,6 @@ import Data.Text qualified as Text ...@@ -25,7 +26,6 @@ import Data.Text qualified as Text
import Data.Text.Encoding qualified as TE import Data.Text.Encoding qualified as TE
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query as Corpus import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (get) import Gargantext.Prelude hiding (get)
import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape) import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape)
...@@ -60,7 +60,11 @@ getESearch (ESearch items) = ...@@ -60,7 +60,11 @@ getESearch (ESearch items) =
convertQuery :: Corpus.Query -> ESearch convertQuery :: Corpus.Query -> ESearch
convertQuery q = ESearch (interpretQuery q transformAST) convertQuery q = ESearch (interpretQuery q transformAST)
where where
transformAST :: BoolExpr Term -> [EscapeItem]
mergeTerms :: [QueryTerm] -> [EscapeItem]
mergeTerms trms = [QE $ TE.encodeUtf8 (Text.unwords $ map renderQueryTerm trms)]
transformAST :: BoolExpr [QueryTerm] -> [EscapeItem]
transformAST ast = case ast of transformAST ast = case ast of
BAnd sub (BConst (Negative term)) BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated. -- The second term become positive, so that it can be translated.
...@@ -81,11 +85,12 @@ convertQuery q = ESearch (interpretQuery q transformAST) ...@@ -81,11 +85,12 @@ convertQuery q = ESearch (interpretQuery q transformAST)
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone. -- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse BFalse
-> mempty -> mempty
BConst (Positive (Term term)) BConst (Positive terms)
-> [QE (TE.encodeUtf8 term)] -> mergeTerms terms
-- TODO(adinapoli) Support partial match queries
-- We can handle negatives via `ANDNOT` with itself. -- We can handle negatives via `ANDNOT` with itself.
BConst (Negative (Term term)) BConst (Negative terms)
-> [QN "NOT+", QE (TE.encodeUtf8 term)] -> [QN "NOT+"] <> mergeTerms terms
get :: Text get :: Text
-> Corpus.RawQuery -> Corpus.RawQuery
......
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Text.Corpus.Query ( module Gargantext.Core.Text.Corpus.Query (
Query -- * opaque Query -- * opaque
, RawQuery(..) , RawQuery(..)
, Limit(..) , Limit(..)
, QueryTerm(..)
, getQuery , getQuery
, parseQuery , parseQuery
, mapQuery , mapQuery
, renderQuery , renderQuery
, renderQueryTerm
, interpretQuery , interpretQuery
, ExternalAPIs(..) , ExternalAPIs(..)
, module BoolExpr , module BoolExpr
...@@ -49,13 +52,32 @@ newtype Limit = Limit { getLimit :: Int } ...@@ -49,13 +52,32 @@ newtype Limit = Limit { getLimit :: Int }
, Aeson.FromJSON, Aeson.ToJSON , Aeson.FromJSON, Aeson.ToJSON
, Swagger.ToParamSchema, Swagger.ToSchema) , Swagger.ToParamSchema, Swagger.ToSchema)
-- | A /query/ term, i.e. a node of the query expression tree which can be
-- either a Gargantext 'Term' (i.e. just a textual value) or something else,
-- like a partial match (i.e. the user is asking to perform a search that would
-- match only a suffix of a word).
data QueryTerm
= QT_exact_match Term
| QT_partial_match Term
deriving (Show, Eq, Ord)
instance IsString QueryTerm where
fromString input = case P.runParser queryTermToken () "Corpus.Query.fromString" input of
Left _ -> QT_exact_match (Term $ T.pack input)
Right [qt] -> qt
Right _ -> QT_exact_match (Term $ T.pack input)
renderQueryTerm :: QueryTerm -> T.Text
renderQueryTerm (QT_exact_match (Term t)) = t
renderQueryTerm (QT_partial_match (Term t)) = t
-- | An opaque wrapper around a 'Query' type which can be parsed from a boolean -- | An opaque wrapper around a 'Query' type which can be parsed from a boolean
-- expression like (a AND b) OR c, and which can be interpreted in many ways -- expression like (a AND b) OR c, and which can be interpreted in many ways
-- according to the particular service we are targeting. -- according to the particular service we are targeting.
newtype Query = Query { getQuery :: (BoolExpr.CNF Term) } newtype Query = Query { getQuery :: (BoolExpr.CNF [QueryTerm]) }
deriving Show deriving Show
interpretQuery :: Query -> (BoolExpr.BoolExpr Term -> ast) -> ast interpretQuery :: Query -> (BoolExpr.BoolExpr [QueryTerm] -> ast) -> ast
interpretQuery (Query q) transform = transform . simplify . BoolExpr.fromCNF $ q interpretQuery (Query q) transform = transform . simplify . BoolExpr.fromCNF $ q
simplify :: BoolExpr.BoolExpr a -> BoolExpr.BoolExpr a simplify :: BoolExpr.BoolExpr a -> BoolExpr.BoolExpr a
...@@ -78,22 +100,36 @@ simplify expr = case expr of ...@@ -78,22 +100,36 @@ simplify expr = case expr of
BFalse -> BFalse BFalse -> BFalse
BConst signed -> BConst signed BConst signed -> BConst signed
unsafeMkQuery :: BoolExpr.BoolExpr Term -> Query unsafeMkQuery :: BoolExpr.BoolExpr [QueryTerm] -> Query
unsafeMkQuery = Query . BoolExpr.boolTreeToCNF unsafeMkQuery = Query . BoolExpr.boolTreeToCNF
termToken :: CharParser st Term queryTermToken :: CharParser st [QueryTerm]
termToken = Term <$> (try (T.pack <$> BoolExpr.identifier) <|> (between dubQuote dubQuote multipleTerms)) queryTermToken = do
map mkQueryTerm <$> termToken
where
mkQueryTerm :: Term -> QueryTerm
mkQueryTerm (Term (T.unpack -> t)) =
case t of
'"' : '~' : rest
-> QT_partial_match (Term $ T.pack $ '"' : rest)
'~' : rest
-> QT_partial_match (Term $ T.pack $ '"' : rest)
_
-> QT_exact_match (Term $ T.pack t)
termToken :: CharParser st [Term]
termToken = (try ((:[]) . Term . T.pack <$> BoolExpr.identifier) <|> (between dubQuote dubQuote multipleTerms))
where where
dubQuote = BoolExpr.symbol "\"" dubQuote = BoolExpr.symbol "\""
multipleTerms = T.intercalate " " . map T.pack <$> sepBy BoolExpr.identifier BoolExpr.whiteSpace multipleTerms = map (Term . T.pack) <$> sepBy BoolExpr.identifier BoolExpr.whiteSpace
-- | Parses an input 'Text' into a 'Query', reporting an error if it fails. -- | Parses an input 'Text' into a 'Query', reporting an error if it fails.
parseQuery :: RawQuery -> Either String Query parseQuery :: RawQuery -> Either String Query
parseQuery (RawQuery txt) = bimap show (Query . BoolExpr.boolTreeToCNF) $ parseQuery (RawQuery txt) = bimap show (Query . BoolExpr.boolTreeToCNF) $
P.runParser (BoolExpr.parseBoolExpr termToken) () "Corpus.Query" (T.unpack txt) P.runParser (BoolExpr.parseBoolExpr queryTermToken) () "Corpus.Query" (T.unpack txt)
renderQuery :: Query -> RawQuery renderQuery :: Query -> RawQuery
renderQuery (Query cnf) = RawQuery . T.pack $ BoolExpr.boolExprPrinter (showsPrec 0) (BoolExpr.fromCNF cnf) "" renderQuery (Query cnf) = RawQuery . T.pack $ BoolExpr.boolExprPrinter (showsPrec 0) (BoolExpr.fromCNF cnf) ""
mapQuery :: (Term -> Term) -> Query -> Query mapQuery :: (QueryTerm -> QueryTerm) -> Query -> Query
mapQuery f = Query . fmap f . getQuery mapQuery f = Query . fmap (map f) . getQuery
...@@ -61,7 +61,41 @@ import Opaleye qualified as O hiding (Order) ...@@ -61,7 +61,41 @@ import Opaleye qualified as O hiding (Order)
queryToTsSearch :: API.Query -> Field SqlTSQuery queryToTsSearch :: API.Query -> Field SqlTSQuery
queryToTsSearch q = sqlToTSQuery $ T.unpack $ (API.interpretQuery q transformAST) queryToTsSearch q = sqlToTSQuery $ T.unpack $ (API.interpretQuery q transformAST)
where where
transformAST :: BoolExpr Term -> T.Text
-- It's important to understand how things work under the hood: When we perform
-- a search, we do it on a /ts vector/ in Postgres, which is already stemmed in
-- lexemes. For example, this:
--
-- SELECT to_tsvector('Effects on postpartum on vitamins and minerals in women');
--
-- yields:
--
-- 'effect':1 'miner':7 'postpartum':3 'vitamin':5 'women':9
--
-- As you can see, minimum processing has happened: plurals have been stripped and
-- what it looks like the Porter stemming has been applied (we get 'miner' instead
-- of the original /mineral/, for example.
--
-- Therefore, in case of exact match searches, we need to perform stemming /regardless/,
-- and this stemming should ideally match the one performed by Postgres.
--
-- Now, if the user is doing a partial match search (like \"~postpartum\" for example)
-- then we need to stem /AND/ use the \":*\" operator to perform a
-- sort of fuzzy search. Compare the followings:
--
-- SELECT to_tsvector('Effects on postpartum on vitamins and minerals in women') @@ to_tsquery('postpartum');
-- SELECT to_tsvector('Effects on postpartum on vitamins and minerals in women') @@ to_tsquery('postpart');
-- SELECT to_tsvector('Effects on postpartum on vitamins and minerals in women') @@ to_tsquery('postpart:*');
--
-- The first will match, the second won't, the third will.
renderQueryTerms :: [API.QueryTerm] -> T.Text
renderQueryTerms trms = T.intercalate " & " $ trms <&> \case
API.QT_exact_match (Term term)
-> stem EN GargPorterAlgorithm term
API.QT_partial_match (Term term)
-> stem EN GargPorterAlgorithm term <> ":*"
transformAST :: BoolExpr [API.QueryTerm] -> T.Text
transformAST ast = case ast of transformAST ast = case ast of
BAnd sub1 sub2 BAnd sub1 sub2
-> " (" <> transformAST sub1 <> " & " <> transformAST sub2 <> ") " -> " (" <> transformAST sub1 <> " & " <> transformAST sub2 <> ") "
...@@ -77,11 +111,11 @@ queryToTsSearch q = sqlToTSQuery $ T.unpack $ (API.interpretQuery q transformAST ...@@ -77,11 +111,11 @@ queryToTsSearch q = sqlToTSQuery $ T.unpack $ (API.interpretQuery q transformAST
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone. -- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse BFalse
-> T.empty -> T.empty
BConst (Positive (Term term)) BConst (Positive queryTerms)
-> T.intercalate " & " $ T.words term -> renderQueryTerms queryTerms
-- We can handle negatives via `ANDNOT` with itself. -- We can handle negatives via `ANDNOT` with itself.
BConst (Negative (Term term)) BConst (Negative queryTerms)
-> "!" <> term -> "!" <> renderQueryTerms queryTerms
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -181,7 +215,7 @@ searchInCorpus :: HasDBid NodeType ...@@ -181,7 +215,7 @@ searchInCorpus :: HasDBid NodeType
searchInCorpus cId t q o l order = runOpaQuery searchInCorpus cId t q o l order = runOpaQuery
$ filterWith o l order $ filterWith o l order
$ queryInCorpus cId t $ queryInCorpus cId t
$ API.mapQuery (Term . stem EN GargPorterAlgorithm . getTerm) q $ q
searchCountInCorpus :: HasDBid NodeType searchCountInCorpus :: HasDBid NodeType
=> CorpusId => CorpusId
...@@ -190,7 +224,7 @@ searchCountInCorpus :: HasDBid NodeType ...@@ -190,7 +224,7 @@ searchCountInCorpus :: HasDBid NodeType
-> DBCmd err Int -> DBCmd err Int
searchCountInCorpus cId t q = runCountOpaQuery searchCountInCorpus cId t q = runCountOpaQuery
$ queryInCorpus cId t $ queryInCorpus cId t
$ API.mapQuery (Term . stem EN GargPorterAlgorithm . getTerm) q $ q
queryInCorpus :: HasDBid NodeType queryInCorpus :: HasDBid NodeType
=> CorpusId => CorpusId
...@@ -233,7 +267,7 @@ searchInCorpusWithContacts cId aId q o l _order = ...@@ -233,7 +267,7 @@ searchInCorpusWithContacts cId aId q o l _order =
$ offset' o $ offset' o
$ orderBy (desc _fp_score) $ orderBy (desc _fp_score)
$ selectGroup cId aId $ selectGroup cId aId
$ API.mapQuery (Term . stem EN GargPorterAlgorithm . getTerm) q $ q
selectGroup :: HasDBid NodeType selectGroup :: HasDBid NodeType
=> CorpusId => CorpusId
......
...@@ -81,7 +81,7 @@ tests = withResource pubmedSettings (const (pure ())) $ \getPubmedKey -> ...@@ -81,7 +81,7 @@ tests = withResource pubmedSettings (const (pure ())) $ \getPubmedKey ->
-- | Checks that the 'RawQuery' can be translated into the expected 'BoolExpr' form, -- | Checks that the 'RawQuery' can be translated into the expected 'BoolExpr' form,
-- by also checking that both renders back to the initial 'RawQuery'. -- by also checking that both renders back to the initial 'RawQuery'.
translatesInto :: RawQuery -> BoolExpr Term -> Property translatesInto :: RawQuery -> BoolExpr [QueryTerm] -> Property
(translatesInto) raw boolExpr = (translatesInto) raw boolExpr =
let parsed = parseQuery raw let parsed = parseQuery raw
expected = Right (unsafeMkQuery boolExpr) expected = Right (unsafeMkQuery boolExpr)
...@@ -89,70 +89,69 @@ translatesInto :: RawQuery -> BoolExpr Term -> Property ...@@ -89,70 +89,69 @@ translatesInto :: RawQuery -> BoolExpr Term -> Property
(renderQuery <$> parsed) === (renderQuery <$> expected) (renderQuery <$> parsed) === (renderQuery <$> expected)
testParse01 :: Property testParse01 :: Property
testParse01 = "A OR B" `translatesInto` (BConst (Positive "A") `BOr` BConst (Positive "B")) testParse01 = "A OR B" `translatesInto` (BConst (Positive ["A"]) `BOr` BConst (Positive ["B"]))
testParse02 :: Property testParse02 :: Property
testParse02 = "A AND B" `translatesInto` (BConst (Positive "A") `BAnd` BConst (Positive "B")) testParse02 = "A AND B" `translatesInto` (BConst (Positive ["A"]) `BAnd` BConst (Positive ["B"]))
testParse03 :: Property testParse03 :: Property
testParse03 = "-A" `translatesInto` (BConst (Negative "A")) testParse03 = "-A" `translatesInto` (BConst (Negative ["A"]))
testParse03_01 :: Property testParse03_01 :: Property
testParse03_01 = "NOT A" `translatesInto` (BConst (Negative "A")) testParse03_01 = "NOT A" `translatesInto` (BConst (Negative ["A"]))
testParse04 :: Property testParse04 :: Property
testParse04 = "A -B" `translatesInto` (BConst (Positive "A") `BAnd` BConst (Negative "B")) testParse04 = "A -B" `translatesInto` (BConst (Positive ["A"]) `BAnd` BConst (Negative ["B"]))
-- Both 'A -B' and 'A AND -B' desugars into the same form. -- Both 'A -B' and 'A AND -B' desugars into the same form.
testParse04_01 :: Property testParse04_01 :: Property
testParse04_01 = "A AND -B" `translatesInto` (BConst (Positive "A") `BAnd` BConst (Negative "B")) testParse04_01 = "A AND -B" `translatesInto` (BConst (Positive ["A"]) `BAnd` BConst (Negative ["B"]))
testParse05 :: Property testParse05 :: Property
testParse05 = "A AND B -C" `translatesInto` ((BConst (Positive "A") `BAnd` BConst (Positive "B")) `BAnd` BConst (Negative "C")) testParse05 = "A AND B -C" `translatesInto` ((BConst (Positive ["A"]) `BAnd` BConst (Positive ["B"])) `BAnd` BConst (Negative ["C"]))
testParse05_01 :: Property testParse05_01 :: Property
testParse05_01 = testParse05_01 =
"A AND (B -C)" `translatesInto` (BConst (Positive "A") `BAnd` (BConst (Positive "B") `BAnd` BConst (Negative "C"))) "A AND (B -C)" `translatesInto` (BConst (Positive ["A"]) `BAnd` (BConst (Positive ["B"]) `BAnd` BConst (Negative ["C"])))
testParse06 :: Property testParse06 :: Property
testParse06 = testParse06 =
translatesInto "(A OR B OR NOT C) AND (D OR E OR F) -(G OR H OR I)" translatesInto "(A OR B OR NOT C) AND (D OR E OR F) -(G OR H OR I)"
( (
( (
((BConst (Positive "A") `BOr` (BConst (Positive "B"))) `BOr` (BConst (Negative "C"))) ((BConst (Positive ["A"]) `BOr` (BConst (Positive ["B"]))) `BOr` (BConst (Negative ["C"])))
`BAnd` `BAnd`
((BConst (Positive "D") `BOr` (BConst (Positive "E"))) `BOr` (BConst (Positive "F"))) ((BConst (Positive ["D"]) `BOr` (BConst (Positive ["E"]))) `BOr` (BConst (Positive ["F"])))
) )
`BAnd` BNot ( `BAnd` BNot (
((BConst (Positive "G") `BOr` (BConst (Positive "H"))) `BOr` (BConst (Positive "I"))) ((BConst (Positive ["G"]) `BOr` (BConst (Positive ["H"]))) `BOr` (BConst (Positive ["I"])))
) )
) )
testParse07 :: Property testParse07 :: Property
testParse07 = testParse07 =
translatesInto "\"Haskell\" AND \"Agda\"" translatesInto "\"Haskell\" AND \"Agda\""
((BConst (Positive "Haskell") `BAnd` (BConst (Positive "Agda")))) ((BConst (Positive ["Haskell"]) `BAnd` (BConst (Positive ["Agda"]))))
testParse07_01 :: Property testParse07_01 :: Property
testParse07_01 = testParse07_01 =
translatesInto "Haskell AND Agda" translatesInto "Haskell AND Agda"
((BConst (Positive "Haskell") `BAnd` (BConst (Positive "Agda")))) ((BConst (Positive ["Haskell"]) `BAnd` (BConst (Positive ["Agda"]))))
testParse07_02 :: Property testParse07_02 :: Property
testParse07_02 = testParse07_02 =
translatesInto "Raphael" translatesInto "Raphael"
((BConst (Positive "Raphael"))) ((BConst (Positive ["Raphael"])))
testParse07_03 :: Property testParse07_03 :: Property
testParse07_03 = testParse07_03 =
translatesInto "Niki" ((BConst (Positive "Niki"))) .&&. translatesInto "Niki" ((BConst (Positive ["Niki"]))) .&&.
translatesInto "Ajeje" ((BConst (Positive "Ajeje"))) .&&. translatesInto "Ajeje" ((BConst (Positive ["Ajeje"]))) .&&.
translatesInto "Orf" ((BConst (Positive "Orf"))) translatesInto "Orf" ((BConst (Positive ["Orf"])))
testWordsIntoConst :: Assertion testWordsIntoConst :: Assertion
testWordsIntoConst = testWordsIntoConst =
let (expected :: BoolExpr Term) = let (expected :: BoolExpr [QueryTerm]) = fromCNF (boolTreeToCNF @[QueryTerm] $ (BAnd (BOr (BConst (Positive [QT_exact_match "The",QT_exact_match "Art",QT_exact_match "of",QT_exact_match "Computer",QT_exact_match "Programming"])) BFalse) (BAnd (BOr (BConst (Positive [QT_exact_match "Conceptual",QT_exact_match "Mathematics"])) BFalse) BTrue)))
fromCNF (boolTreeToCNF @Term $ (BConst (Positive "The Art of Computer Programming") `BAnd` (BConst (Positive "Conceptual Mathematics"))))
in case parseQuery "\"The Art of Computer Programming\" AND \"Conceptual Mathematics\"" of in case parseQuery "\"The Art of Computer Programming\" AND \"Conceptual Mathematics\"" of
Left err Left err
-> assertBool err False -> assertBool err False
...@@ -168,43 +167,43 @@ withValidQuery rawQuery onValidParse = do ...@@ -168,43 +167,43 @@ withValidQuery rawQuery onValidParse = do
testArxiv01_01 :: Assertion testArxiv01_01 :: Assertion
testArxiv01_01 = withValidQuery "A AND B" $ \q -> testArxiv01_01 = withValidQuery "A AND B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"])))) (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv01_02 :: Assertion testArxiv01_02 :: Assertion
testArxiv01_02 = withValidQuery "\"Haskell\" AND \"Agda\"" $ \q -> testArxiv01_02 = withValidQuery "\"Haskell\" AND \"Agda\"" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["Haskell"]) ((Arxiv.Exp $ Arxiv.Abs ["Agda"])))) (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["Haskell"]) ((Arxiv.Exp $ Arxiv.Abs ["Agda"]))))
testArxiv02 :: Assertion testArxiv02 :: Assertion
testArxiv02 = withValidQuery "A OR B" $ \q -> testArxiv02 = withValidQuery "A OR B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"])))) (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv03_01 :: Assertion testArxiv03_01 :: Assertion
testArxiv03_01 = withValidQuery "A AND NOT B" $ \q -> testArxiv03_01 = withValidQuery "A AND NOT B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"])))) (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv03_02 :: Assertion testArxiv03_02 :: Assertion
testArxiv03_02 = withValidQuery "A AND -B" $ \q -> testArxiv03_02 = withValidQuery "A AND -B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"])))) (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
-- Double negation get turned into positive. -- Double negation get turned into positive.
testArxiv04_01 :: Assertion testArxiv04_01 :: Assertion
testArxiv04_01 = withValidQuery "A AND NOT (NOT B)" $ \q -> testArxiv04_01 = withValidQuery "A AND NOT (NOT B)" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"])))) (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv04_02 :: Assertion testArxiv04_02 :: Assertion
testArxiv04_02 = withValidQuery "A AND NOT (NOT (NOT B))" $ \q -> testArxiv04_02 = withValidQuery "A AND NOT (NOT (NOT B))" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"])))) (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv05 :: Assertion testArxiv05 :: Assertion
testArxiv05 = withValidQuery "A OR NOT B" $ \q -> testArxiv05 = withValidQuery "A OR NOT B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just ( (Arxiv.qExp (Arxiv.convertQuery q) == Just (
Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"]) Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"])
(Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["B"]) (Arxiv.Exp $ Arxiv.Abs ["B"])) (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["B"]) (Arxiv.Exp $ Arxiv.Abs ["B"]))
...@@ -213,7 +212,7 @@ testArxiv05 = withValidQuery "A OR NOT B" $ \q -> ...@@ -213,7 +212,7 @@ testArxiv05 = withValidQuery "A OR NOT B" $ \q ->
testArxiv06 :: Assertion testArxiv06 :: Assertion
testArxiv06 = withValidQuery "-A" $ \q -> testArxiv06 = withValidQuery "-A" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just ( (Arxiv.qExp (Arxiv.convertQuery q) == Just (
Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) (Arxiv.Exp $ Arxiv.Abs ["A"]) Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) (Arxiv.Exp $ Arxiv.Abs ["A"])
) )
...@@ -225,7 +224,7 @@ testArxiv06 = withValidQuery "-A" $ \q -> ...@@ -225,7 +224,7 @@ testArxiv06 = withValidQuery "-A" $ \q ->
testPubMed01 :: Assertion testPubMed01 :: Assertion
testPubMed01 = withValidQuery "A" $ \q -> testPubMed01 = withValidQuery "A" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Pubmed.getESearch (Pubmed.convertQuery q) == "A") (Pubmed.getESearch (Pubmed.convertQuery q) == "A")
testPubMed02_01 :: Assertion testPubMed02_01 :: Assertion
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment