Commit d8612190 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Add Arxiv tests

This commit adds a bunch of Arxiv tests to demonstrate we can
successfully transform our `Query` type into the Arxiv AST.
parent 9173f8b3
......@@ -53,6 +53,7 @@ library
Gargantext.Core.Text
Gargantext.Core.Text.Context
Gargantext.Core.Text.Corpus.API
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.Query
Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers.CSV
......@@ -186,7 +187,6 @@ library
Gargantext.Core.NodeStoryFile
Gargantext.Core.Statistics
Gargantext.Core.Text.Convert
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.Hal
Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex
......@@ -902,6 +902,7 @@ test-suite garg-test
, boolexpr
, bytestring
, containers
, crawlerArxiv
, duckling
, extra
, gargantext
......
......@@ -81,6 +81,7 @@ library:
- Gargantext.Core.Text
- Gargantext.Core.Text.Context
- Gargantext.Core.Text.Corpus.API
- Gargantext.Core.Text.Corpus.API.Arxiv
- Gargantext.Core.Text.Corpus.Query
- Gargantext.Core.Text.Corpus.Parsers
- Gargantext.Core.Text.Corpus.Parsers.CSV
......@@ -520,6 +521,7 @@ tests:
- boolexpr
- bytestring
- containers
- crawlerArxiv
- duckling
- gargantext
- gargantext-prelude
......
......@@ -7,6 +7,8 @@ import Data.BoolExpr
import Gargantext.Core.Text.Corpus.Query
import Gargantext.Core.Types
import Prelude
import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
import qualified Network.Api.Arxiv as Arxiv
import Test.Tasty
import Test.Tasty.HUnit
......@@ -24,7 +26,18 @@ tests = testGroup "Boolean Query Engine" [
, testProperty "Parses 'A AND (B -C)' (right associative)" testParse05_01
, testProperty "Parses (A OR B OR NOT C) AND (D OR E OR F) -(G OR H OR I)" testParse06
, testCase "Parses words into a single constant" testWordsIntoConst
, testGroup "Arxiv expression converter" [
testCase "It supports 'A AND B'" testArxiv01_01
, testCase "It supports '\"Haskell\" AND \"Agda\"'" testArxiv01_02
, testCase "It supports 'A OR B'" testArxiv02
, testCase "It supports 'A AND NOT B'" testArxiv03_01
, testCase "It supports 'A AND -B'" testArxiv03_02
, testCase "It supports 'A AND -B'" testArxiv03_02
, testCase "It supports 'A AND NOT (NOT B)'" testArxiv04_01
, testCase "It supports 'A AND NOT (NOT (NOT B))'" testArxiv04_02
, testCase "It supports 'A OR NOT B'" testArxiv05
, testCase "It supports '-A'" testArxiv06
]
]
-- | Checks that the 'RawQuery' can be translated into the expected 'BoolExpr' form,
......@@ -85,3 +98,63 @@ testWordsIntoConst =
-> assertBool err False
Right x
-> fromCNF (getQuery x) @?= expected
withValidQuery :: RawQuery -> (Query -> Assertion) -> Assertion
withValidQuery rawQuery onValidParse = do
case parseQuery rawQuery of
Left err -> assertBool err False
Right x -> onValidParse x
testArxiv01_01 :: Assertion
testArxiv01_01 = withValidQuery "A AND B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv01_02 :: Assertion
testArxiv01_02 = withValidQuery "\"Haskell\" AND \"Agda\"" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["Haskell"]) ((Arxiv.Exp $ Arxiv.Abs ["Agda"]))))
testArxiv02 :: Assertion
testArxiv02 = withValidQuery "A OR B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv03_01 :: Assertion
testArxiv03_01 = withValidQuery "A AND NOT B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv03_02 :: Assertion
testArxiv03_02 = withValidQuery "A AND -B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
-- Double negation get turned into positive.
testArxiv04_01 :: Assertion
testArxiv04_01 = withValidQuery "A AND NOT (NOT B)" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv04_02 :: Assertion
testArxiv04_02 = withValidQuery "A AND NOT (NOT (NOT B))" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv05 :: Assertion
testArxiv05 = withValidQuery "A OR NOT B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (
Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"])
(Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["B"]) (Arxiv.Exp $ Arxiv.Abs ["B"]))
)
)
testArxiv06 :: Assertion
testArxiv06 = withValidQuery "-A" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (
Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) (Arxiv.Exp $ Arxiv.Abs ["A"])
)
)
......@@ -13,7 +13,10 @@ Portability : POSIX
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Text.Corpus.API.Arxiv
where
( get
-- * Internals for testing
, convertQuery
) where
import Conduit
import Data.Maybe
......@@ -34,6 +37,14 @@ import qualified Network.Api.Arxiv as Ax
convertQuery :: Corpus.Query -> Ax.Query
convertQuery q = mkQuery (interpretQuery q transformAST)
where
mkQuery :: Maybe Ax.Expression -> Ax.Query
mkQuery mb_exp = Ax.Query { Ax.qExp = mb_exp
, Ax.qIds = []
, Ax.qStart = 0
, Ax.qItems = Arxiv.batchSize }
-- Converts a 'BoolExpr' with 'Term's on the leaves into an Arxiv's expression.
-- It yields 'Nothing' if the AST cannot be converted into a meaningful expression.
transformAST :: BoolExpr Term -> Maybe Ax.Expression
transformAST ast = case ast of
BAnd sub (BConst (Negative term))
......@@ -47,23 +58,20 @@ convertQuery q = mkQuery (interpretQuery q transformAST)
-> Ax.Or <$> transformAST sub1 <*> transformAST sub2
BNot (BConst (Negative term))
-> transformAST (BConst (Positive term)) -- double negation
BNot _
-> Nothing
-- We can handle negatives via `ANDNOT` with itself.
BNot sub
-> Ax.AndNot <$> transformAST sub <*> transformAST sub
-- BTrue cannot happen is the query parser doesn't support parsing 'TRUE' alone.
BTrue
-> Nothing
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse
-> Nothing
BConst (Positive (Term term))
-> Just $ Ax.Exp $ Ax.Abs [unpack term]
-- Do not handle negative terms, because we don't have a way to represent them in Arxiv.
BConst (Negative _)
-> Nothing
mkQuery :: Maybe Ax.Expression -> Ax.Query
mkQuery mb_exp = Ax.Query { Ax.qExp = mb_exp
, Ax.qIds = []
, Ax.qStart = 0
, Ax.qItems = Arxiv.batchSize }
-- We can handle negatives via `ANDNOT` with itself.
BConst (Negative (Term term))
-> Just $ Ax.AndNot (Ax.Exp $ Ax.Abs [unpack term]) (Ax.Exp $ Ax.Abs [unpack term])
-- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs
......
......@@ -51,7 +51,27 @@ newtype Query = Query { getQuery :: (BoolExpr.CNF Term) }
deriving Show
interpretQuery :: Query -> (BoolExpr.BoolExpr Term -> ast) -> ast
interpretQuery (Query q) transform = transform (BoolExpr.fromCNF q)
interpretQuery (Query q) transform = transform . simplify . BoolExpr.fromCNF $ q
simplify :: BoolExpr.BoolExpr a -> BoolExpr.BoolExpr a
simplify expr = case expr of
BAnd sub BTrue -> simplify sub
BAnd BTrue sub -> simplify sub
BAnd BFalse _ -> BFalse
BAnd _ BFalse -> BFalse
BAnd sub1 sub2 -> BAnd (simplify sub1) (simplify sub2)
BOr _ BTrue -> BTrue
BOr BTrue _ -> BTrue
BOr sub BFalse -> simplify sub
BOr BFalse sub -> simplify sub
BOr sub1 sub2 -> BOr (simplify sub1) (simplify sub2)
BNot BTrue -> BFalse
BNot BFalse -> BTrue
BNot (BNot sub) -> simplify sub
BNot sub -> BNot (simplify sub)
BTrue -> BTrue
BFalse -> BFalse
BConst signed -> BConst signed
unsafeMkQuery :: BoolExpr.BoolExpr Term -> Query
unsafeMkQuery = Query . BoolExpr.boolTreeToCNF
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment