Commit d8612190 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Add Arxiv tests

This commit adds a bunch of Arxiv tests to demonstrate we can
successfully transform our `Query` type into the Arxiv AST.
parent 9173f8b3
...@@ -53,6 +53,7 @@ library ...@@ -53,6 +53,7 @@ library
Gargantext.Core.Text Gargantext.Core.Text
Gargantext.Core.Text.Context Gargantext.Core.Text.Context
Gargantext.Core.Text.Corpus.API Gargantext.Core.Text.Corpus.API
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.Query Gargantext.Core.Text.Corpus.Query
Gargantext.Core.Text.Corpus.Parsers Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers.CSV Gargantext.Core.Text.Corpus.Parsers.CSV
...@@ -186,7 +187,6 @@ library ...@@ -186,7 +187,6 @@ library
Gargantext.Core.NodeStoryFile Gargantext.Core.NodeStoryFile
Gargantext.Core.Statistics Gargantext.Core.Statistics
Gargantext.Core.Text.Convert Gargantext.Core.Text.Convert
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.Hal Gargantext.Core.Text.Corpus.API.Hal
Gargantext.Core.Text.Corpus.API.Isidore Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex Gargantext.Core.Text.Corpus.API.Istex
...@@ -902,6 +902,7 @@ test-suite garg-test ...@@ -902,6 +902,7 @@ test-suite garg-test
, boolexpr , boolexpr
, bytestring , bytestring
, containers , containers
, crawlerArxiv
, duckling , duckling
, extra , extra
, gargantext , gargantext
......
...@@ -81,6 +81,7 @@ library: ...@@ -81,6 +81,7 @@ library:
- Gargantext.Core.Text - Gargantext.Core.Text
- Gargantext.Core.Text.Context - Gargantext.Core.Text.Context
- Gargantext.Core.Text.Corpus.API - Gargantext.Core.Text.Corpus.API
- Gargantext.Core.Text.Corpus.API.Arxiv
- Gargantext.Core.Text.Corpus.Query - Gargantext.Core.Text.Corpus.Query
- Gargantext.Core.Text.Corpus.Parsers - Gargantext.Core.Text.Corpus.Parsers
- Gargantext.Core.Text.Corpus.Parsers.CSV - Gargantext.Core.Text.Corpus.Parsers.CSV
...@@ -520,6 +521,7 @@ tests: ...@@ -520,6 +521,7 @@ tests:
- boolexpr - boolexpr
- bytestring - bytestring
- containers - containers
- crawlerArxiv
- duckling - duckling
- gargantext - gargantext
- gargantext-prelude - gargantext-prelude
......
...@@ -7,6 +7,8 @@ import Data.BoolExpr ...@@ -7,6 +7,8 @@ import Data.BoolExpr
import Gargantext.Core.Text.Corpus.Query import Gargantext.Core.Text.Corpus.Query
import Gargantext.Core.Types import Gargantext.Core.Types
import Prelude import Prelude
import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
import qualified Network.Api.Arxiv as Arxiv
import Test.Tasty import Test.Tasty
import Test.Tasty.HUnit import Test.Tasty.HUnit
...@@ -24,7 +26,18 @@ tests = testGroup "Boolean Query Engine" [ ...@@ -24,7 +26,18 @@ tests = testGroup "Boolean Query Engine" [
, testProperty "Parses 'A AND (B -C)' (right associative)" testParse05_01 , testProperty "Parses 'A AND (B -C)' (right associative)" testParse05_01
, testProperty "Parses (A OR B OR NOT C) AND (D OR E OR F) -(G OR H OR I)" testParse06 , testProperty "Parses (A OR B OR NOT C) AND (D OR E OR F) -(G OR H OR I)" testParse06
, testCase "Parses words into a single constant" testWordsIntoConst , testCase "Parses words into a single constant" testWordsIntoConst
, testGroup "Arxiv expression converter" [
testCase "It supports 'A AND B'" testArxiv01_01
, testCase "It supports '\"Haskell\" AND \"Agda\"'" testArxiv01_02
, testCase "It supports 'A OR B'" testArxiv02
, testCase "It supports 'A AND NOT B'" testArxiv03_01
, testCase "It supports 'A AND -B'" testArxiv03_02
, testCase "It supports 'A AND -B'" testArxiv03_02
, testCase "It supports 'A AND NOT (NOT B)'" testArxiv04_01
, testCase "It supports 'A AND NOT (NOT (NOT B))'" testArxiv04_02
, testCase "It supports 'A OR NOT B'" testArxiv05
, testCase "It supports '-A'" testArxiv06
]
] ]
-- | Checks that the 'RawQuery' can be translated into the expected 'BoolExpr' form, -- | Checks that the 'RawQuery' can be translated into the expected 'BoolExpr' form,
...@@ -85,3 +98,63 @@ testWordsIntoConst = ...@@ -85,3 +98,63 @@ testWordsIntoConst =
-> assertBool err False -> assertBool err False
Right x Right x
-> fromCNF (getQuery x) @?= expected -> fromCNF (getQuery x) @?= expected
withValidQuery :: RawQuery -> (Query -> Assertion) -> Assertion
withValidQuery rawQuery onValidParse = do
case parseQuery rawQuery of
Left err -> assertBool err False
Right x -> onValidParse x
testArxiv01_01 :: Assertion
testArxiv01_01 = withValidQuery "A AND B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv01_02 :: Assertion
testArxiv01_02 = withValidQuery "\"Haskell\" AND \"Agda\"" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["Haskell"]) ((Arxiv.Exp $ Arxiv.Abs ["Agda"]))))
testArxiv02 :: Assertion
testArxiv02 = withValidQuery "A OR B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv03_01 :: Assertion
testArxiv03_01 = withValidQuery "A AND NOT B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv03_02 :: Assertion
testArxiv03_02 = withValidQuery "A AND -B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
-- Double negation get turned into positive.
testArxiv04_01 :: Assertion
testArxiv04_01 = withValidQuery "A AND NOT (NOT B)" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv04_02 :: Assertion
testArxiv04_02 = withValidQuery "A AND NOT (NOT (NOT B))" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv05 :: Assertion
testArxiv05 = withValidQuery "A OR NOT B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (
Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"])
(Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["B"]) (Arxiv.Exp $ Arxiv.Abs ["B"]))
)
)
testArxiv06 :: Assertion
testArxiv06 = withValidQuery "-A" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (
Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) (Arxiv.Exp $ Arxiv.Abs ["A"])
)
)
...@@ -13,7 +13,10 @@ Portability : POSIX ...@@ -13,7 +13,10 @@ Portability : POSIX
{-# LANGUAGE ViewPatterns #-} {-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Text.Corpus.API.Arxiv module Gargantext.Core.Text.Corpus.API.Arxiv
where ( get
-- * Internals for testing
, convertQuery
) where
import Conduit import Conduit
import Data.Maybe import Data.Maybe
...@@ -34,6 +37,14 @@ import qualified Network.Api.Arxiv as Ax ...@@ -34,6 +37,14 @@ import qualified Network.Api.Arxiv as Ax
convertQuery :: Corpus.Query -> Ax.Query convertQuery :: Corpus.Query -> Ax.Query
convertQuery q = mkQuery (interpretQuery q transformAST) convertQuery q = mkQuery (interpretQuery q transformAST)
where where
mkQuery :: Maybe Ax.Expression -> Ax.Query
mkQuery mb_exp = Ax.Query { Ax.qExp = mb_exp
, Ax.qIds = []
, Ax.qStart = 0
, Ax.qItems = Arxiv.batchSize }
-- Converts a 'BoolExpr' with 'Term's on the leaves into an Arxiv's expression.
-- It yields 'Nothing' if the AST cannot be converted into a meaningful expression.
transformAST :: BoolExpr Term -> Maybe Ax.Expression transformAST :: BoolExpr Term -> Maybe Ax.Expression
transformAST ast = case ast of transformAST ast = case ast of
BAnd sub (BConst (Negative term)) BAnd sub (BConst (Negative term))
...@@ -47,23 +58,20 @@ convertQuery q = mkQuery (interpretQuery q transformAST) ...@@ -47,23 +58,20 @@ convertQuery q = mkQuery (interpretQuery q transformAST)
-> Ax.Or <$> transformAST sub1 <*> transformAST sub2 -> Ax.Or <$> transformAST sub1 <*> transformAST sub2
BNot (BConst (Negative term)) BNot (BConst (Negative term))
-> transformAST (BConst (Positive term)) -- double negation -> transformAST (BConst (Positive term)) -- double negation
BNot _ -- We can handle negatives via `ANDNOT` with itself.
-> Nothing BNot sub
-> Ax.AndNot <$> transformAST sub <*> transformAST sub
-- BTrue cannot happen is the query parser doesn't support parsing 'TRUE' alone.
BTrue BTrue
-> Nothing -> Nothing
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse BFalse
-> Nothing -> Nothing
BConst (Positive (Term term)) BConst (Positive (Term term))
-> Just $ Ax.Exp $ Ax.Abs [unpack term] -> Just $ Ax.Exp $ Ax.Abs [unpack term]
-- Do not handle negative terms, because we don't have a way to represent them in Arxiv. -- We can handle negatives via `ANDNOT` with itself.
BConst (Negative _) BConst (Negative (Term term))
-> Nothing -> Just $ Ax.AndNot (Ax.Exp $ Ax.Abs [unpack term]) (Ax.Exp $ Ax.Abs [unpack term])
mkQuery :: Maybe Ax.Expression -> Ax.Query
mkQuery mb_exp = Ax.Query { Ax.qExp = mb_exp
, Ax.qIds = []
, Ax.qStart = 0
, Ax.qItems = Arxiv.batchSize }
-- | TODO put default pubmed query in gargantext.ini -- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs -- by default: 10K docs
......
...@@ -51,7 +51,27 @@ newtype Query = Query { getQuery :: (BoolExpr.CNF Term) } ...@@ -51,7 +51,27 @@ newtype Query = Query { getQuery :: (BoolExpr.CNF Term) }
deriving Show deriving Show
interpretQuery :: Query -> (BoolExpr.BoolExpr Term -> ast) -> ast interpretQuery :: Query -> (BoolExpr.BoolExpr Term -> ast) -> ast
interpretQuery (Query q) transform = transform (BoolExpr.fromCNF q) interpretQuery (Query q) transform = transform . simplify . BoolExpr.fromCNF $ q
simplify :: BoolExpr.BoolExpr a -> BoolExpr.BoolExpr a
simplify expr = case expr of
BAnd sub BTrue -> simplify sub
BAnd BTrue sub -> simplify sub
BAnd BFalse _ -> BFalse
BAnd _ BFalse -> BFalse
BAnd sub1 sub2 -> BAnd (simplify sub1) (simplify sub2)
BOr _ BTrue -> BTrue
BOr BTrue _ -> BTrue
BOr sub BFalse -> simplify sub
BOr BFalse sub -> simplify sub
BOr sub1 sub2 -> BOr (simplify sub1) (simplify sub2)
BNot BTrue -> BFalse
BNot BFalse -> BTrue
BNot (BNot sub) -> simplify sub
BNot sub -> BNot (simplify sub)
BTrue -> BTrue
BFalse -> BFalse
BConst signed -> BConst signed
unsafeMkQuery :: BoolExpr.BoolExpr Term -> Query unsafeMkQuery :: BoolExpr.BoolExpr Term -> Query
unsafeMkQuery = Query . BoolExpr.boolTreeToCNF unsafeMkQuery = Query . BoolExpr.boolTreeToCNF
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment