{-# LANGUAGE DerivingStrategies  #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications    #-}
module Test.Core.Text.Corpus.Query (tests) where

import Data.BoolExpr
import Data.Conduit
import Data.String
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Prelude
import System.Environment
import qualified Data.Text as T
import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as Pubmed
import qualified Network.Api.Arxiv as Arxiv

import Test.Tasty
import Test.Tasty.HUnit
import Test.Tasty.QuickCheck hiding (Positive, Negative)

newtype PubmedApiKey
  = PubmedApiKey { _PubmedApiKey :: T.Text }
  deriving stock (Show, Eq)
  deriving newtype IsString

pubmedSettings :: IO (Maybe PubmedApiKey)
pubmedSettings = fmap fromString <$> lookupEnv "PUBMED_API_KEY"

tests :: TestTree
tests = withResource pubmedSettings (const (pure ())) $ \getPubmedKey ->
  testGroup "Boolean Query Engine" [
    testProperty "Parses 'A OR B'" testParse01
  , testProperty "Parses 'A AND B'" testParse02
  , testProperty "Parses '-A'" testParse03
  , testProperty "Parses 'NOT A'" testParse03_01
  , testProperty "Parses 'A -B'" testParse04
  , testProperty "Parses 'A NOT -B'" testParse04_01
  , testProperty "Parses 'A AND B -C'   (left associative)" testParse05
  , testProperty "Parses 'A AND (B -C)' (right associative)" testParse05_01
  , testProperty "Parses (A OR B OR NOT C) AND (D OR E OR F) -(G OR H OR I)" testParse06
  , testProperty "It supports '\"Haskell\" AND \"Idris\"'" testParse07
  , testProperty "It supports 'Haskell AND Idris'" testParse07_01
  , testProperty "It supports 'Raphael'" testParse07_02
  , testProperty "It supports 'Niki', 'Ajeje' and 'Orf'" testParse07_03
  , testCase     "Parses words into a single constant" testWordsIntoConst
  , testGroup "Arxiv expression converter" [
      testCase "It supports 'A AND B'"     testArxiv01_01
    , testCase "It supports '\"Haskell\" AND \"Agda\"'" testArxiv01_02
    , testCase "It supports 'A OR B'"        testArxiv02
    , testCase "It supports 'A AND NOT B'"   testArxiv03_01
    , testCase "It supports 'A AND -B'"      testArxiv03_02
    , testCase "It supports 'A AND -B'"      testArxiv03_02
    , testCase "It supports 'A AND NOT (NOT B)'"  testArxiv04_01
    , testCase "It supports 'A AND NOT (NOT (NOT B))'"  testArxiv04_02
    , testCase "It supports 'A OR NOT B'"  testArxiv05
    , testCase "It supports '-A'"  testArxiv06
    ]
  , testGroup "PUBMED expression converter" [
      testCase "It supports 'A'" testPubMed01
    , testCase "It supports '-A'" testPubMed02_01
    , testCase "It supports 'NOT A'" testPubMed02_02
    , testCase "It supports 'NOT (NOT A)'" testPubMed02_03
    , testCase "It supports '\"Haskell\" AND \"Idris\"'" testPubMed03
    , testCase "It supports 'A OR B'" testPubMed04
    ]
  , testGroup "PUBMED real queries (skipped if PUBMED_API_KEY env var not set)" [
      testCase "It searches for \"Covid\"" (testPubMedCovid_01 getPubmedKey)
    , testCase "It searches for \"Covid\" AND \"Alzheimer\"" (testPubMedCovid_02 getPubmedKey)
    ]
  -- We skip the Arxiv tests if the PUBMED_API_KEY is not set just for conveniency, to have
  -- only a single flow-control mechanism.
  , testGroup "ARXIV real queries (skipped if PUBMED_API_KEY env var not set)" [
      testCase "It searches for \"Haskell\"" (testArxivRealWorld_01 getPubmedKey)
    , testCase "It searches for \"Haskell\" AND \"Agda\"" (testArxivRealWorld_02 getPubmedKey)
    ]
  ]

-- | Checks that the 'RawQuery' can be translated into the expected 'BoolExpr' form,
-- by also checking that both renders back to the initial 'RawQuery'.
translatesInto :: RawQuery -> BoolExpr [QueryTerm] -> Property
(translatesInto) raw boolExpr =
  let parsed   = parseQuery raw
      expected = Right (unsafeMkQuery boolExpr)
  in counterexample (show parsed <> " != " <> show expected) $
       (renderQuery <$> parsed) === (renderQuery <$> expected)

testParse01 :: Property
testParse01 = "A OR B" `translatesInto` (BConst (Positive ["A"]) `BOr` BConst (Positive ["B"]))

testParse02 :: Property
testParse02 = "A AND B" `translatesInto` (BConst (Positive ["A"]) `BAnd` BConst (Positive ["B"]))

testParse03 :: Property
testParse03 = "-A" `translatesInto` (BConst (Negative ["A"]))

testParse03_01 :: Property
testParse03_01 = "NOT A" `translatesInto` (BConst (Negative ["A"]))

testParse04 :: Property
testParse04 = "A -B" `translatesInto` (BConst (Positive ["A"]) `BAnd` BConst (Negative ["B"]))

-- Both 'A -B' and 'A AND -B' desugars into the same form.
testParse04_01 :: Property
testParse04_01 = "A AND -B" `translatesInto` (BConst (Positive ["A"]) `BAnd` BConst (Negative ["B"]))

testParse05 :: Property
testParse05 = "A AND B -C" `translatesInto` ((BConst (Positive ["A"]) `BAnd` BConst (Positive ["B"])) `BAnd` BConst (Negative ["C"]))

testParse05_01 :: Property
testParse05_01 =
  "A AND (B -C)" `translatesInto` (BConst (Positive ["A"]) `BAnd` (BConst (Positive ["B"]) `BAnd` BConst (Negative ["C"])))

testParse06 :: Property
testParse06 =
  translatesInto "(A OR B OR NOT C) AND (D OR E OR F) -(G OR H OR I)"
    (
      (
        ((BConst (Positive ["A"]) `BOr` (BConst (Positive ["B"]))) `BOr` (BConst  (Negative ["C"])))
        `BAnd`
        ((BConst (Positive ["D"]) `BOr` (BConst (Positive ["E"]))) `BOr` (BConst (Positive ["F"])))
      )
      `BAnd` BNot (
        ((BConst (Positive ["G"]) `BOr` (BConst (Positive ["H"]))) `BOr` (BConst (Positive ["I"])))
      )
    )

testParse07 :: Property
testParse07 =
  translatesInto "\"Haskell\" AND \"Agda\""
    ((BConst (Positive ["Haskell"]) `BAnd` (BConst (Positive ["Agda"]))))

testParse07_01 :: Property
testParse07_01 =
  translatesInto "Haskell AND Agda"
    ((BConst (Positive ["Haskell"]) `BAnd` (BConst (Positive ["Agda"]))))

testParse07_02 :: Property
testParse07_02 =
  translatesInto "Raphael"
    ((BConst (Positive ["Raphael"])))

testParse07_03 :: Property
testParse07_03 =
  translatesInto "Niki" ((BConst (Positive ["Niki"]))) .&&.
  translatesInto "Ajeje" ((BConst (Positive ["Ajeje"]))) .&&.
  translatesInto "Orf" ((BConst (Positive ["Orf"])))

testWordsIntoConst :: Assertion
testWordsIntoConst =
  let (expected :: BoolExpr [QueryTerm]) = fromCNF (boolTreeToCNF @[QueryTerm] $ (BAnd (BOr (BConst (Positive [QT_exact_match "The",QT_exact_match "Art",QT_exact_match "of",QT_exact_match "Computer",QT_exact_match "Programming"])) BFalse) (BAnd (BOr (BConst (Positive [QT_exact_match "Conceptual",QT_exact_match "Mathematics"])) BFalse) BTrue)))
  in case parseQuery "\"The Art of Computer Programming\" AND \"Conceptual Mathematics\"" of
    Left err
      -> assertBool err False
    Right x
      -> fromCNF (getQuery x) @?= expected

withValidQuery :: RawQuery -> (Query -> Assertion) -> Assertion
withValidQuery rawQuery onValidParse = do
  case parseQuery rawQuery of
    Left err -> assertBool err False
    Right x  -> onValidParse x


testArxiv01_01 :: Assertion
testArxiv01_01 = withValidQuery "A AND B" $ \q ->
  assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
             (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))

testArxiv01_02 :: Assertion
testArxiv01_02 = withValidQuery "\"Haskell\" AND \"Agda\"" $ \q ->
  assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
             (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["Haskell"]) ((Arxiv.Exp $ Arxiv.Abs ["Agda"]))))

testArxiv02 :: Assertion
testArxiv02 = withValidQuery "A OR B" $ \q ->
  assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
             (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))

testArxiv03_01 :: Assertion
testArxiv03_01 = withValidQuery "A AND NOT B" $ \q ->
  assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
             (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))

testArxiv03_02 :: Assertion
testArxiv03_02 = withValidQuery "A AND -B" $ \q ->
  assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
             (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))

-- Double negation get turned into positive.
testArxiv04_01 :: Assertion
testArxiv04_01 = withValidQuery "A AND NOT (NOT B)" $ \q ->
  assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
             (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))

testArxiv04_02 :: Assertion
testArxiv04_02 = withValidQuery "A AND NOT (NOT (NOT B))" $ \q ->
  assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
             (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))

testArxiv05 :: Assertion
testArxiv05 = withValidQuery "A OR NOT B" $ \q ->
  assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
             (Arxiv.qExp (Arxiv.convertQuery q) == Just (
                Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"])
                         (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["B"]) (Arxiv.Exp $ Arxiv.Abs ["B"]))
                )
             )

testArxiv06 :: Assertion
testArxiv06 = withValidQuery "-A" $ \q ->
  assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
             (Arxiv.qExp (Arxiv.convertQuery q) == Just (
                Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) (Arxiv.Exp $ Arxiv.Abs ["A"])
                )
             )

--
-- PUBMED tests
--

testPubMed01 :: Assertion
testPubMed01 = withValidQuery "A" $ \q ->
  assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
             (Pubmed.getESearch (Pubmed.convertQuery q) == "A")

testPubMed02_01 :: Assertion
testPubMed02_01 = withValidQuery "-A" $ \q -> Pubmed.getESearch (Pubmed.convertQuery q) @?= "NOT+A"

testPubMed02_02 :: Assertion
testPubMed02_02 = withValidQuery "NOT A" $ \q -> Pubmed.getESearch (Pubmed.convertQuery q) @?= "NOT+A"

testPubMed02_03 :: Assertion
testPubMed02_03 = withValidQuery "NOT (NOT A)" $ \q -> Pubmed.getESearch (Pubmed.convertQuery q) @?= "A"

testPubMed03 :: Assertion
testPubMed03 = withValidQuery "\"Haskell\" AND \"Idris\"" $ \q ->
  Pubmed.getESearch (Pubmed.convertQuery q) @?= "Haskell+AND+Idris"

testPubMed04 :: Assertion
testPubMed04 = withValidQuery "A OR B" $ \q ->
  Pubmed.getESearch (Pubmed.convertQuery q) @?= "A+OR+B"

testPubMedCovid_01 :: IO (Maybe PubmedApiKey) -> Assertion
testPubMedCovid_01 getPubmedKey = do
  mb_key <- getPubmedKey
  case mb_key of
    Nothing -> pure ()
    Just k  -> withValidQuery "\"Covid\"" $ \query -> do
      res <- Pubmed.get (_PubmedApiKey k) (renderQuery query) (Just 1)
      case res of
        Left err       -> fail (show err)
        Right (_, cnd) -> do
          hyperDocs <- sourceToList cnd
          case hyperDocs of
            []    -> fail "No documents found."
            (x:_) -> _hd_title x @?= Just "Being a Hospice Nurse in Times of the COVID-19 Pandemic: A Phenomenological Study of Providing End-of-Life Care."

testPubMedCovid_02 :: IO (Maybe PubmedApiKey) -> Assertion
testPubMedCovid_02 getPubmedKey = do
  mb_key <- getPubmedKey
  case mb_key of
    Nothing -> pure ()
    Just k  -> withValidQuery "\"Covid\" AND \"Alzheimer\"" $ \query -> do
      res <- Pubmed.get (_PubmedApiKey k) (renderQuery query) (Just 1)
      case res of
        Left err       -> fail (show err)
        Right (_, cnd) -> do
          hyperDocs <- sourceToList cnd
          case hyperDocs of
            []    -> fail "No documents found."
            (x:_) -> _hd_title x @?= Just "Neurodegenerative and Neurodevelopmental Diseases and the Gut-Brain Axis: The Potential of Therapeutic Targeting of the Microbiome."

testArxivRealWorld_01 :: IO (Maybe PubmedApiKey) -> Assertion
testArxivRealWorld_01 getPubmedKey = do
  mb_key <- getPubmedKey
  case mb_key of
    Nothing -> pure ()
    Just _  -> withValidQuery "\"Haskell\"" $ \query -> do
      (_, cnd) <- Arxiv.get EN query (Just 1)
      hyperDocs <- sourceToList cnd
      case hyperDocs of
        []    -> fail "No documents found."
        (x:_) -> _hd_title x @?= Just "Haskell for OCaml programmers"

testArxivRealWorld_02 :: IO (Maybe PubmedApiKey) -> Assertion
testArxivRealWorld_02 getPubmedKey = do
  mb_key <- getPubmedKey
  case mb_key of
    Nothing -> pure ()
    Just _  -> withValidQuery "\"Haskell\" AND \"Agda\"" $ \query -> do
      (_, cnd) <- Arxiv.get EN query (Just 1)
      hyperDocs <- sourceToList cnd
      case hyperDocs of
        []    -> fail "No documents found."
        (x:_) -> _hd_title x @?= Just "Toward Hole-Driven Development with Liquid Haskell"