{-|
Module      : Test.Database.Operations.DocumentSearch
Description : GarganText database tests
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}

{-# LANGUAGE QuasiQuotes       #-}

module Test.Database.Operations.DocumentSearch where

import Prelude

import Control.Lens (view)
import Control.Monad.Reader
import Data.Aeson.QQ.Simple
import Data.Aeson.Types
-- import Gargantext.API.Node.Update (updateDocs)
import Gargantext.Core
import Gargantext.Core.NLP (nlpServerGet)
import Gargantext.Core.Types.Individu
import Gargantext.Database.Action.Flow
import Gargantext.Database.Action.Search
import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Tree.Root
import Gargantext.Database.Schema.Node (NodePoly(..))
-- import Network.URI (parseURI)

import Test.Database.Types
import Test.Hspec.Expectations
import Test.Tasty.HUnit
import Gargantext.Core.Text.Terms.Mono.Stem
import Gargantext.Database.Admin.Config (userMaster)
import qualified Data.Text as T
import qualified Gargantext.Core.Text.Corpus.Query as API
import Gargantext.Database.Query.Facet


exampleDocument_01 :: HyperdataDocument
exampleDocument_01 = either error id $ parseEither parseJSON $ [aesonQQ|
  { "doi":"01"
  , "publication_day":6
  , "language_iso2":"EN"
  , "publication_minute":0
  , "publication_month":7
  , "language_iso3":"eng"
  , "publication_second":0
  , "authors":"Nils Hovdenak, Kjell Haram"
  , "publication_year":2012
  , "publication_date":"2012-07-06 00:00:00+00:00"
  , "language_name":"English"
  , "realdate_full_":"2012 01 12"
  , "source":"European journal of obstetrics, gynecology, and reproductive biology Institute"
  , "abstract":"The literature was searched for publications on minerals and vitamins during pregnancy and the possible influence of supplements on pregnancy outcome."
  , "title":"Influence of mineral and vitamin supplements on pregnancy outcome."
  , "publication_hour":0
  }
|]

exampleDocument_02 :: HyperdataDocument
exampleDocument_02 = either error id $ parseEither parseJSON $ [aesonQQ|
  { "doi":""
  , "bdd": "Arxiv"
  , "publication_day":6
  , "language_iso2":"EN"
  , "publication_second":0
  , "authors":"Ajeje Brazorf, Manuel Agnelli"
  , "publication_year":2012
  , "publication_date":"2012-07-06 00:00:00+00:00"
  , "language_name":"English"
  , "realdate_full_":"2012 01 12"
  , "source":"Malagrotta Institute of Technology"
  , "abstract":"We present PyPlasm, an innovative approach to computational graphics"
  , "title":"PyPlasm: computational geometry made easy"
  , "publication_hour":0
  }
|]

exampleDocument_03 :: HyperdataDocument
exampleDocument_03 = either error id $ parseEither parseJSON $ [aesonQQ|
{
    "bdd": "Arxiv"
  , "doi": ""
  , "url": "http://arxiv.org/pdf/1405.3072v2"
  , "title": "Haskell for OCaml programmers"
  , "source": ""
  , "authors": "Raphael Poss, Herbert Ballerina"
  , "abstract": "  This introduction to Haskell is written to optimize learning by programmers who already know OCaml. "
  , "institutes": ""
  , "language_iso2": "EN"
  , "publication_date": "2014-05-13T09:10:32Z"
  , "publication_year": 2014
}
|]

exampleDocument_04 :: HyperdataDocument
exampleDocument_04 = either error id $ parseEither parseJSON $ [aesonQQ|
{
    "bdd": "Arxiv"
  , "doi": ""
  , "url": "http://arxiv.org/pdf/1407.5670v1"
  , "title": "Rust for functional programmers"
  , "source": ""
  , "authors": "Raphael Poss"
  , "abstract": "  This article provides an introduction to Rust , a systems language by Mozilla , to programmers already familiar with Haskell , OCaml or other functional languages. " , "institutes": ""
  , "language_iso2": "EN"
  , "publication_date": "2014-07-21T21:20:31Z"
  , "publication_year": 2014
}
|]

corpusAddDocuments :: TestEnv -> Assertion
corpusAddDocuments env = do
  flip runReaderT env $ runTestMonad $ do
    -- NOTE(adn) We need to create user 'gargantua'(!!) in order
    -- for 'addDocumentsToHyperCorpus' to work.
    parentId <- getRootId (UserName userMaster)
    [corpus] <- getCorporaWithParentId parentId
    let corpusId = _node_id corpus

    let lang = EN
    server <- view (nlpServerGet lang)
    ids <- addDocumentsToHyperCorpus server
                                     (Just $ _node_hyperdata $ corpus)
                                     (Multi lang)
                                     corpusId
                                     [exampleDocument_01, exampleDocument_02, exampleDocument_03, exampleDocument_04]
    liftIO $ length ids `shouldBe` 4

stemmingTest :: TestEnv -> Assertion
stemmingTest _env = do
  stem EN GargPorterAlgorithm "Ajeje"        `shouldBe` "Ajeje"
  stem EN GargPorterAlgorithm "PyPlasm:"     `shouldBe` "PyPlasm:"
  stem EN GargPorterAlgorithm "soy"          `shouldBe` "soy"
  stem EN GargPorterAlgorithm "cry"          `shouldBe` "cri"
  -- This test outlines the main differences between Porter and Lancaster.
  stem EN GargPorterAlgorithm "dancer"       `shouldBe` "dancer"
  stem EN LancasterAlgorithm  "dancer"       `shouldBe` "dant"
  stem EN GargPorterAlgorithm "postpartum"   `shouldBe` "postpartum"
  stem EN LancasterAlgorithm  "postpartum"   `shouldBe` "postpart"
  stem IT PorterAlgorithm     "catechizzare" `shouldBe` "catechizz"
  stem IT LancasterAlgorithm  "catechizzare" `shouldBe` "catechizzare" -- lancaster doesn't support Italian

mkQ :: T.Text -> API.Query
mkQ txt = either (\e -> error $ "(query) = " <> T.unpack txt <> ": " <> e) id . API.parseQuery . API.RawQuery $ txt

corpusSearch01 :: TestEnv -> Assertion
corpusSearch01 env = do
  flip runReaderT env $ runTestMonad $ do

    parentId <- getRootId (UserName userMaster)
    [corpus] <- getCorporaWithParentId parentId

    results1 <- searchInCorpus (_node_id corpus) False (mkQ "mineral") Nothing Nothing Nothing
    results2 <- searchInCorpus (_node_id corpus) False (mkQ "computational") Nothing Nothing Nothing

    liftIO $ length results1 `shouldBe` 1
    liftIO $ length results2 `shouldBe` 1

-- | Check that we support more complex queries
corpusSearch02 :: TestEnv -> Assertion
corpusSearch02 env = do
  flip runReaderT env $ runTestMonad $ do

    parentId <- getRootId (UserName userMaster)
    [corpus] <- getCorporaWithParentId parentId

    results1 <- searchInCorpus (_node_id corpus) False (mkQ "Raphael") Nothing Nothing Nothing
    results2 <- searchInCorpus (_node_id corpus) False (mkQ "Raphael Poss") Nothing Nothing Nothing

    liftIO $ do
      length results1 `shouldBe` 2 -- Haskell & Rust
      map facetDoc_title results2 `shouldBe` ["Haskell for OCaml programmers", "Rust for functional programmers"]

-- | Check that we support more complex queries via the bool API
corpusSearch03 :: TestEnv -> Assertion
corpusSearch03 env = do
  flip runReaderT env $ runTestMonad $ do

    parentId <- getRootId (UserName userMaster)
    [corpus] <- getCorporaWithParentId parentId

    results1 <- searchInCorpus (_node_id corpus) False (mkQ "\"Manuel Agnelli\"") Nothing Nothing Nothing
    results2 <- searchInCorpus (_node_id corpus) False (mkQ "Raphael AND -Rust") Nothing Nothing Nothing
    results3 <- searchInCorpus (_node_id corpus) False (mkQ "(Raphael AND (NOT Rust)) OR PyPlasm") Nothing Nothing Nothing

    liftIO $ do
      length results1 `shouldBe` 1
      map facetDoc_title results2 `shouldBe` ["Haskell for OCaml programmers"]
      map facetDoc_title results3 `shouldBe` ["PyPlasm: computational geometry made easy", "Haskell for OCaml programmers"]

-- | Check that the score doc count is correct
--   TODO This test is unfinished because `updateDocs` needs more work
corpusScore01 :: TestEnv -> Assertion
corpusScore01 env = do
  flip runReaderT env $ runTestMonad $ do

    parentId <- getRootId (UserName userMaster)
    [corpus] <- getCorporaWithParentId parentId

    results <- searchInCorpus (_node_id corpus) False (mkQ "Haskell") Nothing Nothing Nothing

    liftIO $ do
      map facetDoc_title results `shouldBe` ["Haskell for OCaml programmers", "Rust for functional programmers"]

      map facetDoc_score results `shouldBe` [Just 0.0, Just 0.0]

    -- _ <- updateDocs (_node_id corpus)

    liftIO $ do
      map facetDoc_score results `shouldBe` [Just 0.0, Just 0.0]

-- | Check that we support search with tsquery
corpusSearchDB01 :: TestEnv -> Assertion
corpusSearchDB01 env = do
  flip runReaderT env $ runTestMonad $ do

    parentId <- getRootId (UserName userMaster)
    [corpus] <- getCorporaWithParentId parentId

    results <- searchDocInDatabase (_node_id corpus) ("first second")

    liftIO $ do
      length results `shouldBe` 0 -- doesn't exist, we just check that proper to_tsquery is called