{-# LANGUAGE TypeApplications #-}
module Test.Offline.Ngrams (tests) where

import Prelude

import Data.Text qualified as T
import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Types
import Gargantext.Database.Action.Flow.Utils (docNgrams)
import Gargantext.Database.Schema.Context
import Gargantext.Database.Admin.Types.Hyperdata
import Test.Instances ()
import Test.QuickCheck
import Test.Tasty
import Test.Tasty.QuickCheck (testProperty)
import Control.Lens
import qualified Test.QuickCheck as QC
import Gargantext.Core.Text.Terms.Mono (isSep)


genScientificText :: Gen T.Text
genScientificText = T.pack <$> listOf genScientificChar

-- | Roughly simulate the kind of text we might find in scientific papers and their abstracts.
genScientificChar :: Gen Char
genScientificChar = frequency
  [ (10, choose ('a', 'z'))
  , (10, choose ('A', 'Z'))
  , (5,  choose ('0', '9'))
  , (2,  QC.elements ws)                       -- Whitespace
  , (2,  QC.elements punctuation)              -- Punctuation
  , (1,  QC.elements "éèàçöñüßøåÆŒ")           -- Diacritics
  , (1,  QC.elements "αβγδΔθλμπσφχΩ≤≥≠≈")      -- Greek/math
  ]

ws :: String
ws = " \t\n"

punctuation :: String
punctuation = ",.();:-"

genNgramsTermNonEmpty :: Gen NgramsTermNonEmpty
genNgramsTermNonEmpty = do
    singleChar <- arbitrary `suchThat` (\x -> x /= ' ' && isAllowed x)
    txt        <- filter isAllowed <$> listOf1 genScientificChar
    pure $ NgramsTermNonEmpty $ (T.pack $ singleChar : txt)
  where
    isAllowed :: Char -> Bool
    isAllowed s = not (s `elem` punctuation) && not (s `elem` ws) && not (isSep s)

-- In order to test the behaviour of 'docNgrams' we create wrappers around 'NgramsTerm' to have two
-- different 'Arbitrary' flavours, one that always produces non-empty 'Text' fragments, and one that
-- /might/ occasionally generate empty text fragments.
newtype NgramsTermNonEmpty = NgramsTermNonEmpty { unNgramsTermNonEmpty :: T.Text }
  deriving (Eq, Show)

instance Arbitrary NgramsTermNonEmpty where
  arbitrary = genNgramsTermNonEmpty

data DocumentWithMatches =
  DocumentWithMatches
    { _dwm_terms    :: NonEmptyList NgramsTermNonEmpty
    , _dwn_document :: ContextOnlyId HyperdataDocument
    } deriving (Show)

-- | Generates a document where the title contains at least one
-- of the generated terms.
instance Arbitrary DocumentWithMatches where
  arbitrary = do
    generatedTerms <- arbitrary
    txtFragments   <- listOf1 genScientificText
    mkText <- do
      sf <- shuffle txtFragments
      let txt' = (map unNgramsTermNonEmpty $ getNonEmpty generatedTerms) <> sf
      shuffle txt'
    contextOnlyDoc <- arbitrary
    let doc = _context_oid_hyperdata contextOnlyDoc
    let doc' = doc { _hd_title = Just $ T.intercalate " " mkText }
    let hyperDoc =  contextOnlyDoc { _context_oid_hyperdata = doc' }
    pure $ DocumentWithMatches generatedTerms hyperDoc


tests :: TestTree
tests = testGroup "Ngrams" [
    testGroup "buildPatterns internal correctness" [
     testProperty "patterns, no matter how simple, can be searched" prop_patterns_internal_consistency
    ]
    , testGroup "buildPatternsWith" [
     testProperty "return results for non-empty input terms" testBuildPatternsNonEmpty
    ]
    , testGroup "docNgrams" [
     testProperty "always matches if the input text contains any of the terms" testDocNgramsOKMatch
    ]
  ]

testDocNgramsOKMatch :: Lang -> DocumentWithMatches -> Property
testDocNgramsOKMatch lang (DocumentWithMatches ts doc) =
  let ts' = map (NT.NgramsTerm . unNgramsTermNonEmpty) $ getNonEmpty ts
  in conjoin [
        counterexample "patterns empty" $ length (buildPatternsWith lang ts') > 0
      , counterexample "termsInText empty" $ length (termsInText lang (buildPatternsWith lang ts') (doc ^. context_oid_hyperdata . hd_title . _Just)) > 0
      , counterexample "docNgrams returned no results" $ length (docNgrams lang ts' doc) > 0
      ]

testBuildPatternsNonEmpty :: Lang -> NonEmptyList NgramsTermNonEmpty -> Property
testBuildPatternsNonEmpty lang ts =
  let ts' = map (NT.NgramsTerm . unNgramsTermNonEmpty) $ getNonEmpty ts
  in counterexample "buildPatterns returned no results" $ length (buildPatternsWith lang ts') > 0