

module Test.Core.Text.Corpus.TSV (tests) where

import Gargantext.Core.Text.Corpus.Parsers.TSV
import Gargantext.Core.Text.Corpus.Parsers.TSV.TSVv3 qualified as TSVv3
import Test.QuickCheck
import Test.QuickCheck.Instances ()
import Data.ByteString.Lazy.UTF8 as BLU
import Data.ByteString.Lazy as BL
import Data.Char ( ord )
import Data.Text as DT (Text, pack, null, elem)
import Data.Text.Encoding as DT

import Prelude

import Test.Hspec
import Test.Hspec.QuickCheck

-- | Hspec entry point for this module: registers every QuickCheck property
-- defined below under the "TSV Parser" group.
tests :: Spec
tests = describe "TSV Parser" $ do
  prop "Parses 'Valid Text'" testValidText
  prop "Parses 'Valid Number'" testValidNumber
  prop "Parses 'Error Per Line On A File'" testTestErrorPerLine
  prop "Parses 'Correct File'" testTestCorrectFile
  prop "Parses 'Correct File With New Line In Last Header'" testTestCorrectFileWithNewLine
  prop "Parses 'Find Delimiter'" testFindDelimiter
  prop "Parses 'Get Headers'" testGetHeader


-- | Encode a 'Delimiter' as the lazy 'BL.ByteString' holding its separator
-- character: tab, comma or newline.
delimiterBS :: Delimiter -> BL.ByteString
delimiterBS del =
  BLU.fromString $ case del of
    Tab   -> "\t"
    Comma -> ","
    Line  -> "\n"

-- | A synthetic input file used by the properties in this module: the
-- separator placed between fields and the corpus entries forming its rows.
data File = File
  { fDelimiter :: Delimiter      -- ^ separator written between the fields
  , allCorpus  :: [RandomCorpus] -- ^ one entry per data row
  }
  deriving (Show)

-- | One randomly generated corpus entry. The line builders in this module
-- render it as a single record with seven quoted fields.
data RandomCorpus = RandomCorpus
  { abstract :: Text -- ^ abstract text
  , title    :: Text -- ^ title text
  , authors  :: Text -- ^ authors text
  , source   :: Text -- ^ source text
  , day      :: Int  -- ^ publication day (unconstrained 'Int')
  , month    :: Int  -- ^ publication month (unconstrained 'Int')
  , years    :: Int  -- ^ publication year (unconstrained 'Int')
  }
  deriving (Show)

-- | Random 'File's are produced by 'arbitrarySizedFile', which receives the
-- current QuickCheck size parameter. No custom 'shrink' is provided.
instance Arbitrary File where
  arbitrary = sized $ \size -> arbitrarySizedFile size

-- | Generate a 'File' with exactly @m@ random corpus entries and a separator
-- picked from 'Tab' or 'Comma'. With @m == 0@ the file has no rows.
arbitrarySizedFile :: Int -> Gen File
arbitrarySizedFile m = do
  separator <- elements [Tab, Comma]
  rows      <- vectorOf m generateRandomCorpus
  pure $ File { fDelimiter = separator, allCorpus = rows }

-- | The separator of a 'Delimiter' as a one-character 'Text'.
delimiterToText :: Delimiter -> Text
delimiterToText del =
  DT.pack $ case del of
    Tab   -> "\t"
    Comma -> ","
    Line  -> "\n"

-- | The separator character of a 'Delimiter'. Despite the name, the result
-- is a single 'Char', not a 'String'.
delimiterToString :: Delimiter -> Char
delimiterToString del =
  case del of
    Tab   -> '\t'
    Comma -> ','
    Line  -> '\n'

-- | UTF-8 encode a strict 'Text' and wrap the resulting bytes as a lazy
-- 'BL.ByteString' made of a single chunk.
textToBL :: Text -> BL.ByteString
textToBL txt = BL.fromChunks [DT.encodeUtf8 txt]

-- | Generate a 'RandomCorpus' whose four text fields come from
-- 'generateString' and whose three numeric fields come from
-- 'generateNumber'. Nothing is filtered, so fields may be empty, contain
-- quotes or separators, and the numbers may be zero or negative.
generateRandomCorpus :: Gen RandomCorpus
generateRandomCorpus =
  RandomCorpus <$> text <*> text <*> text <*> text <*> number <*> number <*> number
  where
    text   = generateString
    number = generateNumber

-- | Generate a 'File' with between one and five corpus entries (never
-- empty) and a separator picked from 'Tab' or 'Comma'.
generateFileDelimiter :: Gen File
generateFileDelimiter = do
  separator <- elements [Tab, Comma]
  rowCount  <- choose (1, 5)
  rows      <- vectorOf rowCount generateRandomCorpus
  pure $ File { fDelimiter = separator, allCorpus = rows }

-- | Generator for 'File', taken from its 'Arbitrary' instance.
generateFile :: Gen File
generateFile = arbitrary

-- | Generator for arbitrary 'Text' values, taken from the 'Arbitrary Text'
-- instance in scope.
generateString :: Gen Text
generateString = arbitrary

-- | Generator for arbitrary 'Int' values (may be zero or negative).
generateNumber :: Gen Int
generateNumber = arbitrary

-- | Generate a random list of header names drawn, with repetition, from the
-- seven known column names. At each step the list stops with weight 1 and
-- grows by one more name with weight 7, so the result can be empty, can
-- miss some names and can contain duplicates.
randomHeaderList :: Gen [String]
randomHeaderList =
  frequency
    [ (1, pure [])
    , (7, (:) <$> elements knownHeaders <*> randomHeaderList)
    ]
  where
    knownHeaders =
      [ "Publication Day"
      , "Publication Month"
      , "Publication Year"
      , "Authors"
      , "Title"
      , "Source"
      , "Abstract"
      ]

--TODO add delimiter
-- | Render one corpus entry as a single delimited record. Every field is
-- wrapped in double quotes and the fields are emitted in the order day,
-- month, year, authors, title, source, abstract, joined by the text form of
-- the delimiter. Field contents are inserted verbatim, without escaping.
createLineFromCorpus :: RandomCorpus -> Delimiter -> BL.ByteString
createLineFromCorpus corpus delD =
    textToBL (Prelude.foldr1 joinFields quotedFields)
  where
    separator = delimiterToText delD
    dquote    = DT.pack "\""

    -- Surround a field with double quotes.
    quoted txt = dquote <> txt <> dquote

    -- Decimal rendering of a numeric field.
    numeric n = DT.pack (show n)

    joinFields field rest = field <> separator <> rest

    quotedFields = Prelude.map quoted
      [ numeric (day corpus)
      , numeric (month corpus)
      , numeric (years corpus)
      , authors corpus
      , title corpus
      , source corpus
      , abstract corpus
      ]

-- | Render one corpus entry as a delimited record whose last field spans two
-- physical lines: the quoted abstract field holds the abstract, a newline,
-- and the abstract again. All other fields are quoted and ordered day,
-- month, year, authors, title, source. Field contents are inserted
-- verbatim, without escaping.
createLineFromCorpusWithNewLine :: RandomCorpus -> Delimiter -> BL.ByteString
createLineFromCorpusWithNewLine corpus delD =
    textToBL (Prelude.foldr1 joinFields quotedFields)
  where
    separator = delimiterToText delD
    dquote    = DT.pack "\""

    -- Surround a field with double quotes.
    quoted txt = dquote <> txt <> dquote

    -- Decimal rendering of a numeric field.
    numeric n = DT.pack (show n)

    joinFields field rest = field <> separator <> rest

    -- The abstract repeated on two lines inside one quoted field.
    twoLineAbstract = abstract corpus <> DT.pack "\n" <> abstract corpus

    quotedFields = Prelude.map quoted
      [ numeric (day corpus)
      , numeric (month corpus)
      , numeric (years corpus)
      , authors corpus
      , title corpus
      , source corpus
      , twoLineAbstract
      ]


-- | Serialise a 'File' as bytes: a header row with the seven column names
-- joined by the file's delimiter, a newline, then one record per corpus
-- entry separated by newlines.
--
-- Each record is built with 'createLineFromCorpus', i.e. the single-line
-- form. The multi-line-abstract variant belongs to 'createFileWithNewLine'.
-- This matches the properties that consume this file, which re-check failing
-- rows using 'createLineFromCorpus'.
createFile :: File -> BL.ByteString
createFile file = headerRow <> newline <> records
  where
    fileDelimiter = fDelimiter file
    newline       = BLU.fromString "\n"

    headerRow =
      BL.intercalate (delimiterBS fileDelimiter) $
        Prelude.map BLU.fromString ["Publication Day", "Publication Month", "Publication Year", "Authors", "Title", "Source", "Abstract"]

    records =
      BL.intercalate newline $
        Prelude.map (\corpus -> createLineFromCorpus corpus fileDelimiter) (allCorpus file)

-- | Serialise a 'File' as bytes like 'createFile', except that every record
-- carries an abstract spanning two lines inside its quoted last field.
--
-- Each record is built with 'createLineFromCorpusWithNewLine'; using the
-- single-line builder here would make this function indistinguishable from
-- a plain file and the multi-line case would never be exercised.
createFileWithNewLine :: File -> BL.ByteString
createFileWithNewLine file = headerRow <> newline <> records
  where
    fileDelimiter = fDelimiter file
    newline       = BLU.fromString "\n"

    headerRow =
      BL.intercalate (delimiterBS fileDelimiter) $
        Prelude.map BLU.fromString ["Publication Day", "Publication Month", "Publication Year", "Authors", "Title", "Source", "Abstract"]

    records =
      BL.intercalate newline $
        Prelude.map (\corpus -> createLineFromCorpusWithNewLine corpus fileDelimiter) (allCorpus file)

-- | Predicate used by the properties to decide whether a parser error on a
-- generated entry is acceptable. It is 'True' when at least one of these
-- holds:
--
-- * the rendered line contains more than three delimiter bytes;
-- * any of the four text fields is empty;
-- * any of the four text fields contains a double quote.
--
-- NOTE(review): 'createLineFromCorpus' always joins seven fields, so the
-- rendered line presumably always has at least six delimiter bytes and the
-- first condition looks always-true -- confirm the intended threshold.
validRandomCorpus :: RandomCorpus -> Delimiter -> Bool
validRandomCorpus tsv del =
       delimiterCount > 3
    || Prelude.any DT.null textFields
    || Prelude.any (DT.elem '\"') textFields
  where
    delimiterCount =
      BL.length (BL.filter (== delimiter del) (createLineFromCorpus tsv del))
    textFields = [abstract tsv, title tsv, authors tsv, source tsv]

-- | Property for 'validNumber': the decimal rendering of a random 'Int' must
-- be accepted, unless the rendered bytes are empty or the number is below 1,
-- in which case a rejection is tolerated.
testValidNumber :: Property
testValidNumber = forAll generateNumber accepted
  where
    accepted s =
        case validNumber bytes rendered 1 of
            Right _ -> True
            Left  _ -> BL.empty == bytes || s < 1
      where
        rendered = DT.pack (show s)
        bytes    = textToBL rendered

-- | Property for 'validTextField': a random text must be accepted, unless
-- its UTF-8 bytes are empty or contain a double-quote byte, in which case a
-- rejection is tolerated.
testValidText :: Property
testValidText = forAll generateString $ \s ->
    let bytes            = textToBL s
        quoteByte        = fromIntegral (ord '\"')
        rejectionAllowed = BL.empty == bytes || quoteByte `BL.elem` bytes
    in  case validTextField bytes s 1 of
            Right _ -> True
            Left  _ -> rejectionAllowed


-- | Property for 'testErrorPerLine' on a single tab-separated record: the
-- rendered line, split on the tab byte, must be accepted against the seven
-- standard headers; a rejection is tolerated only when 'validRandomCorpus'
-- says so for that entry.
testTestErrorPerLine :: Property
testTestErrorPerLine = forAll generateRandomCorpus lineAccounted
  where
    separator   = Tab
    headerNames = Prelude.map DT.pack ["Publication Day", "Publication Month", "Publication Year", "Authors", "Title", "Source", "Abstract"]

    lineAccounted tsv =
        case testErrorPerLine TSVv3.validators cells separator headerNames 1 of
            Right _ -> True
            Left  _ -> validRandomCorpus tsv separator
      where
        cells = BL.splitWith (== delimiter separator) (createLineFromCorpus tsv separator)

--check :
-- True Del
-- False Error

-- | Property for 'testCorrectFile' on a whole generated file (built with
-- 'createFile'). On success the reported delimiter must equal the one the
-- file was generated with. On failure every corpus entry, rendered with
-- 'createLineFromCorpus', must either pass 'testErrorPerLine' or be
-- excused by 'validRandomCorpus'.
testTestCorrectFile :: Property
testTestCorrectFile = forAll generateFile fileAccounted
  where
    headerNames = Prelude.map DT.pack ["Publication Day", "Publication Month", "Publication Year", "Authors", "Title", "Source", "Abstract"]

    fileAccounted file =
        case testCorrectFile TSVv3.validators (createFile file) of
            Right found -> found == expected
            Left _      -> Prelude.all entryAccounted (allCorpus file)
      where
        expected = fDelimiter file

        cellsOf entry =
            BL.splitWith (== delimiter expected) $ createLineFromCorpus entry expected

        entryAccounted entry =
            case testErrorPerLine TSVv3.validators (cellsOf entry) expected headerNames 1 of
                Right _ -> True
                Left  _ -> validRandomCorpus entry expected


-- | Variant of 'testTestCorrectFile' whose input is built with
-- 'createFileWithNewLine', meant to cover abstracts spanning several lines.
-- Any successful result is accepted (the reported delimiter is not
-- compared). On failure every corpus entry, rendered with
-- 'createLineFromCorpus', must either pass 'testErrorPerLine' or be
-- excused by 'validRandomCorpus'.
testTestCorrectFileWithNewLine :: Property
testTestCorrectFileWithNewLine = forAll generateFile fileAccounted
  where
    headerNames = Prelude.map DT.pack ["Publication Day", "Publication Month", "Publication Year", "Authors", "Title", "Source", "Abstract"]

    fileAccounted file =
        case testCorrectFile TSVv3.validators (createFileWithNewLine file) of
            Right _ -> True
            Left _  -> Prelude.all entryAccounted (allCorpus file)
      where
        separator = fDelimiter file

        cellsOf entry =
            BL.splitWith (== delimiter separator) $ createLineFromCorpus entry separator

        entryAccounted entry =
            case testErrorPerLine TSVv3.validators (cellsOf entry) separator headerNames 1 of
                Right _ -> True
                Left  _ -> validRandomCorpus entry separator

-- | Property for 'findDelimiter' on a non-empty generated file (built with
-- 'createFile'). Any successful result is accepted. A failure is tolerated
-- only when one of the text fields of the /first/ corpus entry contains the
-- file's separator character or a newline; later entries are not inspected.
testFindDelimiter :: Property
testFindDelimiter = forAll generateFileDelimiter $ \file ->
    case findDelimiter (createFile file) of
        Right _ -> True
        Left _  ->
            let -- Safe here: 'generateFileDelimiter' yields at least one entry.
                firstEntry = Prelude.head (allCorpus file)
                textFields = Prelude.map ($ firstEntry) [abstract, authors, title, source]
                markers    = [delimiterToString (fDelimiter file), delimiterToString Line]
            in  Prelude.or [ marker `DT.elem` field | marker <- markers, field <- textFields ]

-- | Property for 'getHeaders' on a single tab-separated header row built
-- from a random list of known column names. Any successful result is
-- accepted. A failure is tolerated only when at least one of the seven
-- required column names is absent from the generated list.
testGetHeader :: Property
testGetHeader = forAll randomHeaderList $ \headers ->
    let headerRow = BL.intercalate (delimiterBS Tab) (Prelude.map BLU.fromString headers)
        required  =
          [ "Publication Day"
          , "Publication Month"
          , "Publication Year"
          , "Authors"
          , "Source"
          , "Title"
          , "Abstract"
          ]
        someMissing = Prelude.any (`Prelude.notElem` headers) required
    in  case getHeaders TSVv3.validators [headerRow] Tab of
            Right _ -> True
            Left  _ -> someMissing
