Commit 05e98637 authored by Loïc Chapron

add test to TSV import error message

parent c5671e08
......@@ -782,6 +782,7 @@ test-suite garg-test-tasty
Test.Core.Similarity
Test.Core.Text
Test.Core.Text.Corpus.Query
Test.Core.Text.Corpus.TSV
Test.Core.Text.Examples
Test.Core.Text.Flow
Test.Core.Utils
......@@ -855,6 +856,7 @@ test-suite garg-test-tasty
, postgresql-simple >= 0.6.4 && < 0.7
, pretty
, process ^>= 1.6.13.2
, protolude ^>= 0.3.3
, quickcheck-instances ^>= 0.3.25.2
, raw-strings-qq
, recover-rtti >= 0.4 && < 0.5
......@@ -882,6 +884,7 @@ test-suite garg-test-tasty
, tree-diff
, unordered-containers ^>= 0.2.16.0
, unicode-collation >= 0.1.3.6
, utf8-string ^>= 1.0.2
, validity ^>= 0.11.0.1
, vector ^>= 0.12.3.0
, wai
......
module Test.Core.Text.Corpus.TSV (tests) where
import Gargantext.Core.Text.Corpus.Parsers.TSV
import Test.QuickCheck
import Test.QuickCheck.Instances ()
import Data.ByteString.Lazy.UTF8 as BLU
import Data.ByteString.Lazy as BL
import Data.Char ( ord )
import Data.Text as DT (Text, pack, null, elem)
import Data.Text.Encoding as DT
import Prelude
import Test.Tasty
import Test.Tasty.HUnit
import Test.Tasty.QuickCheck hiding (Positive, Negative)
-- | Tasty tree gathering every QuickCheck property defined in this module
-- for the TSV parser.
tests :: TestTree
tests =
  testGroup "TSV Parser" $
    Prelude.map
      (Prelude.uncurry testProperty)
      [ ("Parses 'Valid Text'", testValidText)
      , ("Parses 'Valid Number'", testValidNumber)
      , ("Parses 'Error Per Line On A File'", testTestErrorPerLine)
      , ("Parses 'Correct File'", testTestCorrectFile)
      , ("Parses 'Correct File With New Line In Last Header'", testTestCorrectFileWithNewLine)
      ]
-- | Lazy 'BL.ByteString' rendering of a 'Delimiter': a tab, a comma or a
-- line feed.
delimiterBS :: Delimiter -> BL.ByteString
delimiterBS del =
  BLU.fromString $ case del of
    Tab   -> "\t"
    Comma -> ","
    Line  -> "\n"
-- | A randomly generated input file: the delimiter separating its columns
-- and the rows it contains.
data File = File { fDelimiter :: Delimiter
                   -- ^ Delimiter placed between the columns of the file.
                 , allCorpus :: [RandomCorpus]
                   -- ^ One entry per data row of the file.
                 }
  deriving (Show)
-- | One randomly generated document row. The text fields and the date
-- components are stored unrendered; see 'createLineFromCorpus' for how they
-- are turned into a line of the file.
data RandomCorpus =
  RandomCorpus { abstract :: Text
                 -- ^ Content of the "Abstract" column.
               , title :: Text
                 -- ^ Content of the "Title" column.
               , authors :: Text
                 -- ^ Content of the "Authors" column.
               , source :: Text
                 -- ^ Content of the "Source" column.
               , day :: Int
                 -- ^ Content of the "Publication Day" column.
               , month :: Int
                 -- ^ Content of the "Publication Month" column.
               , years :: Int
                 -- ^ Content of the "Publication Year" column.
               }
  deriving (Show)
-- | Random 'File's whose number of rows equals the QuickCheck size
-- parameter (see 'arbitrarySizedFile').
instance Arbitrary File where
  arbitrary = sized $ \size -> arbitrarySizedFile size
-- | Generate a 'File' with exactly @m@ random rows, delimited by either
-- 'Tab' or 'Comma' ('Line' is never picked).
arbitrarySizedFile :: Int -> Gen File
arbitrarySizedFile m = do
  chosenDelimiter <- elements [Tab, Comma]
  rows <- vectorOf m generateRandomCorpus
  pure File { fDelimiter = chosenDelimiter, allCorpus = rows }
-- | 'Text' rendering of a 'Delimiter': a tab, a comma or a line feed.
delimiterToText :: Delimiter -> Text
delimiterToText del =
  DT.pack $ case del of
    Tab   -> "\t"
    Comma -> ","
    Line  -> "\n"
-- | UTF-8 encode a strict 'Text' and wrap the result as a single-chunk lazy
-- 'BL.ByteString'.
textToBL :: Text -> BL.ByteString
textToBL txt = BL.fromChunks [DT.encodeUtf8 txt]
-- | Generate a 'RandomCorpus' from four arbitrary texts (abstract, title,
-- authors, source, in constructor order) followed by three arbitrary
-- integers (day, month, years).
generateRandomCorpus :: Gen RandomCorpus
generateRandomCorpus =
  RandomCorpus <$> text <*> text <*> text <*> text <*> number <*> number <*> number
  where
    text = generateString
    number = generateNumber
-- | Generator of random 'File's, taken from the 'Arbitrary' instance.
generateFile :: Gen File
generateFile = arbitrary
-- | Generator of arbitrary 'Text' values (no restriction on the characters
-- produced is applied here).
generateString :: Gen Text
generateString = arbitrary
-- | Generator of arbitrary 'Int' values (no range restriction is applied
-- here).
generateNumber :: Gen Int
generateNumber = arbitrary
--TODO add delimiter
-- | Render one 'RandomCorpus' as a single row. Columns are emitted in the
-- order day, month, years, authors, title, source, abstract; every value is
-- wrapped in double quotes and the columns are joined with the given
-- delimiter. Field contents are inserted verbatim (no escaping is done).
createLineFromCorpus :: RandomCorpus -> Delimiter -> BL.ByteString
createLineFromCorpus corpus delD =
  textToBL $
    quotedNumber (day corpus)
      <> sep <> quotedNumber (month corpus)
      <> sep <> quotedNumber (years corpus)
      <> sep <> quoted (authors corpus)
      <> sep <> quoted (title corpus)
      <> sep <> quoted (source corpus)
      <> sep <> quoted (abstract corpus)
  where
    sep = delimiterToText delD
    dquote = DT.pack "\""
    -- Surround a text value with double quotes.
    quoted field = dquote <> field <> dquote
    -- Same, for an integer rendered with 'show'.
    quotedNumber n = quoted (DT.pack (show n))
-- | Variant of the row renderer whose quoted abstract column spans two
-- lines: the abstract text, a line feed, then the abstract text again.
-- Columns are emitted in the order day, month, years, authors, title,
-- source, abstract, each wrapped in double quotes and joined with the given
-- delimiter.
createLineFromCorpusWithNewLine :: RandomCorpus -> Delimiter -> BL.ByteString
createLineFromCorpusWithNewLine corpus delD =
  textToBL $
    quotedNumber (day corpus)
      <> sep <> quotedNumber (month corpus)
      <> sep <> quotedNumber (years corpus)
      <> sep <> quoted (authors corpus)
      <> sep <> quoted (title corpus)
      <> sep <> quoted (source corpus)
      <> sep <> quoted twoLineAbstract
  where
    sep = delimiterToText delD
    dquote = DT.pack "\""
    -- The abstract repeated on a second line, still inside one quoted field.
    twoLineAbstract = abstract corpus <> DT.pack "\n" <> abstract corpus
    -- Surround a text value with double quotes.
    quoted field = dquote <> field <> dquote
    -- Same, for an integer rendered with 'show'.
    quotedNumber n = quoted (DT.pack (show n))
-- | Render a 'File' as bytes: a header row followed by one single-line row
-- per 'RandomCorpus', rows separated by a line feed and columns by the
-- file's delimiter.
--
-- Rows are built with 'createLineFromCorpus'. The multi-line-abstract
-- renderer belongs to 'createFileWithNewLine'; using it here (as the code
-- previously did) made the plain "correct file" property exercise the
-- multi-line case instead of the simple one.
createFile :: File -> BL.ByteString
createFile file = headers <> newline <> allLines
  where
    del = fDelimiter file
    newline = BLU.fromString "\n"
    headers =
      BL.intercalate (delimiterBS del) $
        Prelude.map
          BLU.fromString
          ["Publication Day", "Publication Month", "Publication Year", "Authors", "Title", "Source", "Abstract"]
    allLines =
      BL.intercalate newline $
        Prelude.map (\x -> createLineFromCorpus x del) (allCorpus file)
-- | Render a 'File' as bytes like 'createFile', but with every row's
-- abstract spanning two lines inside its quoted field.
--
-- Rows are built with 'createLineFromCorpusWithNewLine'. The code previously
-- used the single-line renderer here, so the "with new line" property never
-- actually fed an embedded line feed to the parser.
createFileWithNewLine :: File -> BL.ByteString
createFileWithNewLine file = headers <> newline <> allLines
  where
    del = fDelimiter file
    newline = BLU.fromString "\n"
    headers =
      BL.intercalate (delimiterBS del) $
        Prelude.map
          BLU.fromString
          ["Publication Day", "Publication Month", "Publication Year", "Authors", "Title", "Source", "Abstract"]
    allLines =
      BL.intercalate newline $
        Prelude.map (\x -> createLineFromCorpusWithNewLine x del) (allCorpus file)
-- | Decide whether a parser rejection of this row is an expected one.
-- Returns 'True' when the rendered row holds more than three delimiter
-- bytes, or when any of the four text fields is empty or contains a double
-- quote; 'False' otherwise.
--
-- NOTE(review): 'createLineFromCorpus' always joins seven columns, so the
-- rendered row seems to contain at least six delimiters and the first
-- condition looks always true (making this predicate constant). The intended
-- threshold is probably "more than six" - confirm against the parser before
-- changing it.
validRandomCorpus :: RandomCorpus -> Delimiter -> Bool
validRandomCorpus tsv del =
  delimiterCount > 3
    || Prelude.any DT.null textFields
    || Prelude.any (DT.elem '"') textFields
  where
    delimiterCount =
      BL.length (BL.filter (== delimiter del) (createLineFromCorpus tsv del))
    textFields = [abstract tsv, title tsv, authors tsv, source tsv]
-- Test the 'validNumber' function: an arbitrary Int is rendered with 'show'
-- and handed to 'validNumber' both as bytes and as text. A rejection is only
-- tolerated when the bytes are empty or the number is lower than 1.
testValidNumber :: Property
testValidNumber = forAll generateNumber check
  where
    check s =
      case validNumber bl nbText 1 of
        Right _ -> True
        Left _  -> BL.null bl || s < 1
      where
        nbText = DT.pack (show s)
        bl = textToBL nbText
-- Test the 'validTextField' function: an arbitrary Text is handed to it both
-- as UTF-8 bytes and as text. A rejection is only tolerated when the bytes
-- are empty or contain a double-quote byte.
testValidText :: Property
testValidText = forAll generateString check
  where
    check s =
      case validTextField bl s 1 of
        Right _ -> True
        Left _  -> BL.null bl || quoteByte `BL.elem` bl
      where
        bl = textToBL s
    -- Byte value of the double-quote character.
    quoteByte = fromIntegral (ord '"')
-- Test if a single line is OK: a random row is rendered with the Tab
-- delimiter, split on that delimiter and given to 'testErrorPerLine'
-- together with the expected headers. When the parser rejects the row, the
-- outcome is whatever 'validRandomCorpus' says about it.
testTestErrorPerLine :: Property
testTestErrorPerLine = forAll generateRandomCorpus $ \tsv ->
  let fields = BL.splitWith (== delimiter Tab) (createLineFromCorpus tsv Tab)
   in either
        (const (validRandomCorpus tsv Tab))
        (const True)
        (testErrorPerLine fields Tab headers 1)
  where
    headers =
      Prelude.map
        DT.pack
        ["Publication Day", "Publication Month", "Publication Year", "Authors", "Title", "Source", "Abstract"]
-- Outcomes checked below (as handled by the case expression):
--   Right -> a delimiter was returned; it must be the file's delimiter
--   Left  -> an error was returned; every row must be an accepted rejection
-- Test if a file is OK: a random file is rendered with 'createFile' and
-- given to 'testCorrectFile'.
testTestCorrectFile :: Property
testTestCorrectFile = forAll generateFile $ \file ->
  let del = fDelimiter file
      headers =
        Prelude.map
          DT.pack
          ["Publication Day", "Publication Month", "Publication Year", "Authors", "Title", "Source", "Abstract"]
      -- A row passes when the per-line check accepts it, or when its
      -- rejection is one 'validRandomCorpus' accounts for.
      rowAccepted x =
        let splitLine = BL.splitWith (== delimiter del) (createLineFromCorpus x del)
         in case testErrorPerLine splitLine del headers 1 of
              Right _ -> True
              Left _  -> validRandomCorpus x del
   in case testCorrectFile (createFile file) of
        Right found -> found == fDelimiter file
        Left _      -> Prelude.all rowAccepted (allCorpus file)
-- Almost the same as 'testTestCorrectFile', but the file is rendered with
-- 'createFileWithNewLine', to also test that a corpus whose abstract spans
-- multiple lines is OK.
--
-- NOTE(review): unlike 'testTestCorrectFile', a 'Right' result is accepted
-- without comparing the returned delimiter to the file's one - confirm this
-- is intended.
testTestCorrectFileWithNewLine :: Property
testTestCorrectFileWithNewLine = forAll generateFile $ \file ->
  let del = fDelimiter file
      headers =
        Prelude.map
          DT.pack
          ["Publication Day", "Publication Month", "Publication Year", "Authors", "Title", "Source", "Abstract"]
      -- A row passes when the per-line check accepts it, or when its
      -- rejection is one 'validRandomCorpus' accounts for.
      rowAccepted x =
        let splitLine = BL.splitWith (== delimiter del) (createLineFromCorpus x del)
         in case testErrorPerLine splitLine del headers 1 of
              Right _ -> True
              Left _  -> validRandomCorpus x del
   in case testCorrectFile (createFileWithNewLine file) of
        Right _ -> True
        Left _  -> Prelude.all rowAccepted (allCorpus file)
......@@ -13,6 +13,7 @@ module Main where
import Gargantext.Prelude
import qualified Test.Core.Text.Corpus.Query as CorpusQuery
import qualified Test.Core.Text.Corpus.TSV as TSVParser
import qualified Test.Core.Utils as Utils
import qualified Test.Graph.Clustering as Graph
import qualified Test.Ngrams.NLP as NLP
......@@ -48,6 +49,7 @@ main = do
, jobsSpec
, NgramsQuery.tests
, CorpusQuery.tests
, TSVParser.tests
, JSON.tests
, Errors.tests
, similaritySpec
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment