...
  View open merge request
Commits (11)
......@@ -20,7 +20,7 @@ import Data.Tuple.Extra (both)
import Data.Vector qualified as DV
import GHC.Generics
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers.TSV (readTSVFile, tsv_title, tsv_abstract, tsv_publication_year, fromMIntOrDec, defaultYear)
import Gargantext.Core.Text.Corpus.Parsers.TSV (readTSVFile, unIntOrDec, tsv_title, tsv_abstract, tsv_publication_year, defaultYear)
import Gargantext.Core.Text.List.Formats.TSV (tsvMapTermList)
import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs)
import Gargantext.Core.Text.Terms.WithList ( Patterns, buildPatterns, extractTermsWithList )
......@@ -52,7 +52,7 @@ filterTermsAndCoocCLI (CorpusFile corpusFile) (TermListFile termListFile) (Outpu
Right cf -> do
let corpus = DM.fromListWith (<>)
. DV.toList
. DV.map (\n -> (fromMIntOrDec defaultYear $ tsv_publication_year n, [(tsv_title n) <> " " <> (tsv_abstract n)]))
. DV.map (\n -> (maybe defaultYear unIntOrDec $ tsv_publication_year n, [(tsv_title n) <> " " <> (tsv_abstract n)]))
. snd $ cf
-- termListMap :: [Text]
......
......@@ -82,8 +82,8 @@ tsvToDocs parser patterns time path =
Wos _ -> Prelude.error "tsvToDocs: unimplemented"
Tsv limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (Tsv.fromMIntOrDec Tsv.defaultYear $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time)
(toPhyloDate' (Tsv.fromMIntOrDec Tsv.defaultYear $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time)
<$> Vector.map (\row -> Document (toPhyloDate (maybe Tsv.defaultYear Tsv.unIntOrDec $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time)
(toPhyloDate' (maybe Tsv.defaultYear Tsv.unIntOrDec $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time)
(termsInText patterns $ (tsv_title row) <> " " <> (tsv_abstract row))
Nothing
[]
......
......@@ -197,6 +197,7 @@ library
Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers.Date
Gargantext.Core.Text.Corpus.Parsers.TSV
Gargantext.Core.Text.Corpus.Parsers.TSV.Internal
Gargantext.Core.Text.Corpus.Query
Gargantext.Core.Text.List
Gargantext.Core.Text.List.Formats.TSV
......
......@@ -51,7 +51,6 @@ import Gargantext.Core.NodeStory
import Gargantext.Database.Prelude (HasConnectionPool(..))
import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging
import Gargantext.Utils.Jobs.Internal (pollJob)
import Gargantext.Utils.Jobs.Map (LoggerM, J(..), jTask, rjGetLog)
import Gargantext.Utils.Jobs.Monad qualified as Jobs
import Network.HTTP.Client (Manager)
......@@ -60,7 +59,6 @@ import Servant.Client (BaseUrl)
import Servant.Job.Async (HasJobEnv(..), Job)
import Servant.Job.Async qualified as SJ
import Servant.Job.Core qualified
import Servant.Job.Types qualified as SJ
import System.Log.FastLogger qualified as FL
data Mode = Dev | Mock | Prod
......@@ -198,19 +196,9 @@ mkJobHandle jId = JobHandle jId
-- | Update the progress of a job: fetch the latest 'JobLog' for the handle,
-- apply the supplied 'updateJobStatus' transformation to it and pass the
-- result to the handle's logging callback. A 'ConcreteNullHandle' is a no-op.
updateJobProgress :: ConcreteJobHandle err -> (JobLog -> JobLog) -> GargM Env err ()
-- Nothing to track for the null handle.
updateJobProgress ConcreteNullHandle _ = pure ()
-- Clause that, after logging the updated status, also looks the job up by id
-- and, when it is found, sends a 'CET.UpdateJobProgress' notification.
updateJobProgress hdl@(JobHandle jId logStatus) updateJobStatus = do
jobLog <- Jobs.getLatestJobStatus hdl
let jobLogNew = updateJobStatus jobLog
logStatus jobLogNew
mJb <- Jobs.findJob jId
case mJb of
Nothing -> pure ()
Just je -> do
-- We use the same endpoint as the one used for polling jobs via the
-- API. This way we can send the job status directly in the
-- notification.
j <- pollJob (Just $ SJ.Limit 1) Nothing jId je
CET.ce_notify $ CET.UpdateJobProgress j
-- Clause that only logs the updated status; no job lookup and no notification.
updateJobProgress hdl@(JobHandle _ logStatus) updateJobStatus =
Jobs.getLatestJobStatus hdl >>= logStatus . updateJobStatus
instance Jobs.MonadJobStatus (GargM Env err) where
......@@ -251,6 +239,13 @@ instance Jobs.MonadJobStatus (GargM Env err) where
-- Mark the job complete by applying 'jobLogComplete' to its latest log.
markComplete jh = updateJobProgress jh jobLogComplete
-- Record the warning against the job first, then mark the job as complete.
markCompleteWithWarning jobHdl warning =
  Jobs.emitWarning jobHdl warning >> Jobs.markComplete jobHdl
-- Register the warning as a warning event on the job's latest log.
emitWarning jobHdl = updateJobProgress jobHdl . addWarningEvent
markFailed mb_msg jh =
updateJobProgress jh (\latest -> case mb_msg of
Nothing -> jobLogFailTotal latest
......@@ -328,6 +323,10 @@ instance Jobs.MonadJobStatus (GargM DevEnv err) where
-- Every job-status method below is a no-op in this instance: the arguments
-- are ignored and nothing is recorded.
markComplete _ = pure ()
markCompleteWithWarning _ _ = pure ()
emitWarning _ _ = pure ()
markFailed _ _ = pure ()
addMoreSteps _ _ = pure ()
......
......@@ -20,6 +20,7 @@ module Gargantext.API.Job (
, jobLogFailTotalWithMessage
, RemainingSteps(..)
, addErrorEvent
, addWarningEvent
) where
import Control.Lens (over, _Just)
......@@ -49,6 +50,9 @@ addEvent level message (JobLog { _scst_events = mEvts, .. }) = JobLog { _scst_ev
-- | Build a 'JobLog' transformation that registers, via 'addEvent', an event
-- at level @"ERROR"@ whose message is the 'mkHumanFriendly' rendering of the
-- given error.
addErrorEvent :: ToHumanFriendlyError e => e -> JobLog -> JobLog
addErrorEvent = addEvent "ERROR" . mkHumanFriendly
-- | Build a 'JobLog' transformation that registers, via 'addEvent', an event
-- at level @"WARNING"@ whose message is the 'renderWarningDiagnostic'
-- rendering of the given diagnostic.
addWarningEvent :: WarningDiagnostic -> JobLog -> JobLog
addWarningEvent = addEvent "WARNING" . renderWarningDiagnostic
-- | Record that @n@ more steps have succeeded: the remaining counter is
-- decreased by @n@ (never going below 0) and the succeeded counter is
-- increased by @n@. Either counter is left untouched when it is 'Nothing',
-- because both updates go through '_Just'.
jobLogProgress :: Int -> JobLog -> JobLog
jobLogProgress n = bumpSucceeded . dropRemaining
  where
    -- Add the newly finished steps to the success counter.
    bumpSucceeded = over (scst_succeeded . _Just) (+ n)
    -- Remove them from the remaining counter, clamping at zero.
    dropRemaining = over (scst_remaining . _Just) (max 0 . subtract n)
......
......@@ -39,8 +39,8 @@ import Gargantext.Core (withDefaultLanguage, defaultLanguage)
import Gargantext.Core.Config (gc_jobs, hasConfig)
import Gargantext.Core.Config.Types (jc_max_docs_parsers)
import Gargantext.Core.NodeStory (HasNodeStoryImmediateSaver, HasNodeArchiveStoryImmediateSaver, currentVersion, NgramsStatePatch')
import Gargantext.Core.Text.Corpus.Parsers qualified as Parser (FileType(..), parseFormatC, _ParseFormatError)
import Gargantext.Core.Text.Corpus.Query qualified as API
import Gargantext.Core.Text.Corpus.Parsers qualified as Parser (FileType(..), parseFormatC, _ParseFormatError, ParseFormatError(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow (flowCorpus, getDataText, flowDataText, TermType(..){-, allDataOrigins-})
......@@ -59,6 +59,7 @@ import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
import Gargantext.System.Logging ( logLocM, LogLevel(..) )
import Gargantext.Utils.Jobs.Monad (JobHandle, MonadJobStatus(..))
import Gargantext.Utils.Jobs.Error as Warn
------------------------------------------------------------------------
{-
......@@ -254,7 +255,7 @@ addToCorpusWithForm user cid nwf jobHandle = do
Right decoded -> decoded
eDocsC <- liftBase $ parseC (nwf ^. wf_fileformat) data'
case eDocsC of
Right (count, docsC) -> do
Right (count, docsC, warn) -> do
-- TODO Add progress (jobStatus) update for docs - this is a
-- long action
......@@ -295,7 +296,11 @@ addToCorpusWithForm user cid nwf jobHandle = do
-- TODO uncomment this
--sendMail user
markComplete jobHandle
if (Warn.renderWarningDiagnostic warn == "") then
markComplete jobHandle
else
markCompleteWithWarning jobHandle warn
Left parseErr -> do
$(logLocM) ERROR $ "parse error: " <> (Parser._ParseFormatError parseErr)
markFailed (Just parseErr) jobHandle
......
......@@ -22,7 +22,7 @@ import Data.Csv ( (.:), header, decodeByNameWith, FromNamedRecord(..), Header )
import Data.Text qualified as T
import Data.Vector (Vector)
import Data.Vector qualified as Vector
import Gargantext.Core.Text.Corpus.Parsers.TSV ( tsvDecodeOptions, Delimiter(Tab) )
import Gargantext.Core.Text.Corpus.Parsers.TSV (defaultDecodingOptionsWithDelimiter, ColumnDelimiter(Tab) )
import Gargantext.Database.Admin.Types.Hyperdata.Contact
import Gargantext.Prelude
import System.FilePath.Posix (takeExtension)
......@@ -119,7 +119,7 @@ readTSVFile_Annuaire' :: FilePath -> IO (Header, Vector IMTUser)
-- Reads the whole file as a lazy ByteString and decodes it by header name
-- using Tab-delimited decoding options.
readTSVFile_Annuaire' = fmap readTsvHalLazyBS' . BL.readFile
where
-- Pure decoding step: returns the header and the decoded rows, and calls
-- 'panicTrace' with the decoder's message when decoding fails.
readTsvHalLazyBS' :: BL.ByteString -> (Header, Vector IMTUser)
readTsvHalLazyBS' bs = case decodeByNameWith (tsvDecodeOptions Tab) bs of
readTsvHalLazyBS' bs = case decodeByNameWith (defaultDecodingOptionsWithDelimiter Tab) bs of
Left e -> panicTrace (cs e)
Right rows -> rows
......
......@@ -44,7 +44,7 @@ import Data.ByteString.Lazy qualified as DBL
import Data.List (lookup)
import Data.Map qualified as DM
import Data.Text qualified as DT
import Data.Tuple.Extra (both) -- , first, second)
import Data.Tuple.Extra (both, second3, fst3, snd3) -- , first, second)
import Gargantext.API.Node.Corpus.New.Types (FileFormat(..))
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.TSV (parseHal, parseTsv, parseTsvC)
......@@ -101,23 +101,23 @@ parseFormatC :: forall m. MonadBaseControl IO m
=> FileType
-> FileFormat
-> DB.ByteString
-> m (Either ParseFormatError (Integer, ConduitT () HyperdataDocument IO ()))
-> m (Either ParseFormatError (Integer, ConduitT () HyperdataDocument IO (), WarningDiagnostic))
parseFormatC ft ff bs0 = first ParseFormatError <$> do_parse ft ff bs0
where
do_parse :: MonadBaseControl IO m
=> FileType
-> FileFormat
-> DB.ByteString
-> m (Either DT.Text (Integer, ConduitT () HyperdataDocument IO ()))
-> m (Either DT.Text (Integer, ConduitT () HyperdataDocument IO (), WarningDiagnostic))
do_parse TsvGargV3 Plain bs = do
let eParsedC = parseTsvC $ DBL.fromStrict bs
pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
pure (second3 (transPipe (pure . runIdentity)) <$> eParsedC)
do_parse TsvHal Plain bs = do
let eParsedC = parseTsvC $ DBL.fromStrict bs
pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
pure (second3 (transPipe (pure . runIdentity)) <$> eParsedC)
do_parse Istex Plain bs = do
ep <- liftBase $ parseIstex EN $ DBL.fromStrict bs
pure $ (\p -> (1, yieldMany [p])) <$> ep
pure $ (\p -> (1, yieldMany [p], MalformedCorpus "")) <$> ep
do_parse RisPresse Plain bs = do
--docs <- enrichWith RisPresse
let eDocs = runParser' RisPresse bs
......@@ -126,7 +126,8 @@ parseFormatC ft ff bs0 = first ParseFormatError <$> do_parse ft ff bs0
, yieldMany docs
.| mapC presseEnrich
.| mapC (map $ both decodeUtf8)
.| mapMC (toDoc RIS)) ) <$> eDocs
.| mapMC (toDoc RIS)
, MalformedCorpus "") ) <$> eDocs
do_parse WOS Plain bs = do
let eDocs = runParser' WOS bs
pure $ (\docs ->
......@@ -134,7 +135,8 @@ parseFormatC ft ff bs0 = first ParseFormatError <$> do_parse ft ff bs0
, yieldMany docs
.| mapC (map $ first WOS.keys)
.| mapC (map $ both decodeUtf8)
.| mapMC (toDoc WOS)) ) <$> eDocs
.| mapMC (toDoc WOS)
, MalformedCorpus "") ) <$> eDocs
do_parse Iramuteq Plain bs = do
let eDocs = runParser' Iramuteq bs
pure $ (\docs ->
......@@ -143,12 +145,12 @@ parseFormatC ft ff bs0 = first ParseFormatError <$> do_parse ft ff bs0
.| mapC (map $ first Iramuteq.keys)
.| mapC (map $ both decodeUtf8)
.| mapMC (toDoc Iramuteq . map (second (DT.replace "_" " ")))
)
, MalformedCorpus "")
)
<$> eDocs
do_parse JSON Plain bs = do
let eParsedC = parseJSONC $ DBL.fromStrict bs
pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
pure (second3 (transPipe (pure . runIdentity)) <$> eParsedC)
do_parse fty ZIP bs = liftBase $ UZip.withZipFileBS bs $ do
fileNames <- filter (filterZIPFileNameP ft) . DM.keys <$> getEntries
printDebug "[do_parse] fileNames" fileNames
......@@ -163,11 +165,12 @@ parseFormatC ft ff bs0 = first ParseFormatError <$> do_parse ft ff bs0
case contents of
[] -> pure $ Left "No files in zip"
_ -> do
let lenghts = fst <$> contents
let contents' = snd <$> contents
let lenghts = fst3 <$> contents
let contents' = snd3 <$> contents
let totalLength = sum lenghts
pure $ Right ( totalLength
, void (sequenceConduits contents') ) -- .| mapM_C (printDebug "[do_parse] doc")
, void (sequenceConduits contents')
, MalformedCorpus "" ) -- .| mapM_C (printDebug "[do_parse] doc")
_ -> pure $ Left $ DT.intercalate "\n" errs
do_parse _ _ _ = pure $ Left "Not implemented"
......
......@@ -25,6 +25,7 @@ import Gargantext.Core (Lang)
import Gargantext.Core.Text.Corpus.Parsers.JSON.Istex qualified as Istex
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (length)
import Gargantext.Utils.Jobs.Error as Warn
import Protolude
......@@ -66,12 +67,13 @@ readJSONLazyBS bs = first T.pack $ eitherDecode bs
-- | Decode a JSON corpus with 'readJSONLazyBS'.
--
-- On success the result holds the number of documents, a conduit that yields
-- each document converted with 'doc2hyperdoc', and (in the three-component
-- form) a 'WarningDiagnostic' that is always @MalformedCorpus ""@ here.
-- On failure the 'Left' carries the decoding error text.
parseJSONC :: BL.ByteString
-> Either Text (Integer, ConduitT () HyperdataDocument Identity ())
-> Either Text (Integer, ConduitT () HyperdataDocument Identity (), WarningDiagnostic)
parseJSONC bs = f <$> readJSONLazyBS bs
where
-- Turn the decoded structure into (count, document stream[, warning]).
f (JSONStruct { documents }) =
( fromIntegral $ length documents
, yieldMany documents .| mapC doc2hyperdoc )
, yieldMany documents .| mapC doc2hyperdoc
, Warn.MalformedCorpus "" )
-- | Extract the 'HyperdataDocument' stored in the @hyperdata@ field of the
-- @document@ nested inside a 'JSONStructDocument'.
doc2hyperdoc :: JSONStructDocument -> HyperdataDocument
doc2hyperdoc (JSONStructDocument { document = JSONDocument { hyperdata } }) = hyperdata
......
This diff is collapsed.
......@@ -29,7 +29,6 @@ import Prelude
import System.Directory (doesFileExist)
import Text.Read (readMaybe)
import qualified Data.Text as T
import Gargantext.API.Admin.EnvTypes ( mkJobHandle, Env, GargJob(..) )
import Gargantext.API.Errors.Types ( BackendInternalError(InternalJobError) )
import Gargantext.API.Prelude ( GargM )
......
......@@ -3,6 +3,8 @@
module Gargantext.Utils.Jobs.Error
( ToHumanFriendlyError(..)
, HumanFriendlyErrorText(..)
, WarningDiagnostic(..)
, renderWarningDiagnostic
) where
import Prelude
......@@ -34,3 +36,13 @@ instance ToHumanFriendlyError HumanFriendlyErrorText where
-- /N.B/ Don't get fooled by this instance, it's just to help inference in case we use \"markFailed Nothing\".
instance ToHumanFriendlyError Void where
mkHumanFriendly = absurd
-- | Warning diagnostics that are turned into text by 'renderWarningDiagnostic'.
--
-- Temporary: these constructors need to be changed to be more precise.
--
-- NOTE(review): call sites visible in this change pass @MalformedCorpus ""@
-- to mean "no warning" and test the rendered text for emptiness — confirm
-- whether a dedicated representation is wanted.
data WarningDiagnostic =
-- | A corpus had a missing abstract; the payload is used as the corpus name
-- in the rendered message.
MissingAbstractFromCorpus T.Text
-- | Free-form warning text, rendered verbatim.
| MalformedCorpus T.Text
-- | Render a 'WarningDiagnostic' as text: a fixed sentence naming the corpus
-- for 'MissingAbstractFromCorpus', and the carried text unchanged for
-- 'MalformedCorpus'.
renderWarningDiagnostic :: WarningDiagnostic -> T.Text
renderWarningDiagnostic (MissingAbstractFromCorpus corpusName) =
  "The corpus " <> corpusName <> " had a missing abstract."
renderWarningDiagnostic (MalformedCorpus message) = message
\ No newline at end of file
......@@ -224,6 +224,11 @@ class MonadJobStatus m where
-- | Finish tracking a job by marking all the remaining steps as succeeded.
markComplete :: JobHandle m -> m ()
-- | Finish tracking a job as complete while also reporting the given
-- 'WarningDiagnostic' for it.
markCompleteWithWarning :: JobHandle m -> WarningDiagnostic -> m ()
-- | Report a 'WarningDiagnostic' for the job identified by the handle.
emitWarning :: JobHandle m -> WarningDiagnostic -> m ()
-- | Finish tracking a job by marking all the remaining steps as failed. Attach an optional
-- message to the failure.
markFailed :: forall e. ToHumanFriendlyError e => Maybe e -> JobHandle m -> m ()
......
......@@ -2,19 +2,16 @@
module Test.Core.Text.Corpus.TSV (tests) where
import Gargantext.Core.Text.Corpus.Parsers.TSV
import Test.QuickCheck
import Test.QuickCheck.Instances ()
import Data.ByteString.Lazy.UTF8 as BLU
import Gargantext.Core.Text.Corpus.Parsers.TSV.Internal
import Data.ByteString.Lazy as BL
import Data.Char ( ord )
import Data.ByteString.Lazy.UTF8 as BLU
import Data.Char (ord)
import Data.Text as DT (Text, pack, null, elem)
import Data.Text.Encoding as DT
import Prelude
import Test.QuickCheck
import Test.QuickCheck.Instances ()
import Test.Tasty
import Test.Tasty.HUnit
import Test.Tasty.QuickCheck hiding (Positive, Negative)
tests :: TestTree
......@@ -30,12 +27,12 @@ tests = testGroup "TSV Parser" [
delimiterBS :: Delimiter -> BL.ByteString
delimiterBS Tab = BLU.fromString "\t"
delimiterBS Comma = BLU.fromString ","
delimiterBS Line = BLU.fromString "\n"
-- | Lazy 'BL.ByteString' form of a column delimiter: tab, comma or semicolon.
delimiterBS :: ColumnDelimiter -> BL.ByteString
delimiterBS colDel = BLU.fromString $ case colDel of
  Tab       -> "\t"
  Comma     -> ","
  Semicolon -> ";"
data File = File { fDelimiter :: Delimiter
data File = File { fDelimiter :: ColumnDelimiter
, allCorpus :: [RandomCorpus]
}
deriving (Show)
......@@ -56,19 +53,19 @@ instance Arbitrary File where
-- | Generate a 'File' holding exactly @m@ random corpora, with a column
-- delimiter picked at random from the listed candidates.
arbitrarySizedFile :: Int -> Gen File
arbitrarySizedFile m = do
del <- elements [Tab, Comma]
del <- elements [Tab, Comma, Semicolon]
corp <- vectorOf m (generateRandomCorpus)
return (File del corp)
delimiterToText :: Delimiter -> Text
delimiterToText Tab = DT.pack "\t"
delimiterToText Comma = DT.pack ","
delimiterToText Line = DT.pack "\n"
-- | 'Text' form of a column delimiter: tab, comma or semicolon.
delimiterToText :: ColumnDelimiter -> Text
delimiterToText colDel = DT.pack $ case colDel of
  Tab       -> "\t"
  Comma     -> ","
  Semicolon -> ";"
delimiterToString :: Delimiter -> Char
delimiterToString Tab = '\t'
delimiterToString Comma = ','
delimiterToString Line = '\n'
-- | Single-character form of a column delimiter: tab, comma or semicolon.
delimiterToString :: ColumnDelimiter -> Char
delimiterToString colDel = case colDel of
  Tab       -> '\t'
  Comma     -> ','
  Semicolon -> ';'
-- | UTF-8 encode a strict 'Text' and wrap the bytes as a lazy 'BL.ByteString'.
textToBL :: Text -> BL.ByteString
textToBL = BL.fromStrict . DT.encodeUtf8
......@@ -85,7 +82,7 @@ generateRandomCorpus = RandomCorpus
-- | Generate a 'File' with between 1 and 5 random corpora and a column
-- delimiter picked at random from the listed candidates.
generateFileDelimiter :: Gen File
generateFileDelimiter = do
del <- elements [Tab, Comma]
del <- elements [Tab, Comma, Semicolon]
m <- choose (1,5)
corp <- vectorOf m (generateRandomCorpus)
return (File del corp)
......@@ -106,7 +103,7 @@ randomHeaderList = frequency [
]
--TODO add delimiter
createLineFromCorpus :: RandomCorpus -> Delimiter -> BL.ByteString
createLineFromCorpus :: RandomCorpus -> ColumnDelimiter -> BL.ByteString
createLineFromCorpus corpus delD = do
let aut = (DT.pack "\"") <> (authors corpus) <> (DT.pack "\"")
let tit = (DT.pack "\"") <> (title corpus) <> (DT.pack "\"")
......@@ -118,7 +115,7 @@ createLineFromCorpus corpus delD = do
let del = delimiterToText delD
textToBL(pDay <> del <> pMonth <> del <> pYears <> del <> aut <> del <> tit <> del <> sou <> del <> abt)
createLineFromCorpusWithNewLine :: RandomCorpus -> Delimiter -> BL.ByteString
createLineFromCorpusWithNewLine :: RandomCorpus -> ColumnDelimiter -> BL.ByteString
createLineFromCorpusWithNewLine corpus delD = do
let aut = (DT.pack "\"") <> (authors corpus) <> (DT.pack "\"")
let tit = (DT.pack "\"") <> (title corpus) <> (DT.pack "\"")
......@@ -143,9 +140,9 @@ createFileWithNewLine file = do
let allLines = BL.intercalate (BLU.fromString "\n") $ Prelude.map (\x -> createLineFromCorpus x (fDelimiter file)) (allCorpus file)
headers <> (BLU.fromString "\n") <> allLines
validRandomCorpus :: RandomCorpus -> Delimiter -> Bool
validRandomCorpus :: RandomCorpus -> ColumnDelimiter -> Bool
validRandomCorpus tsv del
| BL.length (BL.filter (==delimiter del) (createLineFromCorpus tsv del)) > 3= True
| BL.length (BL.filter (== toWord8 del) (createLineFromCorpus tsv del)) > 3= True
| DT.null $ abstract tsv = True
| DT.null $ title tsv = True
| DT.null $ authors tsv = True
......@@ -161,7 +158,7 @@ testValidNumber :: Property
testValidNumber = forAll generateNumber (\s -> do
let nbText = DT.pack $ show s
let bl = textToBL nbText
case validNumber bl nbText 1 of
case checkNumber bl nbText 1 of
Right _ -> True
Left _ | BL.empty == bl -> True
| s < 1 -> True
......@@ -171,7 +168,7 @@ testValidNumber = forAll generateNumber (\s -> do
testValidText :: Property
testValidText = forAll generateString (\s ->
let bl = textToBL s in
case validTextField bl s 1 of
case checkTextField bl s 1 of
Right _ -> True
Left _ | BL.empty == bl -> True
| (fromIntegral $ ord '\"') `BL.elem` bl -> True
......@@ -184,26 +181,22 @@ testTestErrorPerLine = forAll generateRandomCorpus (\tsv -> do
let del = Tab
let line = createLineFromCorpus tsv del
let headers = Prelude.map DT.pack ["Publication Day", "Publication Month", "Publication Year", "Authors", "Title", "Source", "Abstract"]
let splitLine = BL.splitWith (==delimiter del) line
case testErrorPerLine splitLine del headers 1 of
let splitLine = BL.splitWith (== toWord8 del) line
case checkRow splitLine del headers 1 of
Right _ -> True
Left _ -> validRandomCorpus tsv del)
--check :
-- True Del
-- False Error
-- Test if a file is OK
-- | Property over generated files: when 'testCorrectFile' accepts the
-- rendered file, the delimiter it reports must be the one the file was built
-- with. When it rejects the file, every corpus row must either pass the
-- per-row check or satisfy 'validRandomCorpus'.
testTestCorrectFile :: Property
testTestCorrectFile = forAll generateFile (\file -> do
let tsv = createFile file
case testCorrectFile tsv of
Right del -> del == fDelimiter file
Right (del, _) -> del == fDelimiter file
Left _ -> Prelude.all (\x -> do
let del = fDelimiter file
let headers = Prelude.map DT.pack ["Publication Day", "Publication Month", "Publication Year", "Authors", "Title", "Source", "Abstract"]
-- Split the rendered row on the file's delimiter byte and re-check it
-- on its own (as line number 1).
let splitLine = BL.splitWith (==delimiter del) $ createLineFromCorpus x del
case testErrorPerLine splitLine del headers 1 of
let splitLine = BL.splitWith (== toWord8 del) $ createLineFromCorpus x del
case checkRow splitLine del headers 1 of
Right _ -> True
Left _ -> validRandomCorpus x del) (allCorpus file))
......@@ -217,20 +210,20 @@ testTestCorrectFileWithNewLine = forAll generateFile (\file -> do
Left _ -> Prelude.all (\x -> do
let del = fDelimiter file
let headers = Prelude.map DT.pack ["Publication Day", "Publication Month", "Publication Year", "Authors", "Title", "Source", "Abstract"]
let splitLine = BL.splitWith (==delimiter del) $ createLineFromCorpus x del
case testErrorPerLine splitLine del headers 1 of
let splitLine = BL.splitWith (== toWord8 del) $ createLineFromCorpus x del
case checkRow splitLine del headers 1 of
Right _ -> True
Left _ -> validRandomCorpus x del) (allCorpus file))
-- | Property over files from 'generateFileDelimiter': either delimiter
-- detection succeeds, or the first corpus row contains the file's delimiter
-- character or a newline inside one of its abstract/authors/title/source
-- fields.
testFindDelimiter :: Property
testFindDelimiter = forAll generateFileDelimiter (\file -> do
let tsv = createFile file
case findDelimiter tsv of
case findColumnDelimiter tsv of
Right _ -> True
Left _ -> do
-- 'generateFileDelimiter' draws between 1 and 5 corpora, so the list
-- passed to 'Prelude.head' is non-empty for files from that generator.
let line = Prelude.head $ allCorpus file
let del = delimiterToString $ fDelimiter file
let delLine = delimiterToString Line
let delLine = '\n'
del `DT.elem` (abstract line) || del `DT.elem` (authors line) || del `DT.elem` (title line) || del `DT.elem` (source line) || delLine `DT.elem` (abstract line) || delLine `DT.elem` (authors line) || delLine `DT.elem` (title line) || delLine `DT.elem` (source line))
testGetHeader :: Property
......