Commit 645463e6 authored by Alfredo Di Napoli

Add more commentary to parseTsvWithDiagnostics

parent d84e6b93
Pipeline #7518 passed with stages
in 43 minutes and 31 seconds
......@@ -366,6 +366,12 @@ readTSVFile fp = do
-- | Parser type used for TSV documents: a 'CSVI.Parser' whose results are
-- 'ParseCorpusResult' values wrapping a 'TsvDoc'.
type TsvDocParser = CSVI.Parser (ParseCorpusResult TsvDoc)
-- | Parses the input 'ByteString' identifying a TSV document
-- using the provided delimiter. This function is /incremental/, i.e.
-- it will correctly identify rows which can't be parsed and skip them,
-- collecting the error along the way.
-- It returns a list of 'ParseCorpusResult', each of which contains either
-- the parsed record or a diagnostic describing the error on the given row.
parseTsvWithDiagnostics :: Delimiter
-> BL.ByteString
-> [ParseCorpusResult TsvDoc]
......@@ -374,6 +380,8 @@ parseTsvWithDiagnostics d bs =
MkHeaderParsingContext _p ->
[ ParseTsvRecordFailed (AtRow 0 ("The parsing choked on the header (delimiter was " <> show d <> "). This might be a malformed TSV we can't recover from.", mempty)) ]
MkRecordParsingContext (RecordParsingContext{..})
-- As we accumulate records by consing (for efficiency's sake),
-- we need a final 'reverse' at the end.
-> reverse $ _prs_ctx_parsed_records
where
......@@ -391,7 +399,8 @@ parseTsvWithDiagnostics d bs =
MkHeaderParsingContext p -> go_hdr strict_chunk p
MkRecordParsingContext p -> go_rec strict_chunk p
-- Specialised parser for the header.
-- Specialised parser for the header: once it fails
-- or completes, it morphs into a record parser.
go_hdr :: StrictByteString
-> CSVI.HeaderParser TsvDocParser
-> ParsingContext (ParseCorpusResult TsvDoc)
......@@ -410,7 +419,10 @@ parseTsvWithDiagnostics d bs =
-> ParsingContext (ParseCorpusResult TsvDoc)
go_rec input_bs ctx = MkRecordParsingContext $ stepParser input_bs ctx
stepParser :: ByteString
-- Main workhorse: given an input 'StrictByteString', it steps
-- the parser. If the parser reached a terminal state, it gets
-- removed from the input 'RecordParsingContext'.
stepParser :: StrictByteString
-> RecordParsingContext (ParseCorpusResult TsvDoc)
-> RecordParsingContext (ParseCorpusResult TsvDoc)
stepParser input_bs ctx = case ctx ^. prs_ctx_parser of
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment