Commit 3b114df3 authored by Alfredo Di Napoli

Be lenient in case of missing forms columns in ngram imports

parent b466e791
Pipeline #7477 failed with stages
in 47 minutes and 54 seconds
......@@ -181,7 +181,8 @@ ngramsListFromTSVData tsvData = case decodeTsv of
decodeTsv = Vec.catMaybes <$>
Tsv.decodeWithP tsvToNgramsTableMap
(Tsv.defaultDecodeOptions { Tsv.decDelimiter = fromIntegral (P.ord '\t') })
Tsv.HasHeader
-- Don't use a header; make it lenient in case the 'forms' are missing.
Tsv.NoHeader
binaryData
-- | Converts a plain TSV 'Record' into an NgramsTableMap
......@@ -189,6 +190,9 @@ tsvToNgramsTableMap :: Tsv.Record -> Tsv.Parser (Maybe NgramsTableMap)
tsvToNgramsTableMap record = case Vec.toList record of
(map P.decodeUtf8 -> [status, label, forms])
-> pure $ Just $ conv status label forms
-- Garg #381: when the forms are missing, default them to the empty text.
(map P.decodeUtf8 -> [status, label])
-> pure $ Just $ conv status label mempty
-- WARNING: This silently ignores errors (#433)
_ -> pure Nothing
......
......@@ -54,7 +54,9 @@ data TsvList = TsvList
instance FromNamedRecord TsvList where
parseNamedRecord r = TsvList <$> r .: "status"
<*> r .: "label"
<*> r .: "forms"
-- Issue #381: be lenient with the forms
-- field: if it is missing, default to the empty text.
<*> (fromMaybe mempty <$> r .: "forms")
instance ToNamedRecord TsvList where
toNamedRecord (TsvList s l f) =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment