Commit 3b114df3 authored by Alfredo Di Napoli

Be lenient in case of missing forms columns in ngram imports

parent b466e791
Pipeline #7477 failed with stages in 47 minutes and 54 seconds
...@@ -181,7 +181,8 @@ ngramsListFromTSVData tsvData = case decodeTsv of ...@@ -181,7 +181,8 @@ ngramsListFromTSVData tsvData = case decodeTsv of
decodeTsv = Vec.catMaybes <$> decodeTsv = Vec.catMaybes <$>
Tsv.decodeWithP tsvToNgramsTableMap Tsv.decodeWithP tsvToNgramsTableMap
(Tsv.defaultDecodeOptions { Tsv.decDelimiter = fromIntegral (P.ord '\t') }) (Tsv.defaultDecodeOptions { Tsv.decDelimiter = fromIntegral (P.ord '\t') })
Tsv.HasHeader -- Don't use a header, make it lenient in case the 'forms' are missing.
Tsv.NoHeader
binaryData binaryData
-- | Converts a plain TSV 'Record' into an NgramsTableMap -- | Converts a plain TSV 'Record' into an NgramsTableMap
...@@ -189,6 +190,9 @@ tsvToNgramsTableMap :: Tsv.Record -> Tsv.Parser (Maybe NgramsTableMap) ...@@ -189,6 +190,9 @@ tsvToNgramsTableMap :: Tsv.Record -> Tsv.Parser (Maybe NgramsTableMap)
tsvToNgramsTableMap record = case Vec.toList record of tsvToNgramsTableMap record = case Vec.toList record of
(map P.decodeUtf8 -> [status, label, forms]) (map P.decodeUtf8 -> [status, label, forms])
-> pure $ Just $ conv status label forms -> pure $ Just $ conv status label forms
-- Garg #381: alias the forms to the empty text.
(map P.decodeUtf8 -> [status, label])
-> pure $ Just $ conv status label mempty
-- WARNING: This silently ignores errors (#433) -- WARNING: This silently ignores errors (#433)
_ -> pure Nothing _ -> pure Nothing
......
...@@ -54,7 +54,9 @@ data TsvList = TsvList ...@@ -54,7 +54,9 @@ data TsvList = TsvList
instance FromNamedRecord TsvList where instance FromNamedRecord TsvList where
parseNamedRecord r = TsvList <$> r .: "status" parseNamedRecord r = TsvList <$> r .: "status"
<*> r .: "label" <*> r .: "label"
<*> r .: "forms" -- Issue #381: be lenient in the forms
-- field, if missing, default to the empty text.
<*> (fromMaybe mempty <$> r .: "forms")
instance ToNamedRecord TsvList where instance ToNamedRecord TsvList where
toNamedRecord (TsvList s l f) = toNamedRecord (TsvList s l f) =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment