Commit df6f1dde authored by Alexandre Delanoë

[FIX] Add more redundancies to texts Notes

parent fda25302
Pipeline #3868 failed with stage
in 30 minutes and 33 seconds
...@@ -218,13 +218,14 @@ dateISOP = do ...@@ -218,13 +218,14 @@ dateISOP = do
rd = read :: [Char] -> Integer rd = read :: [Char] -> Integer
number = many1 digit number = many1 digit
sourcePrefixP :: Parser [Char]
sourcePrefixP = do
_ <- string "source:"
many (char ' ')
sourceP :: Parser [Char] sourceP :: Parser [Char]
sourceP = try sourcePrefixP sourceP = try sourcePrefixP
*> many (noneOf "\n") *> many (noneOf "\n")
where
sourcePrefixP :: Parser [Char]
sourcePrefixP = do
_ <- string "source:"
many (char ' ')
-- contentsP :: Parser String -- contentsP :: Parser String
-- contentsP = many anyChar -- contentsP = many anyChar
...@@ -233,15 +234,19 @@ tokenEnd :: Parser () ...@@ -233,15 +234,19 @@ tokenEnd :: Parser ()
tokenEnd = void (char '\n') <|> eof tokenEnd = void (char '\n') <|> eof
--- MISC Tools --- MISC Tools
-- Using chunkAlong here enables redundancies in short corpora of texts.
-- TODO: maybe use splitEvery or chunkAlong depending on the size of the whole text.
text2titleParagraphs :: Int -> Text -> [(Text, Text)] text2titleParagraphs :: Int -> Text -> [(Text, Text)]
text2titleParagraphs n = catMaybes text2titleParagraphs n = catMaybes
. List.map doTitle . List.map doTitle
. (splitEvery n) . (chunkAlong n' n)
-- . (splitEvery n)
. sentences . sentences
. DT.intercalate " " -- ". " . DT.intercalate " " -- ". "
. List.filter (/= "") . List.filter (/= "")
. DT.lines . DT.lines
where
n' = n + (round $ (fromIntegral n) / (2 :: Double))
doTitle :: [Text] -> Maybe (Text, Text) doTitle :: [Text] -> Maybe (Text, Text)
doTitle (t:ts) = Just (t, DT.concat ts) doTitle (t:ts) = Just (t, DT.concat ts)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment