Commit df6f1dde authored by Alexandre Delanoë

[FIX] Add more redundancies to texts Notes

parent fda25302
Pipeline #3868 failed with stage
in 30 minutes and 33 seconds
......@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.6.9.8.5.1
version: 0.0.6.9.8.5.1
synopsis: Search, map, share
description: Please see README.md
category: Data
......
......@@ -218,13 +218,14 @@ dateISOP = do
rd = read :: [Char] -> Integer
number = many1 digit
-- | Parse the literal prefix @source:@ followed by zero or more space
-- characters. The matched @source:@ text is discarded; the value returned
-- is the run of spaces that followed it (possibly empty).
sourcePrefixP :: Parser [Char]
sourcePrefixP = do
_ <- string "source:"
many (char ' ')
-- | Parse a @source:@ line and return its payload: the @source:@ prefix and
-- any spaces after it are skipped, then every character up to (but not
-- including) the next newline is returned.
sourceP :: Parser [Char]
-- The prefix is wrapped in 'try'; presumably so that a failed prefix match
-- does not consume input (Parsec-style semantics — confirm against the
-- parser library in use).
sourceP = try sourcePrefixP
*> many (noneOf "\n")
where
-- Local helper: matches the literal "source:" and then zero or more spaces;
-- its result (the spaces) is dropped by the '*>' above.
sourcePrefixP :: Parser [Char]
sourcePrefixP = do
_ <- string "source:"
many (char ' ')
-- contentsP :: Parser String
-- contentsP = many anyChar
......@@ -233,15 +234,19 @@ tokenEnd :: Parser ()
-- Succeeds either on a single newline character (whose value is discarded
-- via 'void') or at end of input.
tokenEnd = void (char '\n') <|> eof
--- MISC Tools
-- Using chunkAlong here enables redundancy in short corpora of texts.
-- Maybe use splitEvery or chunkAlong depending on the size of the whole text.
text2titleParagraphs :: Int -> Text -> [(Text, Text)]
text2titleParagraphs n = catMaybes
. List.map doTitle
. (splitEvery n)
. (chunkAlong n' n)
-- . (splitEvery n)
. sentences
. DT.intercalate " " -- ". "
. List.filter (/= "")
. DT.lines
where
n' = n + (round $ (fromIntegral n) / (2 :: Double))
doTitle :: [Text] -> Maybe (Text, Text)
doTitle (t:ts) = Just (t, DT.concat ts)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment