Commit 3a8af37d authored by Alexandre Delanoë

[FIX] prepareText

parent 58ad4a3e
...@@ -17,7 +17,7 @@ that could be the incarnation of the mythic Gargantua. ...@@ -17,7 +17,7 @@ that could be the incarnation of the mythic Gargantua.
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Core.Text.Clean module Gargantext.Core.Text.Prepare
where where
import Data.Text (Text) import Data.Text (Text)
...@@ -40,21 +40,21 @@ prepareText p txt = groupText p ...@@ -40,21 +40,21 @@ prepareText p txt = groupText p
--------------------------------------------------------------------- ---------------------------------------------------------------------
groupText :: Paragraph -> [Text] -> [Text] groupText :: Paragraph -> [Text] -> [Text]
groupText (Uniform g s) = groupUniform g s groupText (Uniform blockSize) = groupUniform blockSize
groupText AuthorLike = groupLines groupText AuthorLike = groupLines
--------------------------------------------------------------------- ---------------------------------------------------------------------
data Paragraph = Uniform Grain Step | AuthorLike data Paragraph = Uniform Grain | AuthorLike
-- Uniform does not preserve the paragraphs of the author but length of paragraphs is uniform -- Uniform does not preserve the paragraphs of the author but length of paragraphs is uniform
-- Author Like preserve the paragraphs of the Author but length of paragraphs is not uniform -- Author Like preserve the paragraphs of the Author but length of paragraphs is not uniform
-- Grain: number of Sentences by block of Text -- Grain: number of Sentences by block of Text
-- Step : overlap of sentence between connex block of Text -- Step : overlap of sentence between connex block of Text
groupUniform :: Grain -> Step -> [Text] -> [Text] groupUniform :: Grain -> [Text] -> [Text]
groupUniform g s ts = map (Text.intercalate " ") groupUniform g ts = map (Text.intercalate " ")
$ chunkAlong g s $ chunkAlong g g
$ sentences $ sentences
$ Text.concat ts $ Text.concat ts
groupLines :: [Text] -> [Text] groupLines :: [Text] -> [Text]
groupLines xxx@(a:b:xs) = groupLines xxx@(a:b:xs) =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment