module Gargantext.RCT where

import Gargantext.Prelude

-- | Placeholder binding: it carries no real value and is defined as
-- 'undefined', so forcing it is expected to raise an error (assuming the
-- 'undefined' brought in by "Gargantext.Prelude" behaves like the standard one).
--
-- NOTE(review): presumably kept only so that the module has one live
-- top-level definition while everything else is commented out — TODO confirm,
-- and remove once real RCT definitions land.
foo :: Int
foo = undefined
--import Data.Text (Text, words)
--import Data.Attoparsec.Text (anyChar, isEndOfLine, Parser, takeTill, many1, endOfLine, space, manyTill)
--import Control.Applicative (many)

-- RCT is the acronym for Referential ConText (of Text)
-- In the beginning there was a byte,
-- then a char
-- Char -> RCT [Char]

-- then a list of chars called a string, we call it a Form
-- (removing all weird characters which are not alphanumeric)

-- Form -> RCT Sentence

-- These forms compose the RCT Sentence
-- an ngram is composed of multiple forms

-- Paragraph = [Sentence]

-- type Title = Paragraph
-- data Block = [Paragraph]
-- Block is taken from Pandoc

-- data Document = [Block]

-- Set of databases
-- Database
-- Set of Articles
--      Article
--      Paragraph (abstract + title)
-- Sentence - Ngrams - Forms



--separateurs :: Parser Text
--separateurs = dropWhile isEndOfLine

--paragraphs :: Parser [Text]
--paragraphs = many paragraph
--
--paragraph :: Parser Text
--paragraph = takeTill isEndOfLine <* many1 endOfLine
--
-- forms :: Text -> [Text]
-- forms = words