[framewrite] parser is working now, much better

Order of authors, date, source doesn't matter and we can interleave contents.
parent 3305c248
Pipeline #1852 passed with stage
in 34 minutes and 35 seconds
...@@ -12,7 +12,7 @@ rec { ...@@ -12,7 +12,7 @@ rec {
git git
gmp gmp
gsl gsl
haskell-language-server #haskell-language-server
hlint hlint
igraph igraph
liblapack liblapack
......
...@@ -2,12 +2,12 @@ module Gargantext.Core.Text.Corpus.Parsers.FrameWrite where ...@@ -2,12 +2,12 @@ module Gargantext.Core.Text.Corpus.Parsers.FrameWrite where
import Control.Applicative ((*>)) import Control.Applicative ((*>))
import Control.Monad (void) import Control.Monad (void)
import Data.Either
import Data.Maybe import Data.Maybe
import Data.Text import Data.Text hiding (foldl)
import Gargantext.Prelude import Gargantext.Prelude
import Prelude (String, (++)) import Prelude ((++))
import Text.Parsec hiding (Line) import Text.Parsec hiding (Line)
import Text.Parsec.Combinator
import Text.Parsec.String import Text.Parsec.String
...@@ -22,6 +22,7 @@ import Text.Parsec.String ...@@ -22,6 +22,7 @@ import Text.Parsec.String
-- par défaut: un doc == 1 NodeWrite -- par défaut: un doc == 1 NodeWrite
-- ## mean each ## section will be a new document with title the subsubsection title. Either it features options for author, date etc. or it will inherit the document's option. -- ## mean each ## section will be a new document with title the subsubsection title. Either it features options for author, date etc. or it will inherit the document's option.
sample :: Text
sample = sample =
unlines unlines
[ "title1" [ "title1"
...@@ -34,6 +35,7 @@ sample = ...@@ -34,6 +35,7 @@ sample =
, "document contents 2" , "document contents 2"
] ]
sampleUnordered :: Text
sampleUnordered = sampleUnordered =
unlines unlines
[ "title1" [ "title1"
...@@ -46,10 +48,12 @@ sampleUnordered = ...@@ -46,10 +48,12 @@ sampleUnordered =
, "document contents 2" , "document contents 2"
] ]
parseSample = parse documentP "sample" (unpack sample) -- parseSample = parse documentP "sample" (unpack sample)
parseSampleUnordered = parse documentP "sampleUnordered" (unpack sampleUnordered) -- parseSampleUnordered = parse documentP "sampleUnordered" (unpack sampleUnordered)
parseLinesSample = parse documentLinesP "sample" (unpack sample) parseLinesSample :: Either ParseError Parsed
parseLinesSampleUnordered = parse documentLinesP "sampleUnordered" (unpack sampleUnordered) parseLinesSample = parseLines sample
parseLinesSampleUnordered :: Either ParseError Parsed
parseLinesSampleUnordered = parseLines sampleUnordered
data Author = data Author =
Author { firstName :: Text Author { firstName :: Text
...@@ -64,6 +68,7 @@ data Parsed = ...@@ -64,6 +68,7 @@ data Parsed =
, contents :: Text } , contents :: Text }
deriving (Show) deriving (Show)
emptyParsed :: Parsed
emptyParsed = emptyParsed =
Parsed { title = "" Parsed { title = ""
, authors = [] , authors = []
...@@ -73,26 +78,27 @@ emptyParsed = ...@@ -73,26 +78,27 @@ emptyParsed =
data Line = data Line =
LAuthors [Author] LAuthors [Author]
| LContents Text
| LDate Text | LDate Text
| LSource Text | LSource Text
| LContents Text
| LTitle Text | LTitle Text
deriving (Show) deriving (Show)
parseLines :: Text -> Parsed parseLines :: Text -> Either ParseError Parsed
parseLines text = foldl f emptyParsed lst parseLines text = foldl f emptyParsed <$> lst
where where
lst = parse documentLinesP "" (unpack text) lst = parse documentLinesP "" (unpack text)
f (Parsed { .. }) (LAuthors as) = Parsed { authors = as, .. } f (Parsed { .. }) (LAuthors as) = Parsed { authors = as, .. }
f (Parsed { .. }) (LDate d ) = Parsed { date = d, .. } f (Parsed { .. }) (LContents c) = Parsed { contents = concat [contents, c], .. }
f (Parsed { .. }) (LSource s ) = Parsed { source = s, .. } f (Parsed { .. }) (LDate d ) = Parsed { date = Just d, .. }
f (Parsed { .. }) (LContents c) = Parsed { contents = contents ++ c, .. } f (Parsed { .. }) (LSource s ) = Parsed { source = Just s, .. }
f (Parsed { .. }) (LTitle t ) = Parsed { title = t, .. } f (Parsed { .. }) (LTitle t ) = Parsed { title = t, .. }
documentLinesP :: Parser [Line]
documentLinesP = do documentLinesP = do
t <- titleP t <- titleP
lines <- lineP `sepBy` newline ls <- lineP `sepBy` newline
pure $ [LTitle $ pack t] ++ lines pure $ [LTitle $ pack t] ++ ls
lineP :: Parser Line lineP :: Parser Line
lineP = do lineP = do
...@@ -101,43 +107,49 @@ lineP = do ...@@ -101,43 +107,49 @@ lineP = do
, try sourceLineP , try sourceLineP
, contentsLineP ] , contentsLineP ]
authorsLineP :: Parser Line
authorsLineP = do authorsLineP = do
authors <- authorsP authors <- authorsP
pure $ LAuthors authors pure $ LAuthors authors
dateLineP :: Parser Line
dateLineP = do dateLineP = do
date <- dateP date <- dateP
pure $ LDate $ pack date pure $ LDate $ pack date
sourceLineP :: Parser Line
sourceLineP = do sourceLineP = do
source <- sourceP source <- sourceP
pure $ LSource $ pack source pure $ LSource $ pack source
contentsLineP :: Parser Line
contentsLineP = do contentsLineP = do
contents <- many (noneOf "\n") contents <- many (noneOf "\n")
pure $ LContents $ pack contents pure $ LContents $ pack contents
-------------------- --------------------
documentP = do -- documentP = do
t <- titleP -- t <- titleP
a <- optionMaybe authorsP -- a <- optionMaybe authorsP
d <- optionMaybe dateP -- d <- optionMaybe dateP
s <- optionMaybe sourceP -- s <- optionMaybe sourceP
c <- contentsP -- c <- contentsP
pure $ Parsed { title = pack t -- pure $ Parsed { title = pack t
, authors = fromMaybe [] a -- , authors = fromMaybe [] a
, date = pack <$> d -- , date = pack <$> d
, source = pack <$> s -- , source = pack <$> s
, contents = pack c } -- , contents = pack c }
titleDelimiterP :: Parser ()
titleDelimiterP = do titleDelimiterP = do
newline _ <- newline
string "==" _ <- string "=="
tokenEnd tokenEnd
titleP :: Parser [Char] titleP :: Parser [Char]
titleP = manyTill anyChar (try titleDelimiterP) titleP = manyTill anyChar (try titleDelimiterP)
authorsPrefixP :: Parser [Char]
authorsPrefixP = do authorsPrefixP = do
_ <- string "^@@authors:" _ <- string "^@@authors:"
many (char ' ') many (char ' ')
...@@ -153,6 +165,7 @@ authorP = do ...@@ -153,6 +165,7 @@ authorP = do
pure $ Author { firstName = pack fn, lastName = pack ln } pure $ Author { firstName = pack fn, lastName = pack ln }
-- manyTill anyChar (void (char '\n') <|> eof) -- manyTill anyChar (void (char '\n') <|> eof)
datePrefixP :: Parser [Char]
datePrefixP = do datePrefixP = do
_ <- string "^@@date:" _ <- string "^@@date:"
many (char ' ') many (char ' ')
...@@ -160,6 +173,7 @@ dateP :: Parser [Char] ...@@ -160,6 +173,7 @@ dateP :: Parser [Char]
dateP = try datePrefixP dateP = try datePrefixP
*> many (noneOf "\n") *> many (noneOf "\n")
sourcePrefixP :: Parser [Char]
sourcePrefixP = do sourcePrefixP = do
_ <- string "^@@source:" _ <- string "^@@source:"
many (char ' ') many (char ' ')
...@@ -167,7 +181,8 @@ sourceP :: Parser [Char] ...@@ -167,7 +181,8 @@ sourceP :: Parser [Char]
sourceP = try sourcePrefixP sourceP = try sourcePrefixP
*> many (noneOf "\n") *> many (noneOf "\n")
contentsP :: Parser String -- contentsP :: Parser String
contentsP = many anyChar -- contentsP = many anyChar
tokenEnd :: Parser ()
tokenEnd = void (char '\n') <|> eof tokenEnd = void (char '\n') <|> eof
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment