Commit ea0fe616 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[PARSERS] Iramuteq file format parser

parent aa5bfd52
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.Iramuteq
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Parser for the Iramuteq corpus file format: each notice starts with a
@**** @ header line carrying @*name_value@ fields, followed by its text.
-}
module Gargantext.Core.Text.Corpus.Parsers.Iramuteq (parseIramuteqFile, notices) where
import Control.Applicative
import Data.Attoparsec.ByteString (Parser, takeTill, parseOnly)
import Data.Attoparsec.ByteString.Char8 (isEndOfLine, takeWhile, endOfLine)
import Data.ByteString (ByteString)
import Prelude hiding (takeWhile, take, concat, readFile, lines, concat)
import qualified Data.ByteString as DB
-- | Read the file at the given path as a 'ByteString' and run 'notices'
-- over its contents.
--
-- Returns the 'parseOnly' result: either an error message or one list of
-- @(key, value)@ pairs per notice.
parseIramuteqFile :: String -> IO (Either String [[(ByteString, ByteString)]])
parseIramuteqFile fp = parseOnly notices <$> DB.readFile fp
-------------------------------------------------------------
-- | Zero or more consecutive 'notice's.
--
-- Parsing stops at the first position where a 'notice' no longer matches;
-- this parser does not itself require the end of the input to be reached.
notices :: Parser [[(ByteString, ByteString)]]
notices = many notice
-- | Parse a single notice: its header fields followed by its text body.
--
-- The body runs from the end of the header line up to (not including) the
-- next line that starts with the @"**** "@ marker, or to the end of input.
-- Line terminators are kept verbatim. The text is appended to the header
-- fields under the key @"text"@.
--
-- The body is split on marker lines rather than on any @'*'@ byte, so a star
-- inside the text neither truncates the body nor prevents the following
-- notices from being parsed.
notice :: Parser [(ByteString, ByteString)]
notice = do
  hs   <- headers
  body <- many textLine
  pure $ hs <> [("text", DB.concat body)]
  where
    -- Same prefix that 'header' expects at the start of a notice.
    marker :: Parser ByteString
    marker = "**** "

    -- One line of body text together with its terminator.
    textLine :: Parser ByteString
    textLine = do
      -- If the line opens the next notice, fail. The enclosing 'many'
      -- backtracks on failure, so the marker bytes consumed by this probe
      -- are handed back to the next 'notice'.
      atMarker <- (True <$ marker) <|> pure False
      if atMarker
        then empty
        else do
          line <- takeTill isEndOfLine
          eol  <- "\r\n" <|> "\n" <|> "\r" <|> pure DB.empty
          -- Nothing was read (end of input): fail so that 'many' stops
          -- instead of looping on a parser that consumes nothing.
          if DB.null line && DB.null eol
            then empty
            else pure (line <> eol)
-----------------------------------------------------------------
-- | Parse a header line with 'header', then parse the bytes of that line
-- with 'fields' to obtain its @(name, value)@ pairs.
headers :: Parser [(ByteString, ByteString)]
headers = header `parseOf` fields
-- | Match the @"**** "@ prefix and return the rest of that line, without its
-- line terminator. The terminator itself is required and consumed.
header :: Parser ByteString
header = do
  _    <- "**** "
  line <- takeTill isEndOfLine
  endOfLine
  pure line
-----------------------------------------------------------------
-- | Zero or more @(name, value)@ pairs: each one is a 'field' token whose
-- bytes are then split by 'fieldTuple'. Stops at the first token that does
-- not parse.
fields :: Parser [(ByteString, ByteString)]
fields = many pair
  where
    pair = field `parseOf` fieldTuple
-- | One @*token@: a star followed by the bytes up to the next space (the
-- space is consumed). If no space follows, takes the bytes up to the next
-- newline or the end of input instead.
field :: Parser ByteString
field = upToSpace <|> upToNewline
  where
    -- "*token " : token terminated by a single space
    upToSpace = do
      _     <- "*"
      token <- takeWhile (/= ' ')
      _     <- " "
      pure token
    -- "*token"  : last token, nothing but the rest of the line follows
    upToNewline = "*" *> takeWhile (/= '\n')
-- | Split @name_value@ at the first underscore: everything before it is the
-- name, everything after it (up to a newline or the end of input) is the
-- value. Fails when there is no underscore.
fieldTuple :: Parser (ByteString, ByteString)
fieldTuple = (,) <$> key <*> value
  where
    key   = takeWhile (/= '_') <* "_"
    value = takeWhile (/= '\n')
-----------------------------------------------------------------
-- | Run parser @p@ over the given bytes (via 'parseOnly') and lift the
-- outcome into the surrounding parser.
--
-- On success the parsed value is returned and no outer input is consumed.
-- On failure the outer parser fails with the nested parser's error message,
-- so the reason for the failure is not lost. @p@ is not required to consume
-- all of the given bytes.
constP :: Parser a -> ByteString -> Parser a
constP p t = either fail pure (parseOnly p t)
-- | @parseOf ptxt pa@ runs @ptxt@ on the current input, then parses the
-- bytes it returned with @pa@ (through 'constP').
--
-- Fails if either step fails. The failure of the step that went wrong is
-- reported as is, instead of being replaced by a generic one.
parseOf :: Parser ByteString -> Parser a -> Parser a
parseOf ptxt pa = ptxt >>= constP pa
......@@ -16,7 +16,6 @@ citation programs to exchange data.
-}
module Gargantext.Core.Text.Corpus.Parsers.RIS (parser, onField, fieldWith, lines) where
import Data.List (lookup)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment