Commit f7578633 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[PARSERS] Iramuteq file format parser

parent 6dd22092
...@@ -5,7 +5,7 @@ cabal-version: 1.12 ...@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
name: gargantext name: gargantext
version: 0.0.6.9.9.1 version: 0.0.6.9.9.1
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
...@@ -192,6 +192,7 @@ library ...@@ -192,6 +192,7 @@ library
Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec
Gargantext.Core.Text.Corpus.Parsers.FrameWrite Gargantext.Core.Text.Corpus.Parsers.FrameWrite
Gargantext.Core.Text.Corpus.Parsers.GrandDebat Gargantext.Core.Text.Corpus.Parsers.GrandDebat
Gargantext.Core.Text.Corpus.Parsers.Iramuteq
Gargantext.Core.Text.Corpus.Parsers.Isidore Gargantext.Core.Text.Corpus.Parsers.Isidore
Gargantext.Core.Text.Corpus.Parsers.Json2Csv Gargantext.Core.Text.Corpus.Parsers.Json2Csv
Gargantext.Core.Text.Corpus.Parsers.RIS Gargantext.Core.Text.Corpus.Parsers.RIS
......
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.Iramuteq
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Parser for the Iramuteq corpus file format. Each notice starts with a
@**** @ header line carrying @*name_value@ variables, followed by its text.
-}
module Gargantext.Core.Text.Corpus.Parsers.Iramuteq (parseIramuteqFile, notices) where
import Control.Applicative
import Data.Attoparsec.ByteString (Parser, takeTill, parseOnly)
import Data.Attoparsec.ByteString.Char8 (isEndOfLine, takeWhile, endOfLine)
import Data.ByteString (ByteString)
import Prelude hiding (takeWhile, take, concat, readFile, lines, concat)
import qualified Data.ByteString as DB
-- | Read the file at the given path in one go and run the 'notices' parser
-- over its contents.
--
-- The result is whatever 'parseOnly' returns: @Left@ with an error message,
-- or @Right@ with one list of @(key, value)@ pairs per notice.
parseIramuteqFile :: String -> IO (Either String [[(ByteString, ByteString)]])
parseIramuteqFile fp = parseOnly notices <$> DB.readFile fp
-------------------------------------------------------------
-- | Zero or more consecutive notices, collected in input order.
notices :: Parser [[(ByteString, ByteString)]]
notices = many notice
-- | A single notice: the header variables followed by the body text, which
-- is appended as a trailing @("text", body)@ pair.
--
-- NOTE(review): the body is everything up to (not including) the next @*@
-- character, so a literal asterisk inside the text ends the body early —
-- confirm this matches the inputs this parser is meant to accept.
notice :: Parser [(ByteString, ByteString)]
notice = withBody <$> headers <*> takeWhile (/= '*')
  where
    withBody vars body = vars <> [("text", body)]
-----------------------------------------------------------------
-- | Consume one header line with 'header', then re-parse the captured line
-- with 'fields' to obtain its @(name, value)@ pairs.
headers :: Parser [(ByteString, ByteString)]
headers = header `parseOf` fields
-- | Match the literal @"**** "@ marker and return the rest of that line.
-- The line terminator is consumed but not included in the result.
header :: Parser ByteString
header = do
  _    <- "**** "
  line <- takeTill isEndOfLine
  endOfLine
  pure line
-----------------------------------------------------------------
-- | Zero or more header variables, each read by 'field' and then split into
-- a @(name, value)@ pair by 'fieldTuple'.
fields :: Parser [(ByteString, ByteString)]
fields = many oneField
  where
    oneField = parseOf field fieldTuple
-- | The raw text of one @*@-prefixed variable, without the leading asterisk.
--
-- The first alternative handles a variable followed by a space (the space is
-- consumed); the second handles one with no following space and takes
-- everything up to the next newline.
field :: Parser ByteString
field = spaceTerminated <|> lastOnLine
  where
    spaceTerminated = "*" *> takeWhile (/= ' ') <* " "
    lastOnLine      = "*" *> takeWhile (/= '\n')
-- | Split a variable at its first underscore: the part before it is the
-- name, the part after it (up to a newline) is the value. Fails when there
-- is no underscore to match.
fieldTuple :: Parser (ByteString, ByteString)
fieldTuple = (,) <$> key <*> value
  where
    key   = takeWhile (/= '_') <* "_"
    value = takeWhile (/= '\n')
-----------------------------------------------------------------
-- | Run parser @p@ over the bytes @t@ as a separate 'parseOnly' run and lift
-- the outcome into the enclosing parser: the parsed value on success,
-- 'empty' (discarding the error message) on failure. No input of the
-- enclosing parser is consumed either way.
constP :: Parser a -> ByteString -> Parser a
constP p t = either (const empty) pure (parseOnly p t)
-- | Two-stage parse: @ptxt@ extracts a chunk of bytes from the input, then
-- @pa@ is run on that chunk alone (through 'constP') to produce the result.
--
-- NOTE(review): the trailing @\<|\> empty@ looks redundant apart from
-- replacing the failure of the first branch with that of 'empty' — confirm
-- before removing it.
parseOf :: Parser ByteString -> Parser a -> Parser a
parseOf ptxt pa = reparsed <|> empty
  where
    reparsed = do
      chunk <- ptxt
      constP pa chunk
...@@ -16,7 +16,6 @@ citation programs to exchange data. ...@@ -16,7 +16,6 @@ citation programs to exchange data.
-} -}
module Gargantext.Core.Text.Corpus.Parsers.RIS (parser, onField, fieldWith, lines) where module Gargantext.Core.Text.Corpus.Parsers.RIS (parser, onField, fieldWith, lines) where
import Data.List (lookup) import Data.List (lookup)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment