RIS.hs 2.17 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
{-|
Module      : Gargantext.Text.Parsers.RIS
Description : 
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX


RIS is a standardized tag format developed by Research Information
Systems, Incorporated (the format name refers to the company) to enable
13 14 15
citation programs to exchange data.

[More](https://en.wikipedia.org/wiki/RIS_(file_format))
16 17 18 19 20 21

-}

{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}

22
module Gargantext.Text.Parsers.RIS (parser, onField, fieldWith, lines) where
23

24
import Data.List (lookup)
25
import Control.Applicative
26 27 28
import Data.Attoparsec.ByteString (Parser, try, takeTill, take, many1)
import Data.Attoparsec.ByteString.Char8 (isEndOfLine)
import Data.ByteString (ByteString, concat)
29
import Data.Monoid ((<>))
30
import Gargantext.Prelude hiding (takeWhile, take)
31 32 33
import qualified Data.List as DL
-------------------------------------------------------------

34 35
parser :: Parser [[(ByteString, ByteString)]]
parser = do
Alexandre Delanoë's avatar
Alexandre Delanoë committed
36 37 38
    n  <- notice "TY  -"
    ns <- many1 (notice "\nTY  -")
    pure $ [n] <> ns
39

Alexandre Delanoë's avatar
Alexandre Delanoë committed
40
notice :: Parser ByteString -> Parser [(ByteString, ByteString)]
41
notice s = start *> many (fieldWith field)  <* end
42
    where
43 44 45
      field :: Parser ByteString
      field = "\n" *> take 2 <* "  - "

46
      start :: Parser ByteString
Alexandre Delanoë's avatar
Alexandre Delanoë committed
47
      start = s *> takeTill isEndOfLine
48 49

      end :: Parser ByteString
Alexandre Delanoë's avatar
Alexandre Delanoë committed
50
      end =  "\nER  -" *> takeTill isEndOfLine
51

52 53 54 55

fieldWith :: Parser ByteString -> Parser (ByteString, ByteString)
fieldWith n = do
    name  <- n
Alexandre Delanoë's avatar
Alexandre Delanoë committed
56
    txt   <- takeTill isEndOfLine
57 58 59 60
    txts  <- try lines
    let txts' = case DL.length txts > 0 of
            True  -> txts
            False -> []
61
    pure (name, concat ([txt] <> txts'))
62

63

64 65 66 67
lines :: Parser [ByteString]
lines = many line
    where
        line :: Parser ByteString
68
        line = "\n " *> takeTill isEndOfLine
69 70

-------------------------------------------------------------
71 72
-- Field for First elem of a Tuple, Key for corresponding Map
onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
73
       -> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
74
onField k f m = m <> ( maybe [] f (lookup k m) )
75