Commit 18968540 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[PARSERS] RIS/PRESSE fix title and abstract field.

parent d388d621
Pipeline #370 failed with stage
......@@ -19,7 +19,7 @@ citation programs to exchange data.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.RIS (parser, withField, fieldWith, lines) where
module Gargantext.Text.Parsers.RIS (parser, onField, fieldWith, lines) where
import Data.List (lookup)
import Control.Applicative
......@@ -68,7 +68,8 @@ lines = many line
line = "\n " *> takeTill isEndOfLine
-------------------------------------------------------------
withField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
-- Field for First elem of a Tuple, Key for corresponding Map
onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
-> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
withField k f m = m <> ( maybe [] f (lookup k m) )
onField k f m = m <> ( maybe [] f (lookup k m) )
......@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Presse RIS format parser en enricher.
Presse RIS format parser for Europresse Database.
-}
......@@ -16,41 +16,60 @@ Presse RIS format parser en enricher.
module Gargantext.Text.Parsers.RIS.Presse (presseEnrich) where
import Data.List (lookup)
import Data.Either (either)
import Data.Tuple.Extra (first)
import Data.Tuple.Extra (first, both, uncurry)
import Data.Attoparsec.ByteString (parseOnly)
import Data.ByteString (ByteString)
import Gargantext.Prelude hiding (takeWhile, take)
import Gargantext.Text.Parsers.RIS (withField)
import Data.ByteString (ByteString, length)
import Gargantext.Prelude hiding (takeWhile, take, length)
import Gargantext.Text.Parsers.RIS (onField)
import Gargantext.Core (Lang(..))
import qualified Gargantext.Text.Parsers.Date.Attoparsec as Date
-------------------------------------------------------------
-------------------------------------------------------------
presseEnrich :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
presseEnrich = (withField "DA" presseDate)
. (withField "LA" presseLang)
. (map (first presseFields))
presseDate :: ByteString -> [(ByteString, ByteString)]
presseDate str = either (const []) identity $ parseOnly (Date.parserWith "/") str
presseLang :: ByteString -> [(ByteString, ByteString)]
presseLang "Français" = [("language", "FR")]
presseLang "English" = [("language", "EN")]
presseLang x = [("language", x)]
presseFields :: ByteString -> ByteString
presseFields champs
| champs == "AU" = "authors"
| champs == "TI" = "title"
| champs == "JF" = "source"
| champs == "DI" = "doi"
| champs == "UR" = "url"
| champs == "N2" = "abstract"
| otherwise = champs
{-
fixTitle :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
fixTitle ns = ns <> [ti, ab]
presseEnrich = (onField "DA" parseDate)
. (onField "LA" parseLang)
. fixFields
parseDate :: ByteString -> [(ByteString, ByteString)]
parseDate str = either (const []) identity $ parseOnly (Date.parserWith "/") str
parseLang :: ByteString -> [(ByteString, ByteString)]
parseLang "Français" = [(langField, cs $ show FR)]
parseLang "English" = [(langField, cs $ show EN)]
parseLang x = [(langField, x)]
langField :: ByteString
langField = "language"
fixFields :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
fixFields ns = map (first fixFields'') ns
where
ti = case
-}
-- | Title is sometimes longer than abstract
fixFields'' = case uncurry (>) <$> look'' of
Just True -> fixFields' "abstract" "title"
_ -> fixFields' "title" "abstract"
look'' :: Maybe (Int, Int)
look'' = both length <$> look
look :: Maybe (ByteString,ByteString)
look = (,) <$> lookup "TI" ns <*> lookup "N2" ns
fixFields' :: ByteString -> ByteString
-> ByteString -> ByteString
fixFields' title abstract champs
| champs == "AU" = "authors"
| champs == "TI" = title
| champs == "JF" = "source"
| champs == "DI" = "doi"
| champs == "UR" = "url"
| champs == "N2" = abstract
| otherwise = champs
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment