Commit 18968540 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[PARSERS] RIS/PRESSE fix title and abstract field.

parent d388d621
...@@ -19,7 +19,7 @@ citation programs to exchange data. ...@@ -19,7 +19,7 @@ citation programs to exchange data.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.RIS (parser, withField, fieldWith, lines) where module Gargantext.Text.Parsers.RIS (parser, onField, fieldWith, lines) where
import Data.List (lookup) import Data.List (lookup)
import Control.Applicative import Control.Applicative
...@@ -68,7 +68,8 @@ lines = many line ...@@ -68,7 +68,8 @@ lines = many line
line = "\n " *> takeTill isEndOfLine line = "\n " *> takeTill isEndOfLine
------------------------------------------------------------- -------------------------------------------------------------
withField :: ByteString -> (ByteString -> [(ByteString, ByteString)]) -- Field for First elem of a Tuple, Key for corresponding Map
onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
-> [(ByteString, ByteString)] -> [(ByteString, ByteString)] -> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
withField k f m = m <> ( maybe [] f (lookup k m) ) onField k f m = m <> ( maybe [] f (lookup k m) )
...@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org ...@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
Presse RIS format parser en enricher. Presse RIS format parser for Europresse Database.
-} -}
...@@ -16,41 +16,60 @@ Presse RIS format parser en enricher. ...@@ -16,41 +16,60 @@ Presse RIS format parser en enricher.
module Gargantext.Text.Parsers.RIS.Presse (presseEnrich) where module Gargantext.Text.Parsers.RIS.Presse (presseEnrich) where
import Data.List (lookup)
import Data.Either (either) import Data.Either (either)
import Data.Tuple.Extra (first) import Data.Tuple.Extra (first, both, uncurry)
import Data.Attoparsec.ByteString (parseOnly) import Data.Attoparsec.ByteString (parseOnly)
import Data.ByteString (ByteString) import Data.ByteString (ByteString, length)
import Gargantext.Prelude hiding (takeWhile, take) import Gargantext.Prelude hiding (takeWhile, take, length)
import Gargantext.Text.Parsers.RIS (withField) import Gargantext.Text.Parsers.RIS (onField)
import Gargantext.Core (Lang(..))
import qualified Gargantext.Text.Parsers.Date.Attoparsec as Date import qualified Gargantext.Text.Parsers.Date.Attoparsec as Date
-------------------------------------------------------------
-------------------------------------------------------------
presseEnrich :: [(ByteString, ByteString)] -> [(ByteString, ByteString)] presseEnrich :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
presseEnrich = (withField "DA" presseDate) presseEnrich = (onField "DA" parseDate)
. (withField "LA" presseLang) . (onField "LA" parseLang)
. (map (first presseFields)) . fixFields
parseDate :: ByteString -> [(ByteString, ByteString)]
parseDate str = either (const []) identity $ parseOnly (Date.parserWith "/") str
parseLang :: ByteString -> [(ByteString, ByteString)]
parseLang "Français" = [(langField, cs $ show FR)]
parseLang "English" = [(langField, cs $ show EN)]
parseLang x = [(langField, x)]
langField :: ByteString
langField = "language"
presseDate :: ByteString -> [(ByteString, ByteString)]
presseDate str = either (const []) identity $ parseOnly (Date.parserWith "/") str
presseLang :: ByteString -> [(ByteString, ByteString)] fixFields :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
presseLang "Français" = [("language", "FR")] fixFields ns = map (first fixFields'') ns
presseLang "English" = [("language", "EN")] where
presseLang x = [("language", x)] -- | Title is sometimes longer than abstract
fixFields'' = case uncurry (>) <$> look'' of
Just True -> fixFields' "abstract" "title"
_ -> fixFields' "title" "abstract"
look'' :: Maybe (Int, Int)
look'' = both length <$> look
look :: Maybe (ByteString,ByteString)
look = (,) <$> lookup "TI" ns <*> lookup "N2" ns
presseFields :: ByteString -> ByteString fixFields' :: ByteString -> ByteString
presseFields champs -> ByteString -> ByteString
fixFields' title abstract champs
| champs == "AU" = "authors" | champs == "AU" = "authors"
| champs == "TI" = "title" | champs == "TI" = title
| champs == "JF" = "source" | champs == "JF" = "source"
| champs == "DI" = "doi" | champs == "DI" = "doi"
| champs == "UR" = "url" | champs == "UR" = "url"
| champs == "N2" = "abstract" | champs == "N2" = abstract
| otherwise = champs | otherwise = champs
{-
fixTitle :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
fixTitle ns = ns <> [ti, ab]
where
ti = case
-}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment