Commit 18968540 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[PARSERS] RIS/PRESSE fix title and abstract field.

parent d388d621
...@@ -19,7 +19,7 @@ citation programs to exchange data. ...@@ -19,7 +19,7 @@ citation programs to exchange data.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.RIS (parser, withField, fieldWith, lines) where module Gargantext.Text.Parsers.RIS (parser, onField, fieldWith, lines) where
import Data.List (lookup) import Data.List (lookup)
import Control.Applicative import Control.Applicative
...@@ -68,7 +68,8 @@ lines = many line ...@@ -68,7 +68,8 @@ lines = many line
line = "\n " *> takeTill isEndOfLine line = "\n " *> takeTill isEndOfLine
------------------------------------------------------------- -------------------------------------------------------------
withField :: ByteString -> (ByteString -> [(ByteString, ByteString)]) -- Field for First elem of a Tuple, Key for corresponding Map
onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
-> [(ByteString, ByteString)] -> [(ByteString, ByteString)] -> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
withField k f m = m <> ( maybe [] f (lookup k m) ) onField k f m = m <> ( maybe [] f (lookup k m) )
...@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org ...@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
Presse RIS format parser en enricher. Presse RIS format parser for Europresse Database.
-} -}
...@@ -16,41 +16,60 @@ Presse RIS format parser en enricher. ...@@ -16,41 +16,60 @@ Presse RIS format parser en enricher.
module Gargantext.Text.Parsers.RIS.Presse (presseEnrich) where module Gargantext.Text.Parsers.RIS.Presse (presseEnrich) where
import Data.List (lookup)
import Data.Either (either) import Data.Either (either)
import Data.Tuple.Extra (first) import Data.Tuple.Extra (first, both, uncurry)
import Data.Attoparsec.ByteString (parseOnly) import Data.Attoparsec.ByteString (parseOnly)
import Data.ByteString (ByteString) import Data.ByteString (ByteString, length)
import Gargantext.Prelude hiding (takeWhile, take) import Gargantext.Prelude hiding (takeWhile, take, length)
import Gargantext.Text.Parsers.RIS (withField) import Gargantext.Text.Parsers.RIS (onField)
import Gargantext.Core (Lang(..))
import qualified Gargantext.Text.Parsers.Date.Attoparsec as Date import qualified Gargantext.Text.Parsers.Date.Attoparsec as Date
-------------------------------------------------------------
-------------------------------------------------------------
presseEnrich :: [(ByteString, ByteString)] -> [(ByteString, ByteString)] presseEnrich :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
presseEnrich = (withField "DA" presseDate) presseEnrich = (onField "DA" parseDate)
. (withField "LA" presseLang) . (onField "LA" parseLang)
. (map (first presseFields)) . fixFields
presseDate :: ByteString -> [(ByteString, ByteString)]
presseDate str = either (const []) identity $ parseOnly (Date.parserWith "/") str parseDate :: ByteString -> [(ByteString, ByteString)]
parseDate str = either (const []) identity $ parseOnly (Date.parserWith "/") str
presseLang :: ByteString -> [(ByteString, ByteString)]
presseLang "Français" = [("language", "FR")] parseLang :: ByteString -> [(ByteString, ByteString)]
presseLang "English" = [("language", "EN")] parseLang "Français" = [(langField, cs $ show FR)]
presseLang x = [("language", x)] parseLang "English" = [(langField, cs $ show EN)]
parseLang x = [(langField, x)]
presseFields :: ByteString -> ByteString
presseFields champs langField :: ByteString
| champs == "AU" = "authors" langField = "language"
| champs == "TI" = "title"
| champs == "JF" = "source"
| champs == "DI" = "doi" fixFields :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
| champs == "UR" = "url" fixFields ns = map (first fixFields'') ns
| champs == "N2" = "abstract"
| otherwise = champs
{-
fixTitle :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
fixTitle ns = ns <> [ti, ab]
where where
ti = case -- | Title is sometimes longer than abstract
-} fixFields'' = case uncurry (>) <$> look'' of
Just True -> fixFields' "abstract" "title"
_ -> fixFields' "title" "abstract"
look'' :: Maybe (Int, Int)
look'' = both length <$> look
look :: Maybe (ByteString,ByteString)
look = (,) <$> lookup "TI" ns <*> lookup "N2" ns
fixFields' :: ByteString -> ByteString
-> ByteString -> ByteString
fixFields' title abstract champs
| champs == "AU" = "authors"
| champs == "TI" = title
| champs == "JF" = "source"
| champs == "DI" = "doi"
| champs == "UR" = "url"
| champs == "N2" = abstract
| otherwise = champs
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment