1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
{-|
Module : Gargantext.Text.Parsers.RIS.Presse
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Presse RIS format parser for Europresse Database.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.RIS.Presse (presseEnrich) where
import Data.List (lookup)
import Data.Either (either)
import Data.Tuple.Extra (first, both, uncurry)
import Data.Attoparsec.ByteString (parseOnly)
import Data.ByteString (ByteString, length)
import Gargantext.Prelude hiding (takeWhile, take, length)
import Gargantext.Text.Parsers.RIS (onField)
import Gargantext.Core (Lang(..))
import qualified Gargantext.Text.Parsers.Date.Attoparsec as Date
presseEnrich :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
presseEnrich = (onField "DA" parseDate)
. (onField "LA" parseLang)
. fixFields
parseDate :: ByteString -> [(ByteString, ByteString)]
parseDate str = either (const []) identity $ parseOnly (Date.parserWith "/") str
parseLang :: ByteString -> [(ByteString, ByteString)]
parseLang "Français" = [(langField, cs $ show FR)]
parseLang "English" = [(langField, cs $ show EN)]
parseLang x = [(langField, x)]
langField :: ByteString
langField = "language"
fixFields :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
fixFields ns = map (first fixFields'') ns
where
-- | Title is sometimes longer than abstract
fixFields'' = case uncurry (>) <$> look'' of
Just True -> fixFields' "abstract" "title"
_ -> fixFields' "title" "abstract"
look'' :: Maybe (Int, Int)
look'' = both length <$> look
look :: Maybe (ByteString,ByteString)
look = (,) <$> lookup "TI" ns <*> lookup "N2" ns
fixFields' :: ByteString -> ByteString
-> ByteString -> ByteString
fixFields' title abstract champs
| champs == "AU" = "authors"
| champs == "TI" = title
| champs == "JF" = "source"
| champs == "DI" = "doi"
| champs == "UR" = "url"
| champs == "N2" = abstract
| otherwise = champs