1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
{-|
Module : Gargantext.Text.Corpus.Parsers.Json2Csv
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
JSON parser to export toward the CSV GargV3 format.
(Export from the Patent Database.)
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Text.Corpus.Parsers.Json2Csv (json2csv, readPatents)
where
import Prelude (read)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.ByteString.Lazy (readFile)
import Data.Text (Text, unpack)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import System.IO (FilePath)
import Gargantext.Text.Corpus.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3)
import Data.Vector (fromList)
-- | One patent entry as decoded from the input JSON.
--
-- Every field is kept as raw 'Text'; the year is only converted when the
-- record is turned into a CSV document.
data Patent = Patent
  { _patent_title    :: Text -- ^ Title of the patent.
  , _patent_abstract :: Text -- ^ Abstract of the patent.
  , _patent_year     :: Text -- ^ Year, still textual at this stage.
  , _patent_id       :: Text -- ^ Identifier; not used by the CSV conversion in this module.
  }
  deriving (Show)

-- JSON instances are generated by Template Haskell with the options built by
-- @unPrefix "_patent_"@.
-- NOTE(review): presumably this drops the @_patent_@ prefix from the JSON
-- keys; confirm in "Gargantext.Core.Utils.Prefix".
deriveJSON (unPrefix "_patent_") ''Patent
-- | Read the file at the given path and try to decode its content as a JSON
-- list of 'Patent's.
--
-- Yields 'Nothing' when 'decode' fails on the file content.
readPatents :: FilePath -> IO (Maybe [Patent])
readPatents fp = do
  rawJson <- readFile fp
  pure (decode rawJson)
-- | Path of the JSON input file read by 'json2csv'.
type FilePathIn = FilePath
-- | Path of the output file written by 'json2csv'.
type FilePathOut = FilePath
-- | Convert a JSON file of patents into a CSV file.
--
-- The patents are read from the first path with 'readPatents', each one is
-- turned into a 'CsvDoc' by 'patent2csvDoc', and the result is written to the
-- second path together with 'headerCsvGargV3'.
--
-- Panics with @"json2csv error"@ if the input cannot be decoded.
json2csv :: FilePathIn -> FilePathOut -> IO ()
json2csv fin fout = do
  decoded <- readPatents fin
  -- Lazy binding: the panic only fires once the patent list is demanded.
  let patents = case decoded of
        Nothing -> panic "json2csv error"
        Just ps -> ps
      csvDocs = fromList (map patent2csvDoc patents)
  writeFile fout (headerCsvGargV3, csvDocs)
-- | Build the 'CsvDoc' for a single 'Patent'.
--
-- Title and abstract are copied over, the textual year is parsed with 'read',
-- and the remaining fields are filled with fixed placeholders
-- (@"Source"@, @1@, @1@, @"Authors"@). The patent identifier is ignored.
--
-- NOTE(review): 'read' is partial, so a year that does not parse raises a
-- runtime error; consider 'Text.Read.readMaybe' with an explicit fallback.
patent2csvDoc :: Patent -> CsvDoc
patent2csvDoc Patent { _patent_title    = docTitle
                     , _patent_abstract = docAbstract
                     , _patent_year     = yearText
                     } =
  CsvDoc docTitle "Source" parsedYear 1 1 docAbstract "Authors"
  where
    parsedYear = read (unpack yearText)