{-|
Module      : Gargantext.Core.Text.Corpus.Parsers.Json2Csv
Description : 
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

JSON parser to export toward the CSV GargV3 format.
(Export from the Patent Database.)

-}

{-# LANGUAGE TemplateHaskell   #-}

module Gargantext.Core.Text.Corpus.Parsers.Json2Csv (json2tsv, readPatents)
  where

import Data.Aeson ( decode )
import Data.ByteString.Lazy (readFile)
import Data.Text (unpack)
import Data.Vector (fromList)
import Gargantext.Core.Text.Corpus.Parsers.TSV (TsvDoc(..), writeFile, headerTsvGargV3)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (readFile, writeFile)
import Prelude (read)
import Text.Read qualified as TextRead

-- | One patent record as decoded from the JSON export.
--
-- Every field is kept as raw 'Text'; no validation happens at this level.
data Patent = Patent
  { _patent_title    :: Text  -- ^ Title of the patent.
  , _patent_abstract :: Text  -- ^ Abstract of the patent.
  , _patent_year     :: Text  -- ^ Year, still in textual form.
  , _patent_id       :: Text  -- ^ Identifier of the patent.
  }
  deriving (Show)

-- Template Haskell splice deriving the JSON instances for 'Patent'; it must
-- stay below the data declaration it refers to.
-- NOTE(review): @unPrefix "_patent_"@ presumably drops the "_patent_" prefix
-- from the record field names to obtain the JSON keys — confirm against
-- Gargantext.Core.Utils.Prefix.
$(deriveJSON (unPrefix "_patent_") ''Patent)

-- | Read the file at the given path as a lazy 'ByteString' and try to decode
-- it as a JSON list of 'Patent's.
--
-- Yields 'Nothing' when 'decode' rejects the content.
readPatents :: FilePath -> IO (Maybe [Patent])
readPatents path = fmap decode (readFile path)

-- | Path of the JSON input file (handed to 'readPatents' by 'json2tsv').
type FilePathIn  = FilePath
-- | Path of the output file (handed to 'writeFile' by 'json2tsv').
type FilePathOut = FilePath

-- | Convert a JSON patent export into a GargV3 TSV file.
--
-- Reads and decodes @fin@ with 'readPatents', converts every patent with
-- 'patent2tsvDoc' and writes the documents, together with 'headerTsvGargV3',
-- to @fout@.
--
-- The decoding result is inspected /before/ 'writeFile' is invoked, so an
-- undecodable input panics with @"json2tsv error"@ up front instead of from
-- within the writer, where the lazily built document vector would otherwise
-- be forced.
json2tsv :: FilePathIn -> FilePathOut -> IO ()
json2tsv fin fout = do
  decoded <- readPatents fin
  case decoded of
    -- Fail fast: nothing has been written to the output path yet.
    Nothing      -> panicTrace "json2tsv error"
    Just patents ->
      writeFile fout (headerTsvGargV3, fromList $ map patent2tsvDoc patents)

-- | Turn a 'Patent' into a 'TsvDoc' row.
--
-- * Title and abstract are copied as they are.
-- * The publication year is parsed from the textual year; a value that does
--   not parse yields 'Nothing' instead of aborting the whole export (the
--   previous use of the partial 'read' crashed on such input).
-- * Month and day are filled from "Gargantext.Defaults".
-- * Source and authors are the fixed placeholders @"Source"@ and @"Authors"@;
--   institutes are left empty. The patent identifier is not used.
patent2tsvDoc :: Patent -> TsvDoc
patent2tsvDoc Patent { _patent_title    = title
                     , _patent_abstract = abstract
                     , _patent_year     = year
                     } =
  TsvDoc { tsv_title             = title
         , tsv_source            = "Source"
           -- Total parse: valid years still give 'Just n', as before.
         , tsv_publication_year  = TextRead.readMaybe (unpack year)
         , tsv_publication_month = Just Defaults.month
         , tsv_publication_day   = Just Defaults.day
         , tsv_abstract          = abstract
         , tsv_authors           = "Authors"
         , tsv_institutes        = Nothing }





