{-|
Module      : Gargantext.Core.Text.Corpus.Parsers.TSV.TsvHal
Description :
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

module Gargantext.Core.Text.Corpus.Parsers.TSV.TsvHal where

import Data.ByteString.Lazy qualified as BL
import Data.Csv
import Data.Text (pack)
import Data.Time.Segment (jour)
import Data.Vector (Vector)
import Data.Vector qualified as V
import Gargantext.Core.Text.Corpus.Parsers.TSV.Types (Delimiter(..))
import Gargantext.Core.Text.Corpus.Parsers.TSV.Utils (readTsvLazyBS)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude


------------------------------------------------------------------------
-- Hal Format
-- | One row of a HAL export, as decoded from (or encoded to) a named
-- TSV record.  Each field is strict and corresponds to the column whose
-- header is quoted next to it.
--
-- The field order is significant: code that applies the 'TsvHal'
-- constructor positionally relies on it.
data TsvHal = TsvHal
  { tsvHal_title              :: !Text     -- ^ column @title@
  , tsvHal_source             :: !Text     -- ^ column @source@
  , tsvHal_publication_year   :: !Integer  -- ^ column @publication_year@
  , tsvHal_publication_month  :: !Int      -- ^ column @publication_month@
  , tsvHal_publication_day    :: !Int      -- ^ column @publication_day@
  , tsvHal_abstract           :: !Text     -- ^ column @abstract@
  , tsvHal_authors            :: !Text     -- ^ column @authors@
  , tsvHal_url                :: !Text     -- ^ column @url@
  , tsvHal_isbn_s             :: !Text     -- ^ column @isbn_s@
  , tsvHal_issue_s            :: !Text     -- ^ column @issue_s@
  , tsvHal_journalPublisher_s :: !Text     -- ^ column @journalPublisher_s@
  , tsvHal_language_s         :: !Text     -- ^ column @language_s@
  , tsvHal_doiId_s            :: !Text     -- ^ column @doiId_s@
  , tsvHal_authId_i           :: !Text     -- ^ column @authId_i@
  , tsvHal_instStructId_i     :: !Text     -- ^ column @instStructId_i@
  , tsvHal_deptStructId_i     :: !Text     -- ^ column @deptStructId_i@
  , tsvHal_labStructId_i      :: !Text     -- ^ column @labStructId_i@
  , tsvHal_rteamStructId_i    :: !Text     -- ^ column @rteamStructId_i@
  , tsvHal_docType_s          :: !Text     -- ^ column @docType_s@
  }
  deriving (Show)

-- | Decode one HAL row by looking every field up by its column header.
--
-- The lookups are chained applicatively and fed to the 'TsvHal'
-- constructor positionally, so they are listed in exactly the order in
-- which the record fields are declared.
instance FromNamedRecord TsvHal where
  parseNamedRecord r =
    TsvHal
      <$> r .: "title"
      <*> r .: "source"
      <*> r .: "publication_year"
      <*> r .: "publication_month"
      <*> r .: "publication_day"
      <*> r .: "abstract"
      <*> r .: "authors"
      <*> r .: "url"
      <*> r .: "isbn_s"
      <*> r .: "issue_s"
      <*> r .: "journalPublisher_s"
      <*> r .: "language_s"
      <*> r .: "doiId_s"
      <*> r .: "authId_i"
      <*> r .: "instStructId_i"
      <*> r .: "deptStructId_i"
      <*> r .: "labStructId_i"
      <*> r .: "rteamStructId_i"
      <*> r .: "docType_s"

-- | Encode a 'TsvHal' as a named record, one entry per field, using the
-- same column headers that the 'FromNamedRecord' instance reads.
instance ToNamedRecord TsvHal where
  toNamedRecord hal =
    namedRecord
      [ -- bibliographic basics
        "title"              .= tsvHal_title hal
      , "source"             .= tsvHal_source hal
        -- publication date, split into its three components
      , "publication_year"   .= tsvHal_publication_year hal
      , "publication_month"  .= tsvHal_publication_month hal
      , "publication_day"    .= tsvHal_publication_day hal
      , "abstract"           .= tsvHal_abstract hal
      , "authors"            .= tsvHal_authors hal
        -- remaining HAL columns, emitted under their original header names
      , "url"                .= tsvHal_url hal
      , "isbn_s"             .= tsvHal_isbn_s hal
      , "issue_s"            .= tsvHal_issue_s hal
      , "journalPublisher_s" .= tsvHal_journalPublisher_s hal
      , "language_s"         .= tsvHal_language_s hal
      , "doiId_s"            .= tsvHal_doiId_s hal
      , "authId_i"           .= tsvHal_authId_i hal
      , "instStructId_i"     .= tsvHal_instStructId_i hal
      , "deptStructId_i"     .= tsvHal_deptStructId_i hal
      , "labStructId_i"      .= tsvHal_labStructId_i hal
      , "rteamStructId_i"    .= tsvHal_rteamStructId_i hal
      , "docType_s"          .= tsvHal_docType_s hal
      ]

-- | Convert a decoded HAL row into a 'HyperdataDocument'.
--
-- Every value taken from the row is wrapped in 'Just', whatever its
-- content.  The page, the time-of-day parts, the ISO2 language and the
-- institutes tree are left as 'Nothing'.  The publication date text is
-- the 'show' rendering of whatever 'jour' builds from the row's year,
-- month and day.
tsv2doc :: TsvHal -> HyperdataDocument
tsv2doc hal =
  HyperdataDocument
    { _hd_bdd                = Just "TsvHal"
    , _hd_doi                = Just (tsvHal_doiId_s hal)
    , _hd_url                = Just (tsvHal_url hal)
    , _hd_page               = Nothing
    , _hd_title              = Just (tsvHal_title hal)
    , _hd_authors            = Just (tsvHal_authors hal)
    , _hd_institutes         = Just (tsvHal_instStructId_i hal)
    , _hd_source             = Just (tsvHal_source hal)
    , _hd_abstract           = Just (tsvHal_abstract hal)
    , _hd_publication_date   = Just pubDate
    , _hd_publication_year   = Just (fromIntegral pubYear)
    , _hd_publication_month  = Just pubMonth
    , _hd_publication_day    = Just pubDay
    , _hd_publication_hour   = Nothing
    , _hd_publication_minute = Nothing
    , _hd_publication_second = Nothing
    , _hd_language_iso2      = Nothing
    , _hd_institutes_tree    = Nothing
    }
  where
    pubYear  = tsvHal_publication_year hal
    pubMonth = tsvHal_publication_month hal
    pubDay   = tsvHal_publication_day hal
    -- Textual form of the date assembled from the three components.
    pubDate  = pack (show (jour pubYear pubMonth pubDay))

------------------------------------------------------------------------
-- | Read the file at the given path as a lazy 'BL.ByteString' and hand
-- it to 'readTsvLazyBS' with the 'Tab' delimiter.
--
-- The result is whatever 'readTsvLazyBS' returns: either an error text,
-- or the header together with the decoded 'TsvHal' rows.
--
-- TODO use readFileLazy
readTsvHal :: FilePath -> IO (Either Text (Header, Vector TsvHal))
readTsvHal fp = readTsvLazyBS Tab <$> BL.readFile fp

------------------------------------------------------------------------
-- | Read a HAL TSV file with 'readTsvHal' and convert every decoded row
-- to a 'HyperdataDocument' via 'tsv2doc'.
--
-- A 'Left' from 'readTsvHal' is passed through untouched; on 'Right'
-- the header is discarded and the rows are returned as a list.
parseHal :: FilePath -> IO (Either Text [HyperdataDocument])
parseHal fp = fmap toDocs <$> readTsvHal fp
  where
    -- Drop the header and turn the row vector into a list of documents.
    toDocs (_header, rows) = V.toList (V.map tsv2doc rows)

