{-# LANGUAGE TemplateHaskell #-}
{-|
Module      : Gargantext.Core.Text.Corpus.API.Hal
Description : HAL API connection
Copyright   : (c) CNRS, 2017
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

module Gargantext.Core.Text.Corpus.API.Hal
    where

import Conduit ( ConduitT, (.|), mapMC )
import Data.LanguageCodes qualified as ISO639
import Data.Map.Strict qualified as Map
import Data.Text (pack)
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Core.Utils (nonemptyIntercalate)
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (intercalate)
import HAL qualified
import HAL.Doc.Document qualified as HAL
import HAL.Types qualified as HAL
import Servant.Client (ClientError (..))
import Gargantext.Core.Worker.Jobs.Types (FetchDocumentsHALPayload(..), Job (..))
import Gargantext.Core.Types (CorpusId)

-- | Fetch documents matching a query from HAL and convert each of them to a
-- 'HyperdataDocument'.
--
-- The optional language is forwarded to the HAL request and is also used
-- when converting every returned document. The optional limit is forwarded
-- to the HAL request.
--
-- If the request fails, the error is rendered with 'show' and the function
-- panics via 'panicTrace'.
--
-- NOTE(review): the @Just 0@ argument is presumably the start offset —
-- confirm against 'HAL.getMetadataWith'.
get :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO [HyperdataDocument]
get la q ml = do
  response <- HAL.getMetadataWith [q] (Just 0) (fromIntegral <$> ml) la
  case response of
    Left err     -> panicTrace (pack (show err))
    Right result -> mapM (toDoc' la) (HAL._docs result)

-- | Conduit-based variant of the HAL query.
--
-- On success the result is a pair: the optional count returned by
-- 'HAL.getMetadataWithCursorC', passed through unchanged, and a conduit
-- source in which every HAL document is converted with 'toDoc''.
-- A failed request is returned as 'Left' (no panic here).
getC :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
getC la q ml = fmap convertStream <$> HAL.getMetadataWithCursorC q (fromIntegral <$> ml) la
  where
    -- Leave the count untouched and append the per-document conversion stage.
    convertStream (len, docsC) = (len, docsC .| mapMC (toDoc' la))

-- | Convert a HAL 'HAL.Document' into a 'HyperdataDocument'.
--
-- * The abstract is taken from the requested language when one is given and
--   the document has an abstract in that language; otherwise the document's
--   default abstract is used.
-- * A document without a date falls back to 'Defaults.year'.
-- * The language field holds the rendering of the requested language code,
--   or 'Nothing' when no language was requested.
toDoc' :: Maybe ISO639.ISO639_1 -> HAL.Document -> IO HyperdataDocument
toDoc' la (HAL.Document { .. }) = do
  let mDateS = _document_date <|> Just (pack $ show Defaults.year)
  let (utctime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
  let abstractDefault = unwords _document_abstract
  let abstract = case la of
        Nothing -> abstractDefault
        Just l  -> maybe abstractDefault unwords (Map.lookup l _document_abstract_lang_map)
  pure HyperdataDocument { _hd_bdd = Just "Hal"
                         , _hd_doi = Just $ pack $ show _document_docid
                         , _hd_url = Nothing
                         , _hd_page = Nothing
                         , _hd_title = Just $ unwords _document_title
                         , _hd_authors = Just $ nonemptyIntercalate ", " _document_authors_names
                         -- Each affiliation is paired positionally with a structure id.
                         , _hd_institutes = Just $ nonemptyIntercalate ", " $ zipWith (\affiliation structId -> affiliation <> " | " <> structId) _document_authors_affiliations $ map show _document_struct_id
                         -- NOTE(review): a missing source is stored as the literal text
                         -- "Nothing"; kept as-is since it is an explicit placeholder.
                         , _hd_source = Just $ maybe "Nothing" identity _document_source
                         , _hd_abstract = Just abstract
                         , _hd_publication_date = fmap show utctime
                         , _hd_publication_year = pub_year
                         , _hd_publication_month = pub_month
                         , _hd_publication_day = pub_day
                         , _hd_publication_hour = Nothing
                         , _hd_publication_minute = Nothing
                         , _hd_publication_second = Nothing
                         -- Render the language code itself, not the surrounding 'Maybe':
                         -- showing the 'Maybe' would store "Just .." / "Nothing" as text.
                         , _hd_language_iso2 = fmap show la
                         , _hd_institutes_tree = Just _document_institutes_tree }

-- | A simple 'ExceptT' layer over 'IO' whose error type is the servant
-- 'ClientError', to make working with network requests a bit more pleasant.
type HALMonad a = ExceptT ClientError IO a

-- | Prepare the asynchronous retrieval of the HAL documents matching a query.
--
-- A count query is issued first to learn how many documents match. The
-- result set is then described as a list of 'FetchDocumentsHAL' jobs: one
-- job of 'halBatchSize' documents per full batch, followed by a final job
-- for the remainder. A failure of the count query is returned as 'Left'.
--
-- NOTE(review): the limit argument is currently ignored.
-- NOTE(review): when the total is an exact multiple of 'halBatchSize'
-- (including zero) the final job has size 0.
getDataProducer :: CorpusId
                -> Maybe ISO639.ISO639_1
                -> Text
                -> Maybe Int
                -> IO (Either ClientError (ResultsCount, DataProducer IO HyperdataDocument))
getDataProducer corpusId la q _mb_limit = runExceptT $ do
  -- A batch size of 1 keeps the counting request itself cheap.
  total <- ExceptT $ HAL.countResultsOpts' (HAL.defaultHalOptions { HAL._hco_batchSize = 1 }) q la
  -- NOTE(review): the trailing `++ ""` looks like it pins the type of the
  -- message; it is kept verbatim.
  putStrLn $ "Found " <> show total <> " documents matching the query." ++ ""
  let (fullBatches, remainder) = fromInteger total `divMod` halBatchSize
      -- Guard against a negative quotient so the index range below is empty.
      fullCount = max 0 fullBatches
      fullJobs  = map (\ix -> fetchJob (ix * halBatchSize) halBatchSize) [0 .. fullCount - 1]
      lastJob   = fetchJob (fullCount * halBatchSize) remainder
  pure (ResultsCount total, DataAsyncBatchProducer (fullJobs <> [lastJob]))
  where
    -- One job fetching @size@ documents starting at @offset@.
    fetchJob offset size =
      FetchDocumentsHAL (FetchDocumentsHALPayload corpusId q la offset size)

-- | The size of a single batch: the number of documents requested by each
-- full 'FetchDocumentsHAL' job built by 'getDataProducer', and the step by
-- which the offset advances from one job to the next.
halBatchSize :: Int
halBatchSize = 100
