{-|
Module      : Gargantext.Core.Text.Corpus.API.Hal
Description : HAL API connection
Copyright   : (c) CNRS, 2017
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE ViewPatterns       #-}

module Gargantext.Core.Text.Corpus.API.Hal
    ( getC
    , get

    -- * Internal functions for testing
    , getHalQuery
    , HalQuery(..)
    , convertQuery
    )
    where

import Conduit
import Data.Either
import Data.LanguageCodes qualified as ISO639
import Data.Map.Strict qualified as Map
import Data.Maybe
import Data.Monoid
import Data.Semigroup
import Data.Text (pack, intercalate)
import Data.Text qualified as Text
import Data.Text.Encoding qualified as TE
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (intercalate, get)
import HAL qualified as HAL
import HAL.Client qualified as HAL
import HAL.Doc.Corpus qualified as HAL
import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape)
import Servant.Client (ClientError)

-- | A HAL search expression, stored as the list of 'EscapeItem' fragments
-- that 'getHalQuery' later renders into the final query text.
--
-- The 'Semigroup' and 'Monoid' instances are derived from the underlying
-- list, so combining two queries concatenates their fragments and 'mempty'
-- is the empty fragment list.
newtype HalQuery = HalQuery { _HalQuery :: [EscapeItem] }
  deriving stock (Show, Eq)
  deriving newtype (Semigroup, Monoid)

-- | Returns an /url encoded/ query ready to be sent to Hal.
--
-- The fragments are rendered with 'renderQueryPartialEscape' under a dummy
-- @q@ key, and the leading @q=@ this produces is then dropped so that only
-- the query value remains.
--
-- Only that leading prefix is removed: a @q=@ occurring further inside the
-- rendered value (for instance in a raw 'QN' fragment) is preserved. If the
-- rendered text does not start with @q=@ it is returned unchanged.
getHalQuery :: HalQuery -> HAL.Query
getHalQuery (HalQuery items) =
  fromMaybe rendered (Text.stripPrefix "q=" rendered)
  where
    -- The rendered @key=value@ pair, before the dummy key is removed.
    rendered :: Text.Text
    rendered = TE.decodeUtf8 . renderQueryPartialEscape False $ [("q", items)]

-- | Translates a parsed 'Corpus.Query' into a 'HalQuery'.
--
-- Conjunctions and disjunctions are wrapped in parentheses and joined with
-- @+AND+@ / @+OR+@, negations are prefixed with @NOT+@. Operators and
-- parentheses are emitted as 'QN' items, the text of each term as a 'QE'
-- item.
convertQuery :: Corpus.Query -> HalQuery
convertQuery q = HalQuery (interpretQuery q render)
  where
    -- Surrounds a fragment list with an opening and a closing parenthesis.
    parens :: [EscapeItem] -> [EscapeItem]
    parens inner = [QN "("] <> inner <> [QN ")"]

    -- Prefixes a fragment list with the negation operator.
    negated :: [EscapeItem] -> [EscapeItem]
    negated inner = QN "NOT+" : inner

    -- Renders both operands of a binary node around the given operator.
    binary :: EscapeItem -> BoolExpr Term -> BoolExpr Term -> [EscapeItem]
    binary op lhs rhs = parens (render lhs <> [op] <> render rhs)

    render :: BoolExpr Term -> [EscapeItem]
    render (BAnd lhs rhs) = binary (QN "+AND+") lhs rhs
    render (BOr lhs rhs) = binary (QN "+OR+") lhs rhs
    -- Negating an already negative term cancels out (double negation).
    render (BNot (BConst (Negative term))) = render (BConst (Positive term))
    render (BNot inner) = negated (parens (render inner))
    -- BTrue cannot happen as the query parser doesn't support parsing 'TRUE' alone.
    render BTrue = mempty
    -- BFalse cannot happen as the query parser doesn't support parsing 'FALSE' alone.
    render BFalse = mempty
    -- A positive term contributes its UTF-8 encoded text as a single 'QE' item.
    render (BConst (Positive (Term term))) = [QE (TE.encodeUtf8 term)]
    -- A negative term is rendered like its positive form, prefixed with NOT.
    render (BConst (Negative term)) = negated (render (BConst (Positive term)))

-- | Fetches the documents matching a query from HAL in one go.
--
-- The query is converted with 'convertQuery' and rendered with
-- 'getHalQuery'; the request starts at offset 0 and uses the optional limit.
-- Every returned corpus entry is turned into a 'HyperdataDocument' via
-- 'toDoc''. A failed request is not returned to the caller: it aborts with
-- 'panic', carrying the shown error.
get :: Maybe ISO639.ISO639_1
    -> Corpus.Query
    -> Maybe Corpus.Limit
    -> IO [HyperdataDocument]
get la query ml = do
  let halQuery = getHalQuery (convertQuery query)
      limit    = fromIntegral . getLimit <$> ml
  result <- HAL.getMetadataWith [halQuery] (Just 0) limit la
  case result of
    Left err       -> panic (pack (show err))
    Right response -> mapM (toDoc' la) (HAL._docs response)

-- | Crawler options handed to 'HAL.getMetadataWithLangC' by 'getC'.
--
-- NOTE(review): the constructor is applied positionally; the meaning of the
-- @False@ and @1000@ arguments is defined by 'HAL.HalCrawlerOptions' and is
-- not visible from this module — confirm against the HAL client.
halOptions :: HAL.HalCrawlerOptions
halOptions = HAL.HalCrawlerOptions False 1000

-- | Streaming counterpart of 'get'.
--
-- Issues the request through 'HAL.getMetadataWithLangC' using 'halOptions',
-- starting at offset 0 with the optional limit. On success the result is
-- the pair returned by the HAL client, with its conduit extended so that
-- every corpus entry is converted by 'toDoc''. A client error is handed
-- back as 'Left' rather than raised.
getC :: Maybe ISO639.ISO639_1
     -> Corpus.Query
     -> Maybe Corpus.Limit
     -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
getC la query ml =
  fmap convertDocs
    <$> HAL.getMetadataWithLangC halOptions [halQuery] (Just 0) limit la
  where
    halQuery = getHalQuery (convertQuery query)
    limit    = fromIntegral . getLimit <$> ml
    -- Keeps the first component and pipes the source through the converter.
    convertDocs (total, source) = (total, source .| mapMC (toDoc' la))

-- | Converts a HAL corpus entry into a 'HyperdataDocument'.
--
-- * The publication date falls back to 'Defaults.year' when the entry has
--   no date, and is split into its components by 'Date.mDateSplit'.
-- * When a language is requested and the entry has an abstract for it in
--   @_corpus_abstract_lang_map@, that abstract is used; otherwise the
--   default abstract is used.
-- * Authors and institutes are joined with @", "@, with no leading or
--   trailing separator.
-- * @_hd_language_iso2@ holds the shown language code itself, and is
--   'Nothing' when no language was requested.
--
-- The function performs no effects; it lives in 'IO' only to fit its
-- callers ('mapM' in 'get', 'mapMC' in 'getC').
toDoc' :: Maybe ISO639.ISO639_1 -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus { .. }) = do
  let mDateS = Just $ fromMaybe (pack $ show Defaults.year) _corpus_date
  let (utctime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
  let abstractDefault = intercalate " " _corpus_abstract
  let abstract = case la of
        Nothing -> abstractDefault
        Just l  -> maybe abstractDefault (intercalate " ") (Map.lookup l _corpus_abstract_lang_map)
  pure HyperdataDocument { _hd_bdd = Just "Hal"
                         , _hd_doi = Just $ pack $ show _corpus_docid
                         , _hd_url = Nothing
                         , _hd_uniqId = Nothing
                         , _hd_uniqIdBdd = Nothing
                         , _hd_page = Nothing
                         , _hd_title = Just $ intercalate " " _corpus_title
                         -- 'intercalate' only puts separators /between/ elements, so the
                         -- result never starts with ", ".
                         , _hd_authors = Just $ intercalate ", " _corpus_authors_names
                         , _hd_institutes = Just $ intercalate ", " $ _corpus_authors_affiliations <> map show _corpus_struct_id
                         -- NOTE(review): a missing source is stored as the literal text
                         -- "Nothing"; kept as-is, confirm whether consumers rely on it.
                         , _hd_source = Just $ fromMaybe "Nothing" _corpus_source
                         , _hd_abstract = Just abstract
                         , _hd_publication_date = fmap show utctime
                         , _hd_publication_year = pub_year
                         , _hd_publication_month = pub_month
                         , _hd_publication_day = pub_day
                         , _hd_publication_hour = Nothing
                         , _hd_publication_minute = Nothing
                         , _hd_publication_second = Nothing
                         -- Show the language code, not the surrounding 'Maybe'.
                         , _hd_language_iso2 = show <$> la }
