{-|
Module      : Gargantext.Core.Text.Corpus.API.Istex
Description : Istex API connection
Copyright   : (c) CNRS, 2017
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE ViewPatterns       #-}


module Gargantext.Core.Text.Corpus.API.Istex
    ( get

      -- * Internal API for testing
    , getIstexQuery
    , IstexQuery(..)
    , convertQuery
    )
    where

import Data.ByteString.Char8 qualified as C8
import Data.Conduit
import Data.Conduit.Combinators (yieldMany)
import Data.Maybe
import Data.Monoid
import Data.Semigroup
import Data.Text qualified as Text
import Data.Text.Encoding qualified as TE
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.JSON.Istex (toDoc)
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (get)
import ISTEX qualified as ISTEX
import ISTEX.Client qualified as ISTEX
import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape)
import Servant.Client

-- | Builds the @language:@ filter clause placed at the front of an Istex
-- query.
--
-- 'FR' is rendered as @language:fre@; every other language currently falls
-- back to @language:eng@.
languageToQuery :: Lang -> C8.ByteString
languageToQuery la = "language:" <> code la
  where
    -- Language code understood by the @language:@ field.
    code :: Lang -> C8.ByteString
    code FR = "fre"
    code _  = "eng" -- FIXME -- we should support all the languages.

-- | An Istex query, held as a list of query fragments ('EscapeItem's).
--
-- The 'Semigroup' and 'Monoid' instances are the ones of the underlying list
-- (obtained through @deriving newtype@), so combining two queries simply
-- appends their fragments.
newtype IstexQuery = IstexQuery { _IstexQuery :: [EscapeItem] }
  deriving stock (Show, Eq)
  deriving newtype (Semigroup, Monoid)

-- | Returns an /url encoded/ query ready to be sent to Istex.
--
-- The language filter for @lang@ (see 'languageToQuery') is prepended to the
-- caller's items and joined to them with @+AND+@.
getIstexQuery :: Lang -> IstexQuery -> Text
getIstexQuery lang (IstexQuery items) =
  -- 'renderQueryPartialEscape' renders key/value pairs, so the items are
  -- rendered under a throw-away @q@ key and the leading @q=@ is dropped
  -- afterwards. Only that prefix is removed: replacing every @q=@ in the
  -- output would also delete one coming from a caller-supplied 'QN' item.
  fromMaybe rendered (Text.stripPrefix "q=" rendered)
  where
    rendered :: Text
    rendered =
      TE.decodeUtf8 (renderQueryPartialEscape False [("q", langItems <> items)])

    langItems :: [EscapeItem]
    langItems = [QN (languageToQuery lang), QN "+AND+"]

-- | Translates a 'Corpus.Query' into an 'IstexQuery'.
--
-- The boolean expression handed over by 'interpretQuery' is flattened into a
-- list of 'EscapeItem's: operators are emitted as literal @+AND+@, @+OR+@ and
-- @NOT+@ fragments, and each term is searched in the @abstract:@ field.
convertQuery :: Corpus.Query -> IstexQuery
convertQuery q = IstexQuery (interpretQuery q render)
  where
    render :: BoolExpr Term -> [EscapeItem]
    render (BAnd lhs rhs) = render lhs <> [QN "+AND+"] <> render rhs
    render (BOr lhs rhs)  = render lhs <> [QN "+OR+"] <> render rhs
    -- Double negation: a negated negative term is just the positive term.
    render (BNot (BConst (Negative term))) = render (BConst (Positive term))
    render (BNot inner) = QN "NOT+" : render inner
    -- BTrue cannot happen as the query parser doesn't support parsing 'TRUE' alone.
    render BTrue = []
    -- BFalse cannot happen as the query parser doesn't support parsing 'FALSE' alone.
    render BFalse = []
    -- Positive terms are looked up in the 'abstract:' field; the term itself
    -- is passed as a 'QE' item, the field name as a 'QN' item.
    render (BConst (Positive (Term term))) =
      [QN "abstract:", QE (TE.encodeUtf8 term)]
    -- A negative term is the positive rendering prefixed with 'NOT+'.
    render (BConst (Negative term)) =
      QN "NOT+" : render (BConst (Positive term))

-- | Runs a query against Istex and returns the matching documents.
--
-- The 'Corpus.Query' is converted with 'convertQuery', rendered with
-- 'getIstexQuery' for the given language, and sent through
-- 'ISTEX.getMetadataWith' together with the optional limit.
--
-- On failure the 'ClientError' is returned as is. On success the result pairs
-- the number of documents that were converted with a conduit yielding them.
get :: Lang
    -> Corpus.Query
    -> Maybe Corpus.Limit
    -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get la (convertQuery -> query) maxResults = do
  let istexQuery = getIstexQuery la query
      limit      = getLimit <$> maxResults
  response <- ISTEX.getMetadataWith istexQuery limit
  case response of
    Left err -> pure (Left err)
    Right found -> do
      parsed <- toDoc' la found
      -- The reported count is the length of the converted list.
      pure (Right (Just (fromIntegral (length parsed)), yieldMany parsed))

-- | Converts every hit of an Istex response into a 'HyperdataDocument',
-- applying 'toDoc' with the given language to each of them in order.
toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' la = mapM (toDoc la) . ISTEX._documents_hits
