{-|
Module      : Gargantext.Core.Text.Corpus.API.Istex
Description : Istex API connection
Copyright   : (c) CNRS, 2017
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE ViewPatterns       #-}


module Gargantext.Core.Text.Corpus.API.Istex
    ( get

      -- * Internal API for testing
    , getIstexQuery
    , IstexQuery(..)
    , convertQuery
    )
    where

import Data.ByteString.Char8 qualified as C8
import Data.Conduit
import Data.Conduit.Combinators (yieldMany)
import Data.Maybe
import Data.Monoid
import Data.Semigroup
import Data.Text qualified as Text
import Data.Text.Encoding qualified as TE
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.JSON.Istex (toDoc)
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (get)
import ISTEX qualified as ISTEX
import ISTEX.Client qualified as ISTEX
import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape)
import Servant.Client

-- | Builds the @language:@ filter fragment of an Istex query for the given
-- 'Lang'. Only 'FR' is mapped to its own code (@fre@); every other language
-- currently falls back to @eng@.
languageToQuery :: Lang -> C8.ByteString
languageToQuery la = "language:" <> istexCode la
  where
    -- Language code appended after the @language:@ prefix.
    istexCode :: Lang -> C8.ByteString
    istexCode FR = "fre"
    istexCode _  = "eng" -- FIXME -- we should support all the languages.

-- | A query destined for the Istex API, kept as a list of 'EscapeItem's so
-- that each fragment records whether it must still be URL-escaped ('QE') or
-- must be emitted verbatim ('QN') when the query is rendered.
--
-- The 'Semigroup' and 'Monoid' instances are those of the underlying list,
-- i.e. queries are combined by concatenating their fragments.
newtype IstexQuery = IstexQuery
  { _IstexQuery :: [EscapeItem]
  }
  deriving stock (Show, Eq)
  deriving newtype (Semigroup, Monoid)

-- | Returns an /url encoded/ query ready to be sent to Istex.
--
-- The language filter for @lang@ (see 'languageToQuery') followed by @+AND+@
-- is prepended to the fragments of the given 'IstexQuery'. The whole list is
-- rendered with 'renderQueryPartialEscape' under a dummy @q@ key, and that
-- leading @q=@ is then dropped so that only the query value is returned.
--
-- Only the /leading/ @q=@ is removed: any @q=@ occurring later in the
-- rendered text (e.g. inside a caller-supplied 'QN' fragment) is preserved.
--
-- NOTE: if the 'IstexQuery' is empty the result still ends with the
-- @+AND+@ connector.
getIstexQuery :: Lang -> IstexQuery -> Text
getIstexQuery lang (IstexQuery items) =
  -- Fall back to the full rendering should the prefix ever be absent.
  fromMaybe rendered (Text.stripPrefix "q=" rendered)
  where
    -- The query rendered as a single @q=<value>@ pair, without a leading '?'.
    rendered :: Text
    rendered =
      TE.decodeUtf8 $ renderQueryPartialEscape False [("q", langItems <> items)]

    langItems :: [EscapeItem]
    langItems = [QN (languageToQuery lang), QN "+AND+"]

-- | Translates a 'Corpus.Query' into an 'IstexQuery' by walking its boolean
-- expression and emitting the corresponding query fragments.
--
-- Every term is searched in the @abstract:@ field; the connectors
-- (@+AND+@, @+OR+@, @NOT+@, @+AND+NOT+@) are emitted verbatim ('QN') while
-- the term text itself is marked for URL-escaping ('QE').
--
-- NOTE: no grouping parentheses are emitted, so the nesting of the input
-- expression is not reflected in the output.
convertQuery :: Corpus.Query -> IstexQuery
convertQuery q = IstexQuery (interpretQuery q render)
  where
    render :: BoolExpr Term -> [EscapeItem]
    -- A conjunction whose right-hand side is negated collapses into a single
    -- "AND NOT" connector; the negated term is then rendered as a positive one.
    render (BAnd lhs (BConst (Negative t))) = render lhs <> [QN "+AND+NOT+"] <> positive t
    render (BAnd lhs (BNot rhs))            = render lhs <> [QN "+AND+NOT+"] <> render rhs
    render (BAnd lhs rhs)                   = render lhs <> [QN "+AND+"] <> render rhs
    render (BOr lhs rhs)                    = render lhs <> [QN "+OR+"] <> render rhs
    -- Double negation: the term comes out positive.
    render (BNot (BConst (Negative t)))     = positive t
    render (BNot e)                         = QN "NOT+" : render e
    -- BTrue cannot happen as the query parser doesn't support parsing 'TRUE' alone.
    render BTrue                            = []
    -- BFalse cannot happen as the query parser doesn't support parsing 'FALSE' alone.
    render BFalse                           = []
    render (BConst (Positive t))            = positive t
    render (BConst (Negative t))            = QN "NOT+" : positive t

    -- Maps a term to a search on the 'abstract:' field.
    positive :: Term -> [EscapeItem]
    positive (Term t) = [QN "abstract:", QE (TE.encodeUtf8 t)]

-- | Runs the given query against Istex and returns the matching documents.
--
-- The 'Corpus.Query' is converted with 'convertQuery', rendered for the given
-- language with 'getIstexQuery' and sent through 'ISTEX.getMetadataWith',
-- together with the optional result limit.
--
-- On failure the 'ClientError' is returned unchanged. On success the result
-- holds the number of documents that were converted (the length of the
-- returned list) and a conduit yielding those documents.
get :: Lang
    -> Corpus.Query
    -> Maybe Corpus.Limit
    -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get la corpusQuery maxResults = do
  -- TODO check if abstract is in query already; if not, add it
  -- (e.g. by appending " abstract:*" to the query).
  response <- ISTEX.getMetadataWith istexQuery (getLimit <$> maxResults)
  case response of
    Left err -> pure (Left err)
    Right found -> do
      printDebug "[Istex.get] length docs" $ length (ISTEX._documents_hits found)
      hyperdocs <- toDoc' la found
      pure $ Right (Just (fromIntegral (length hyperdocs)), yieldMany hyperdocs)
  where
    -- The url-encoded query text actually sent to Istex.
    istexQuery :: Text
    istexQuery = getIstexQuery la (convertQuery corpusQuery)

-- | Converts every hit of an Istex response into a 'HyperdataDocument',
-- applying 'toDoc' with the given language to each hit in order.
toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' la = mapM (toDoc la) . ISTEX._documents_hits
  --printDebug "ISTEX" (ISTEX._documents_total docs')


