{-|
Module      : Gargantext.Core.Text.Corpus.API.Isidore
Description : To query French Humanities publication database from its API
Copyright   : (c) CNRS, 2019-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

{-# LANGUAGE DerivingStrategies  #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns        #-}

module Gargantext.Core.Text.Corpus.API.Isidore (
    get
  , isidore2csvFile
    -- * Internals for testing
  , IsidoreQuery(..)
  , getIsidoreQuery
  , convertQuery
  ) where

import Data.Conduit
import Data.Conduit.Combinators (yieldMany)
import Data.Monoid
import Data.Semigroup
import Data.Text qualified as Text
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers (cleanText)
import Gargantext.Core.Text.Corpus.Parsers.CSV (writeDocs2Csv)
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (get)
import Isidore qualified as Isidore
import Isidore.Client
import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape)
import Servant.Client
import qualified Data.Text.Encoding as TE

-- | A query for Isidore, represented as the list of 'EscapeItem's that make
-- up its textual form. Use 'getIsidoreQuery' to render it as 'Text'.
--
-- 'Semigroup' and 'Monoid' are derived from the underlying list, so '<>'
-- concatenates the items of two queries and 'mempty' is the query with no
-- items.
newtype IsidoreQuery = IsidoreQuery { _IsidoreQuery :: [EscapeItem] }
  deriving stock (Show, Eq)
  deriving newtype (Semigroup, Monoid)

-- | Returns an /url encoded/ query ready to be sent to Isidore.
--
-- The items are rendered with 'renderQueryPartialEscape' as the value of a
-- throw-away @q@ parameter, and the leading @q=@ is then dropped so that only
-- the value itself is returned.
--
-- Only that leading key is removed: a @q=@ occurring later in the rendered
-- text (e.g. coming from a raw 'QN' item) is left untouched. A query without
-- any item renders as the empty text rather than as the bare key name.
getIsidoreQuery :: IsidoreQuery -> Text
getIsidoreQuery (IsidoreQuery []) = ""
getIsidoreQuery (IsidoreQuery items) =
  case Text.stripPrefix "q=" rendered of
    Just stripped -> stripped
    -- Not expected for a non-empty item list; return the rendering unchanged.
    Nothing       -> rendered
  where
    rendered :: Text
    rendered = TE.decodeUtf8 . renderQueryPartialEscape False $ [
      ("q", items)
      ]

-- | Translates a 'Corpus.Query' into an 'IsidoreQuery'.
--
-- Search terms become 'QE' items, while the boolean connectives are emitted
-- as literal 'QN' items (@+AND+@, @+OR+@, @+AND+NOT+@ and @NOT+@).
convertQuery :: Corpus.Query -> IsidoreQuery
convertQuery userQuery = IsidoreQuery (interpretQuery userQuery toItems)
  where
    -- It seems like Isidore supports a similar query language to Pubmed.
    -- The order of the equations below matters: the more specific shapes
    -- have to be tried before the generic ones.
    toItems :: BoolExpr Term -> [EscapeItem]
    -- A negative term on the right-hand side of a conjunction: the term is
    -- made positive so that it can be translated, the negation being carried
    -- by the operator instead.
    toItems (BAnd lhs (BConst (Negative t))) =
      toItems lhs <> [QN "+AND+NOT+"] <> toItems (BConst (Positive t))
    toItems (BAnd lhs (BNot rhs)) =
      toItems lhs <> [QN "+AND+NOT+"] <> toItems rhs
    toItems (BAnd lhs rhs) =
      toItems lhs <> [QN "+AND+"] <> toItems rhs
    toItems (BOr lhs rhs) =
      toItems lhs <> [QN "+OR+"] <> toItems rhs
    -- Double negation: only the positive term is kept.
    toItems (BNot (BConst (Negative t))) =
      toItems (BConst (Positive t))
    toItems (BNot inner) =
      [QN "NOT+"] <> toItems inner
    -- BTrue and BFalse are not expected here, as the query parser doesn't
    -- support parsing 'TRUE' or 'FALSE' alone; both contribute no item.
    toItems BTrue  = mempty
    toItems BFalse = mempty
    toItems (BConst (Positive (Term t))) =
      [QE (TE.encodeUtf8 t)]
    toItems (BConst (Negative (Term t))) =
      [QN "NOT+", QE (TE.encodeUtf8 t)]

-- | Runs a query against Isidore and converts every returned document into a
-- 'HyperdataDocument' tagged with the given language.
--
-- On success the result pairs the number of converted documents with a
-- conduit yielding them; all documents are converted before the conduit is
-- handed back. A 'ClientError' from the Isidore client is returned as 'Left'.
get :: Lang
    -> Maybe Corpus.Limit
    -> Corpus.Query
    -> Maybe Isidore.AuthorQuery
    -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get la l (convertQuery -> q) a = do
  isidoreResult <- Isidore.get (getLimit <$> l) (Just $ getIsidoreQuery q) a
  case isidoreResult of
    Right payload -> do
      documents <- mapM (isidoreToDoc la) (replyDocs $ _content payload)
      let total = fromIntegral (length documents)
      pure $ Right (Just total, yieldMany documents)
    Left clientErr -> pure $ Left clientErr
  where
    -- A reply carries either a single document or a list of them.
    replyDocs :: Reply -> [IsidoreDoc]
    replyDocs (Replies docs)     = docs
    replyDocs (ReplyOnly single) = [single]

-- | Runs a query against Isidore (see 'get') and writes the resulting
-- documents to the given file with 'writeDocs2Csv'.
--
-- A 'ClientError' reported by 'get' is rethrown with 'throwIO'.
isidore2csvFile :: FilePath
                -> Lang
                -> Maybe Corpus.Limit
                -> Corpus.Query
                -> Maybe Isidore.AuthorQuery
                -> IO ()
isidore2csvFile fp la li tq aq = do
  outcome <- get la li tq aq
  -- The document count is not needed here, only the stream itself.
  (_, docStream) <- case outcome of
    Right found    -> pure found
    Left clientErr -> throwIO clientErr
  sourceToList docStream >>= writeDocs2Csv fp

-- | Converts a single 'IsidoreDoc' into a 'HyperdataDocument'.
--
-- The given language is recorded as the document language. When the Isidore
-- document has no date, 'Defaults.year' is used in its place; when it has no
-- source, the source is set to the literal text @Nothing@.
isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
isidoreToDoc docLang (IsidoreDoc docTitle docAbstract docDate docUrl docSource docCreators) =
  pure HyperdataDocument
         { _hd_bdd = Just "Isidore"
         , _hd_doi = Nothing
         , _hd_url = docUrl
         , _hd_uniqId = Nothing
         , _hd_uniqIdBdd = Nothing
         , _hd_page = Nothing
         , _hd_title = Just (cleanText (langText docTitle))
         , _hd_authors = fmap creator2text docCreators
         , _hd_institutes = Nothing
         , _hd_source = Just (maybe "Nothing" _sourceName docSource)
         , _hd_abstract = fmap (cleanText . langText) docAbstract
         , _hd_publication_date = fmap (Text.pack . show) utcTime
         , _hd_publication_year = pub_year
         , _hd_publication_month = pub_month
         , _hd_publication_day = pub_day
         , _hd_publication_hour = Nothing
         , _hd_publication_minute = Nothing
         , _hd_publication_second = Nothing
         , _hd_language_iso2 = Just (Text.pack (show docLang))
         }
  where
    -- Renders one author as two names separated by ", "; a group of authors
    -- is joined with ". ".
    author :: Author -> Text
    author (Authors aus)  = Text.intercalate ". " (map author aus)
    author (Author fn ln) = _name fn <> ", " <> _name ln

    creator2text :: Creator -> Text
    creator2text (Creators aus') = Text.intercalate ". " (map author aus')
    creator2text (Creator au)    = author au

    -- Keeps only the textual part; an array of texts is joined with spaces.
    langText :: LangText -> Text
    langText (ArrayText ts)   = Text.intercalate " " (map langText ts)
    langText (OnlyText t2)    = t2
    langText (LangText _l t1) = t1

    -- A missing date falls back to the default year.
    mDateS = Just (maybe (Text.pack $ show Defaults.year) identity docDate)

    (utcTime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
