{-# LANGUAGE TemplateHaskell #-}

module Gargantext.API.Node.Corpus.Searx where



import Control.Lens (view)
import qualified Data.Aeson as Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Either (Either(..))
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as T
import Data.Time.Calendar (Day, toGregorian)
import Data.Time.Format (defaultTimeLocale, formatTime, parseTimeM)
import Data.Tuple.Select (sel1, sel2, sel3)
import GHC.Generics (Generic)
import Network.HTTP.Client
import Network.HTTP.Client.TLS

import qualified Prelude as Prelude
import Protolude (catMaybes, encodeUtf8, rightToMaybe, Text)
import Gargantext.Prelude
import Gargantext.Prelude.Config

import Gargantext.API.Admin.Orchestrator.Types (JobLog(..))
--import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (jobLogSuccess)
import Gargantext.Core (Lang(..), PosTagAlgo(..))
import qualified Gargantext.Core.Text.Corpus.API as API
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow (insertMasterDocs) --, DataText(..))
import Gargantext.Database.Action.Flow.List (flowList_DbRepo)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node (CorpusId, ListId)
import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (defaultListMaybe, getOrMkList)
import Gargantext.Database.Query.Tree.Root (getOrMk_RootWithCorpus)
import qualified Gargantext.Database.Query.Table.Node.Document.Add  as Doc  (add)

langToSearx :: Lang -> Text
langToSearx EN = "en-US"
langToSearx FR = "fr-FR"
langToSearx All = "en-US"

data SearxResult = SearxResult
  { _sr_url           :: Text
  , _sr_title         :: Text
  , _sr_content       :: Maybe Text
  , _sr_engine        :: Text
  , _sr_score         :: Double
  , _sr_category      :: Text
  , _sr_pretty_url    :: Text
  , _sr_publishedDate :: Text   -- "Nov 19, 2021"
  , _sr_pubdate       :: Text  -- "2021-11-19 02:12:00+0000"
  }
  deriving (Show, Eq, Generic)
--  , _sr_parsed_url
--  , _sr_engines
--  , _sr_positions

$(deriveJSON (unPrefix "_sr_") ''SearxResult)

data SearxResponse = SearxResponse
  { _srs_query                :: Text
  , _srs_number_of_results    :: Int
  , _srs_results              :: [SearxResult] }
  deriving (Show, Eq, Generic)
-- , _srs_answers
-- , _srs_corrections
-- , _srs_infoboxes
--  , _srs_suggestions          :: [Text]
--  , _srs_unresponsive_engines :: [Text] }

$(deriveJSON (unPrefix "_srs_") ''SearxResponse)

data FetchSearxParams = FetchSearxParams
  { _fsp_language :: Lang
  , _fsp_manager :: Manager
  , _fsp_pageno  :: Int
  , _fsp_query   :: Text
  , _fsp_url     :: Text
  }

fetchSearxPage :: FetchSearxParams -> IO (Either Prelude.String SearxResponse)
fetchSearxPage (FetchSearxParams { _fsp_language
                                 , _fsp_manager
                                 , _fsp_pageno
                                 , _fsp_query
                                 , _fsp_url }) = do
  -- searx search API:
  -- https://searx.github.io/searx/dev/search_api.html?highlight=json
  req <- parseRequest $ T.unpack _fsp_url
  let request = urlEncodedBody
        [ --("category_general", "1")
          ("q", encodeUtf8 _fsp_query)
        , ("categories", "news")  -- https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/70#note_3976
        , ("pageno", encodeUtf8 $ T.pack $ show _fsp_pageno)
          --, ("time_range", "None")
        , ("language", encodeUtf8 $ langToSearx _fsp_language)
        , ("format", "json")
        ] req
  res <- httpLbs request _fsp_manager
  let dec = Aeson.eitherDecode $ responseBody res :: (Either Prelude.String SearxResponse)
  pure dec

insertSearxResponse :: (MonadBase IO m, FlowCmdM env err m)
                    => User
                    -> CorpusId
                    -> ListId
                    -> Lang
                    -> Either Prelude.String SearxResponse
                    -> m ()
insertSearxResponse _ _ _ _ (Left _) = pure ()
insertSearxResponse user cId listId l (Right (SearxResponse { _srs_results })) = do
  let docs = hyperdataDocumentFromSearxResult <$> _srs_results
  --printDebug "[triggerSearxSearch] docs" docs
  -- docs :: [Either Text HyperdataDocument]
  let docs' = catMaybes $ rightToMaybe <$> docs
  Prelude.mapM_ (\(HyperdataDocument { _hd_title, _hd_publication_year, _hd_publication_date }) -> do
    printDebug "[triggerSearxSearch] doc time" $
      "[title] " <> (show _hd_title) <>
      " :: [publication_year] " <> (show _hd_publication_year) <>
      " :: [publication_date] " <> (show _hd_publication_date)
    ) docs'
  --_ <- flowDataText user (DataNew [docs']) (Multi EN) cId Nothing logStatus
  let mCorpus = Nothing :: Maybe HyperdataCorpus
  ids <- insertMasterDocs mCorpus (Multi EN) docs'
  _ <- Doc.add cId ids
  (_masterUserId, _masterRootId, masterCorpusId)
    <- getOrMk_RootWithCorpus (UserName userMaster) (Left "") mCorpus
  let gp = GroupWithPosTag l CoreNLP HashMap.empty 
  ngs         <- buildNgramsLists user cId masterCorpusId Nothing gp
  _userListId <- flowList_DbRepo listId ngs

  pure ()

-- TODO Make an async task out of this?
triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m)
            => User
            -> CorpusId
            -> API.Query
            -> Lang
            -> (JobLog -> m ())
            -> m JobLog
triggerSearxSearch user cId q l logStatus = do
  let numPages = 10
  let jobLog = JobLog { _scst_succeeded = Just 0
                      , _scst_failed    = Just 0
                      , _scst_remaining = Just numPages
                      , _scst_events    = Just []
                      }
  logStatus jobLog

  printDebug "[triggerSearxSearch] cId" cId
  printDebug "[triggerSearxSearch] q" q
  printDebug "[triggerSearxSearch] l" l
  cfg <- view hasConfig
  uId <- getUserId user
  let surl = _gc_frame_searx_url cfg
  printDebug "[triggerSearxSearch] surl" surl
  mListId <- defaultListMaybe cId
  listId <- case mListId of
    Nothing -> do
      --let failedJobLog = jobLogFailTotalWithMessage "[triggerSearxSearch] no list id" jobLog
      --logStatus failedJobLog
      --pure failedJobLog
      listId <- getOrMkList cId uId
      pure listId
    Just listId -> pure listId
    
  printDebug "[triggerSearxSearch] listId" listId

  manager <- liftBase $ newManager tlsManagerSettings
  _ <- mapM (\page -> do
                res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l
                                                                    , _fsp_manager = manager
                                                                    , _fsp_pageno = page
                                                                    , _fsp_query = q
                                                                    , _fsp_url = surl }

                insertSearxResponse user cId listId l res

                logStatus $ JobLog { _scst_succeeded = Just page
                                   , _scst_failed    = Just 0
                                   , _scst_remaining = Just (numPages - page)
                                   , _scst_events    = Just [] }
            ) [1..numPages]
  --printDebug "[triggerSearxSearch] res" res

  pure $ jobLogSuccess jobLog

hyperdataDocumentFromSearxResult :: SearxResult -> Either T.Text HyperdataDocument
hyperdataDocumentFromSearxResult (SearxResult { _sr_content, _sr_engine, _sr_pubdate, _sr_title }) = do
  let mDate = parseTimeM False defaultTimeLocale "%Y-%m-%d %H:%M:%S+0000" (T.unpack _sr_pubdate) :: Maybe Day
  let mGregorian = toGregorian <$> mDate
  Right HyperdataDocument { _hd_bdd = Just "Searx"
                          , _hd_doi = Nothing
                          , _hd_url = Nothing
                          , _hd_uniqId = Nothing
                          , _hd_uniqIdBdd = Nothing
                          , _hd_page = Nothing
                          , _hd_title = Just _sr_title
                          , _hd_authors = Nothing
                          , _hd_institutes = Nothing
                          , _hd_source = Just _sr_engine
                          , _hd_abstract = _sr_content
                          , _hd_publication_date = T.pack <$> formatTime defaultTimeLocale "%Y-%m-%dT%H:%M:%S" <$> mDate
                          , _hd_publication_year = fromIntegral <$> sel1 <$> mGregorian
                          , _hd_publication_month = sel2 <$> mGregorian
                          , _hd_publication_day = sel3 <$> mGregorian
                          , _hd_publication_hour = Nothing
                          , _hd_publication_minute = Nothing
                          , _hd_publication_second = Nothing
                          , _hd_language_iso2 = Just $ T.pack $ show EN }