{-|
Module      : Test.API.UpdateList
Description : Tests for the list-update API: uploading JSON and TSV ngrams lists and document lists, and checking the resulting ngrams tables.
Copyright   : (c) CNRS, 2017
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}

{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}

{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ViewPatterns #-}

module Test.API.UpdateList (
    tests
  -- * Useful helpers
  , JobPollHandle(..)
  , updateNode
  , createDocsList
  , createFortranDocsList
  , mkNewWithForm
  ) where

import Control.Lens (mapped, over)
import Control.Monad.Fail (fail)
import Data.Aeson qualified as JSON
import Data.Aeson.QQ
import Data.Map.Strict.Patch qualified as PM
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text.IO qualified as TIO
import Data.Text qualified as T
import Fmt
import Gargantext.API.Admin.Auth.Types (Token)
import Gargantext.API.Errors
import Gargantext.API.HashedResponse
import Gargantext.API.Ngrams.List ( ngramsListFromTSVData )
import Gargantext.API.Ngrams.List.Types (WithJsonFile(..), WithTextFile(..))
import Gargantext.API.Ngrams qualified as APINgrams
import Gargantext.API.Ngrams.Types
import Gargantext.API.Node.Corpus.New.Types qualified as FType
import Gargantext.API.Node.Types
import Gargantext.API.Routes.Named
import Gargantext.API.Routes.Named.Corpus (addWithTempFileEp)
import Gargantext.API.Routes.Named.Node
import Gargantext.API.Routes.Named.Private
import Gargantext.API.Worker (workerAPIPost)
import Gargantext.Core.Config
import Gargantext.Core qualified as Lang
import Gargantext.Core.Text.Corpus.Query (RawQuery(..))
import Gargantext.Core.Text.List.Social
import Gargantext.Core.Text.Ngrams
import Gargantext.Core.Types ( CorpusId, ListId, NodeId, _NodeId, TableResult(..))
import Gargantext.Core.Types.Individu
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Worker.Types (JobInfo)
import Gargantext.Database.Query.Facet qualified as Facet
import Gargantext.Prelude hiding (get)
import Network.Wai.Handler.Warp qualified as Wai
import Paths_gargantext (getDataFileName)
import Prelude qualified
import Servant.Client.Streaming
import System.FilePath
import Test.API.Prelude (checkEither, newCorpusForUser, newPrivateFolderForUser, alice)
import Test.API.Routes (mkUrl, gqlUrl, get_table_ngrams, put_table_ngrams, toServantToken, clientRoutes, get_table, update_node, add_form_to_list, add_tsv_to_list)
import Test.API.Setup (withTestDBAndPort, dbEnvSetup, SpecContext (..))
import Test.Database.Types
import Test.Hspec
import Test.Hspec.Wai.Internal (withApplication, WaiSession)
import Test.Hspec.Wai.JSON (json)
import Test.Types (JobPollHandle(..))
import Test.Utils (pollUntilWorkFinished, protectedJSON, withValidLogin, isJobFinished)
import Text.Printf (printf)


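-- | Create a fresh list node under the given corpus, upload the JSON ngrams
-- file found at the given data path, and poll the worker until the upload
-- job has finished, returning the new list's id.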
uploadJSONList :: LogConfig
               -> Wai.Port
               -> Token
               -> CorpusId
               -> FilePath
               -> ClientEnv
               -> WaiSession () ListId
uploadJSONList log_cfg port token cId pathToNgrams clientEnv = do
  ([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId)) [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|]
  -- Upload the JSON ngrams file
  simpleNgrams' <- liftIO (TIO.readFile =<< getDataFileName pathToNgrams)
  let params = WithJsonFile { _wjf_data = simpleNgrams'
                            , _wjf_name = "simple_ngrams.json" }
  ji <- checkEither $ liftIO $ runClientM (add_form_to_list token listId params) clientEnv
  ji' <- pollUntilWorkFinished log_cfg port ji
  liftIO $ ji' `shouldSatisfy` isJobFinished

  pure listId



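-- | End-to-end tests for the list-update API, run sequentially against a
-- fresh test database and server.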
tests :: Spec
tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
  describe "UpdateList API" $ do

    describe "POST /api/v1.0/lists/:id/add/form/async (JSON)" $ do

      it "allows uploading a JSON ngrams file" $ \(SpecContext testEnv port app _) -> do
        cId <- newCorpusForUser testEnv "alice"
        let log_cfg = (test_config testEnv) ^. gc_logging
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            listId <- uploadJSONList log_cfg port token cId "test-data/ngrams/simple.json" clientEnv

            -- Now check that we can retrieve the ngrams
            liftIO $ do
              eRes <- runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv
              eRes `shouldSatisfy` isRight
              let (Right res) = eRes
              Just res `shouldBe` JSON.decode [json| { "version": 0,
                                                       "count": 1,
                                                       "data": [
                                                           {
                                                               "ngrams": "abelian group",
                                                               "size": 2,
                                                               "list": "MapTerm",
                                                               "occurrences": [],
                                                               "children": []
                                                           }
                                                       ]
                                                   } |]

      it "does not create duplicates when uploading JSON (#313)" $ \(SpecContext testEnv port app _) -> do
        cId <- newCorpusForUser testEnv "alice"
        let log_cfg = (test_config testEnv) ^. gc_logging
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            -- this term is imported from the .json file
            let importedTerm = NgramsTerm "abelian group"
            -- this is the new term, under which importedTerm will be grouped
            let newTerm = NgramsTerm "new abelian group"

            listId <- uploadJSONList log_cfg port token cId "test-data/ngrams/simple.json" clientEnv

            let checkNgrams expected = do
                  eng <- liftIO $ runClientM (get_table_ngrams token cId APINgrams.Terms listId 10 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv
                  case eng of
                    Left err -> fail (show err)
                    Right r ->
                      let real = over mapped (\nt -> ( nt ^. ne_ngrams
                                                     , mSetToList $ nt ^. ne_children ))
                                             (r ^. vc_data . _NgramsTable) in
                      liftIO $ Set.fromList real `shouldBe` Set.fromList expected

            -- The #313 error is about importedTerm being duplicated
            -- in a specific case

            checkNgrams [ (importedTerm, []) ]
            let nre = NgramsRepoElement 1 MapTerm Nothing Nothing (MSet mempty)
            let patch = PM.fromList [
                           ( newTerm
                           , NgramsReplace { _patch_old = Nothing
                                           , _patch_new = Just nre } )
                      ]
            _ <- liftIO $ runClientM (put_table_ngrams token cId APINgrams.Terms listId (Versioned 1 $ NgramsTablePatch $ fst patch)) clientEnv

            -- check that new term is added (with no parent)
            checkNgrams [ (newTerm, [])
                        , (importedTerm, []) ]

            -- now patch it so that we have a group
            let patchChildren = PM.fromList [
                                 ( newTerm
                                 , toNgramsPatch [importedTerm] )
                               ]
            _ <- liftIO $ runClientM (put_table_ngrams token cId APINgrams.Terms listId (Versioned 32 $ NgramsTablePatch $ fst patchChildren)) clientEnv

            -- check that new term is parent of old one
            checkNgrams [ (newTerm, [importedTerm]) ]

            -- finally, upload the list again, the group should be as
            -- it was before (the bug in #313 was that "abelian group"
            -- was created again as a term with no parent)
            _ <- uploadJSONList log_cfg port token cId "test-data/ngrams/simple.json" clientEnv

            -- old (imported) term shouldn't become parentless
            -- (#313 error was that we had [newTerm, importedTerm] instead)

            -- NOTE: Unfortunately, I'm not able to reproduce this
            -- error here, though I tried. Something is missing, maybe
            -- some nodestory integration with tests?
            checkNgrams [ (newTerm, [importedTerm]) ]

            pure ()

    describe "POST /api/v1.0/lists/:id/csv/add/form/async (CSV)" $ do

      it "parses CSV via ngramsListFromCSVData" $ \(SpecContext _testEnv _port _app _) -> do
        simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/simple.tsv")
        ngramsListFromTSVData simpleNgrams `shouldBe`
          Right (Map.fromList [ (NgramsTerms, Versioned 0 $ Map.fromList [
                                  (NgramsTerm "abelian group", NgramsRepoElement 1 MapTerm Nothing Nothing (MSet mempty))
                                , (NgramsTerm "brazorf", NgramsRepoElement 1 StopTerm Nothing Nothing (MSet mempty))
                              ])])

      it "parses TSV with UTF-8 issues" $ \(SpecContext _testEnv _port _app _) -> do
        simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/433-utf-encoding-issue.tsv")
        -- we don't care about the output, only that the file was parsed without errors (this file is garbage)
        ngramsListFromTSVData simpleNgrams `shouldSatisfy` isRight

      it "allows uploading a CSV ngrams file" $ \(SpecContext testEnv port app _) -> do
        cId <- newCorpusForUser testEnv "alice"
        let log_cfg = (test_config testEnv) ^. gc_logging
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            ([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId)) [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|]
            -- Upload the TSV file
            simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/simple.tsv")
            let params = WithTextFile { _wtf_filetype = FType.TSV
                                      , _wtf_data = simpleNgrams
                                      , _wtf_name = "simple.tsv" }
            ji <- checkEither $ liftIO $ runClientM (add_tsv_to_list token listId params) clientEnv
            _ <- pollUntilWorkFinished log_cfg port ji

            -- Now check that we can retrieve the ngrams
            liftIO $ do
              eRes <- runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv
              eRes `shouldSatisfy` isRight
              let (Right res) = eRes
              Just res `shouldBe` JSON.decode [json| {"version":0
                                                     ,"count":1
                                                     ,"data":[
                                                       {"ngrams":"abelian group"
                                                       ,"size":1
                                                       ,"list":"MapTerm"
                                                       ,"occurrences":[],"children":[]}
                                                       ]
                                                     } |]
              eResStop <- runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just StopTerm) Nothing Nothing Nothing Nothing) clientEnv
              eResStop `shouldSatisfy` isRight
              let (Right resStop) = eResStop
              Just resStop `shouldBe` JSON.decode [json| {"version":0
                                                         ,"count":1
                                                         ,"data":[
                                                           {"ngrams":"brazorf"
                                                           ,"size":1
                                                           ,"list":"StopTerm"
                                                           ,"occurrences":[],"children":[]}
                                                           ]
                                                         } |]

    describe "POST /api/v1.0/corpus/:id/add/form/async (JSON)" $ do

      it "allows uploading a JSON docs file" $ \(SpecContext testEnv port app _) -> do
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            void $ createFortranDocsList testEnv port clientEnv token

      it "doesn't use trashed documents for score calculation (#385)" $ \(SpecContext testEnv port app _) -> do
        let log_cfg = (test_config testEnv) ^. gc_logging
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            corpusId <- createFortranDocsList testEnv port clientEnv token
            tr1 <- liftIO $ do
              (HashedResponse _ tr1)
                <- checkEither $ runClientM (get_table token
                                                       corpusId
                                                       (Just APINgrams.Docs)
                                                       (Just 10)
                                                       (Just 0)
                                                       (Just Facet.DateDesc)
                                                       (Just $ RawQuery "fortran")
                                                       Nothing
                                            ) clientEnv
              length (tr_docs tr1) `shouldBe` 2
              pure tr1


            termsNodeId <- uploadJSONList log_cfg port token corpusId "test-data/ngrams/GarganText_NgramsTerms-nodeId-177.json" clientEnv

            liftIO $ do
              -- Now let's check the score for the "fortran" ngram.
              (VersionedWithCount _ _ (NgramsTable [fortran_ngram]))
                <- checkEither $ runClientM (get_table_ngrams token
                                                              corpusId
                                                              APINgrams.Terms
                                                              termsNodeId
                                                              10
                                                              (Just 0)
                                                              (Just MapTerm)
                                                              Nothing
                                                              Nothing
                                                              Nothing
                                                              Nothing
                                            ) clientEnv
              length (_ne_occurrences fortran_ngram) `shouldBe` 2

            -- At this point, we need to trash one of the two documents that contain
            -- the "fortran" occurrence, and this should be reflected in the ngrams.
            trash_document token (Facet.facetDoc_id $ Prelude.head (tr_docs tr1)) corpusId

            -- Check that the number of returned documents has decreased
            liftIO $ do
              (HashedResponse _ tr2)
                <- checkEither $ runClientM (get_table token
                                                       corpusId
                                                       (Just APINgrams.Docs)
                                                       (Just 10)
                                                       (Just 0)
                                                       (Just Facet.DateDesc)
                                                       (Just $ RawQuery "fortran")
                                                       Nothing
                                            ) clientEnv
              length (tr_docs tr2) `shouldBe` 1

            liftIO $ do
              -- Now let's check the score for the "fortran" ngram. It must have
              -- decreased by 1, because one of the matching documents has been trashed.
              (VersionedWithCount _ _ (NgramsTable [fortran_ngram']))
                <- checkEither $ runClientM (get_table_ngrams token
                                                              corpusId
                                                              APINgrams.Terms
                                                              termsNodeId
                                                              10
                                                              (Just 0)
                                                              (Just MapTerm)
                                                              Nothing
                                                              Nothing
                                                              Nothing
                                                              Nothing
                                            ) clientEnv
              length (_ne_occurrences fortran_ngram') `shouldBe` 1

createDocsList :: FilePath
               -> TestEnv
               -> Int
               -> ClientEnv
               -> Token
               -> WaiSession () CorpusId
createDocsList testDataPath testEnv port clientEnv token = do
  folderId <- liftIO $ newPrivateFolderForUser testEnv alice
  ([corpusId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build folderId)) [aesonQQ|{"pn_typename":"NodeCorpus","pn_name":"TestCorpus"}|]
  -- Import the docs-list payload from the given test-data file.
  simpleDocs <- liftIO (TIO.readFile =<< getDataFileName testDataPath)
  let newWithForm = mkNewWithForm simpleDocs (T.pack $ takeBaseName testDataPath)
  ji <- checkEither $ liftIO $ runClientM (add_file_async token corpusId newWithForm) clientEnv
  ji' <- pollUntilWorkFinished log_cfg port ji
  liftIO $ ji' `shouldSatisfy` isJobFinished
  pure corpusId
  where
    log_cfg = (test_config testEnv) ^. gc_logging

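-- | Shorthand for 'createDocsList' over a fixture with two documents, both
-- containing the term "fortran".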
createFortranDocsList :: TestEnv -> Int -> ClientEnv -> Token -> WaiSession () CorpusId
createFortranDocsList testEnv port =
  createDocsList "test-data/ngrams/GarganText_DocsList-nodeId-177.json" testEnv port

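-- | Trigger an update of the given node's texts (with granularity 'Both')
-- and wait for the worker job to finish.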
updateNode :: LogConfig
           -> Int
           -> ClientEnv
           -> Token
           -> NodeId
           -> WaiSession () ()
updateNode log_cfg port clientEnv token nodeId = do
  let params = UpdateNodeParamsTexts Both
  ji <- checkEither $ liftIO $ runClientM (update_node token nodeId params) clientEnv
  ji' <- pollUntilWorkFinished log_cfg port ji
  liftIO $ ji' `shouldSatisfy` isJobFinished

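-- | Build a 'NewWithForm' payload for uploading a plain JSON file, defaulting
-- to English and to the 'MySelfFirst' social-list priority.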
mkNewWithForm :: T.Text -> T.Text -> NewWithForm
mkNewWithForm content name = NewWithForm
  { _wf_filetype   = FType.JSON
  , _wf_fileformat = FType.Plain
  , _wf_data       = content
  , _wf_lang       = Just Lang.EN
  , _wf_name       = name
  , _wf_selection  = FlowSocialListWithPriority MySelfFirst
  }

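-- | Walk the named-routes client record down to the @addWithTempFile@
-- endpoint and post the given form, returning the worker job info.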
add_file_async :: Token
               -> CorpusId
               -> NewWithForm
               -> ClientM JobInfo
add_file_async (toServantToken -> token) corpusId nwf =
  clientRoutes & apiWithCustomErrorScheme
               & ($ GES_new)
               & backendAPI
               & backendAPI'
               & mkBackEndAPI
               & gargAPIVersion
               & gargPrivateAPI
               & mkPrivateAPI
               & ($ token)
               & addWithTempFile
               & addWithTempFileEp
               & ($ corpusId)
               & workerAPIPost
               & ($ nwf)


-- | Utility to trash a document by issuing a raw GraphQL mutation. Not very
-- type safe, but it gets the job done for now.
trash_document :: Token
               -> NodeId
               -- ^ The context id to trash, i.e. the document ID.
               -> CorpusId
               -- ^ The parent corpus ID this document is attached to.
               -> WaiSession () ()
trash_document token docId cpsId =
  void $ protectedJSON @JSON.Value token "POST" gqlUrl [aesonQQ|
{
  "query": #{operation},
  "operationName": "update_node_context_category",
  "variables": {}
}|]
  where
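    -- Setting the category to 0 marks the context (document) as trashed.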
    operation :: Prelude.String
    operation = printf "mutation update_node_context_category { update_node_context_category(context_id: %d, node_id: %d, category: 0) }" contextId corpusId

    contextId :: Int
    contextId = _NodeId docId

    corpusId :: Int
    corpusId = _NodeId cpsId