{-|
Module      : Test.API.UpdateList
Description :
Copyright   : (c) CNRS, 2017
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}

{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}

{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ViewPatterns #-}

module Test.API.UpdateList (
    tests
  -- * Useful helpers
  , JobPollHandle(..)
  , updateNode
  , createDocsList
  , createFortranDocsList
  , mkNewWithForm
  ) where

import Control.Monad.Fail (fail)
import Data.Aeson qualified as JSON
import Data.Aeson.QQ
import Data.ByteString.Lazy qualified as BL
import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM
import Data.Patch.Class
import Data.Text qualified as T
import Data.Text.IO qualified as TIO
import Fmt
import Gargantext.API.Admin.Auth.Types (Token)
import Gargantext.API.Errors
import Gargantext.API.HashedResponse
import Gargantext.API.Ngrams qualified as APINgrams
import Gargantext.API.Ngrams.List ( ngramsListFromTSVData )
import Gargantext.API.Ngrams.List.Types (WithJsonFile(..), WithTextFile(..))
import Gargantext.API.Ngrams.Types as NT
import Gargantext.API.Node.Corpus.New.Types qualified as FType
import Gargantext.API.Node.Types
import Gargantext.API.Routes.Named
import Gargantext.API.Routes.Named.Corpus (addWithTempFileEp)
import Gargantext.API.Routes.Named.Node
import Gargantext.API.Routes.Named.Private
import Gargantext.API.Worker (workerAPIPost)
import Gargantext.Core qualified as Lang
import Gargantext.Core.Config
import Gargantext.Core.Text.Corpus.Query (RawQuery(..))
import Gargantext.Core.Text.List.Social
import Gargantext.Core.Text.Ngrams
import Gargantext.Core.Types ( CorpusId, ListId, NodeId, _NodeId, TableResult(..))
import Gargantext.Core.Types.Individu
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Worker.Types (JobInfo)
import Gargantext.Database.Query.Facet qualified as Facet
import Gargantext.Prelude hiding (get)
import Network.Wai.Handler.Warp qualified as Wai
import Paths_gargantext (getDataFileName)
import Prelude qualified
import Servant.Client.Streaming
import Servant.API qualified as Servant
import System.FilePath
import Test.API.Prelude (checkEither, newCorpusForUser, newPrivateFolderForUser, alice)
import Test.API.Routes (mkUrl, gqlUrl, get_table_ngrams, put_table_ngrams, toServantToken, clientRoutes, get_table, update_node, add_form_to_list, add_tsv_to_list, get_list_json)
import Test.API.Setup (withTestDBAndPort, dbEnvSetup, SpecContext (..))
import Test.Database.Types
import Test.Hspec
import Test.Hspec.Wai.Internal (withApplication, WaiSession)
import Test.Hspec.Wai.JSON (json)
import Test.Types (JobPollHandle(..))
import Test.Utils (pollUntilWorkFinished, protectedJSON, withValidLogin, isJobFinished)
import Text.Printf (printf)
import qualified Data.Text.Encoding as TE
import qualified Data.ByteString as BIO
import Control.Lens (view)
import Gargantext.API.Admin.Orchestrator.Types


uploadJSONList :: LogConfig
               -> Wai.Port
               -> Token
               -> CorpusId
               -> FilePath
               -> ClientEnv
               -> WaiSession () ListId
uploadJSONList log_cfg port token cId pathToNgrams clientEnv = do
  simpleNgrams' <- liftIO (BIO.readFile =<< getDataFileName pathToNgrams)
  uploadJSONListBS log_cfg port token cId simpleNgrams' clientEnv

uploadJSONListBS :: LogConfig
                 -> Wai.Port
                 -> Token
                 -> CorpusId
                 -> ByteString
                 -> ClientEnv
                 -> WaiSession () ListId
uploadJSONListBS log_cfg port token cId blob clientEnv = do
  ([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId)) [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|]
  -- Upload the JSON doc
  let params = WithJsonFile { _wjf_data = TE.decodeUtf8 blob
                            , _wjf_name = "simple_ngrams.json" }
  ji <- checkEither $ liftIO $ runClientM (add_form_to_list token listId params) clientEnv
  ji' <- pollUntilWorkFinished log_cfg port ji
  liftIO $ ji' `shouldSatisfy` isJobFinished
  pure listId

-- | Compares the ngrams returned via the input IO action with the ones provided as
-- the 'ByteString'. Use this function with the 'json' quasi quoter to pass directly
-- a nicely-formatted JSON.
checkNgrams :: IO (Either ClientError (VersionedWithCount NgramsTable))
            -> BL.ByteString
            -> WaiSession () ()
checkNgrams rq expected = liftIO $ do
  eng <- rq
  case eng of
    Left err -> fail (show err)
    Right r -> Just r `shouldBe` JSON.decode expected


tests :: Spec
tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
  describe "UpdateList API" $ do

    describe "POST /api/v1.0/lists/:id/add/form/async (JSON)" $ do

      it "allows uploading a JSON ngrams file" $ \(SpecContext testEnv port app _) -> do
        cId <- newCorpusForUser testEnv "alice"
        let log_cfg = (test_config testEnv) ^. gc_logging
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            listId <- uploadJSONList log_cfg port token cId "test-data/ngrams/simple.json" clientEnv

            -- Now check that we can retrieve the ngrams
            liftIO $ do
              eRes <- runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv
              eRes `shouldSatisfy` isRight
              let (Right res) = eRes
              Just res `shouldBe` JSON.decode [json| { "version": 0,
                                                       "count": 1,
                                                       "data": [
                                                           {
                                                               "ngrams": "abelian group",
                                                               "size": 2,
                                                               "list": "MapTerm",
                                                               "occurrences": [],
                                                               "children": []
                                                           }
                                                       ]
                                                   } |]

      it "does allow creating hierarchical grouping at least for level-2" $ \(SpecContext testEnv port app _) -> do
        cId <- newCorpusForUser testEnv "alice"
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            ([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId)) [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|]
            let newMapTerm = NgramsRepoElement {
                             _nre_size = 1
                           , _nre_list        = MapTerm
                           , _nre_root        = Nothing
                           , _nre_parent      = Nothing
                           , _nre_children    = mempty
                           }
            let add_guitar_pedals =
                  PM.fromList [
                    ( "guitar pedals"
                    , NgramsReplace { _patch_old = Nothing
                                    , _patch_new = Just newMapTerm })
                    ]
            _ <- liftIO $ runClientM (put_table_ngrams token cId APINgrams.Terms listId (Versioned 1 $ NgramsTablePatch $ fst add_guitar_pedals)) clientEnv
            let add_tube_screamers =
                  PM.fromList [
                    ( "tube screamers"
                    , NgramsReplace { _patch_old = Nothing
                                    , _patch_new = Just newMapTerm })
                    ]
            _ <- liftIO $ runClientM (put_table_ngrams token cId APINgrams.Terms listId (Versioned 1 $ NgramsTablePatch $ fst add_tube_screamers)) clientEnv
            let group_nodes =
                  PM.fromList [
                    ( "guitar pedals"
                    , NgramsPatch { _patch_children = NT.PatchMSet (fst $ PM.fromList [("tube screamers", addPatch)])
                                  , _patch_list = Keep })
                    ]
            _ <- liftIO $ runClientM (put_table_ngrams token cId APINgrams.Terms listId (Versioned 1 $ NgramsTablePatch $ fst group_nodes)) clientEnv
            -- Creates the grouping:
            {- overdrives
               |
               \ guitar pedals
                 |
                 \ tube screamers
            -}
            let add_overdrives =
                  PM.fromList [
                    ( "overdrives"
                    , NgramsReplace { _patch_old = Nothing
                                    , _patch_new = Just newMapTerm })
                    ]
            _ <- liftIO $ runClientM (put_table_ngrams token cId APINgrams.Terms listId (Versioned 1 $ NgramsTablePatch $ fst add_overdrives)) clientEnv
            let group_nodes_2 =
                  PM.fromList [
                    ( "overdrives"
                    , NgramsPatch { _patch_children = NT.PatchMSet (fst $ PM.fromList [("guitar pedals", addPatch)])
                                  , _patch_list = Keep })
                    ]
            _ <- liftIO $ runClientM (put_table_ngrams token cId APINgrams.Terms listId (Versioned 1 $ NgramsTablePatch $ fst group_nodes_2)) clientEnv
            liftIO $ do
              eRes <- runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv
              eRes `shouldSatisfy` isRight
              let (Right res) = eRes
              -- /NOTA BENE/ The count is 1 because the count applies to roots only.
              Just res `shouldBe` JSON.decode [json| {"version":5
                                                     ,"count":1
                                                     ,"data":[
                                                       {"ngrams":"overdrives"
                                                       ,"size":1
                                                       ,"list":"MapTerm"
                                                       ,"occurrences":[]
                                                       ,"children":["guitar pedals"]
                                                       },
                                                       {"ngrams":"guitar pedals"
                                                       ,"size":1
                                                       ,"list":"MapTerm"
                                                       ,"root":"overdrives"
                                                       ,"parent":"overdrives"
                                                       ,"occurrences":[]
                                                       ,"children":["tube screamers"]
                                                       },
                                                       {"ngrams":"tube screamers"
                                                       ,"size":1
                                                       ,"list":"MapTerm"
                                                       ,"root":"overdrives"
                                                       ,"parent":"guitar pedals"
                                                       ,"occurrences":[]
                                                       ,"children":[]}
                                                       ]
                                                     } |]

      it "does not create duplicates when uploading JSON (#313)" $ \(SpecContext testEnv port app _) -> do
        cId <- newCorpusForUser testEnv "alice"
        let log_cfg = (test_config testEnv) ^. gc_logging
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            -- The test data has a single term called "abelian group". In this test
            -- we will try grouping together "abelian group" and "new abelian group".
            listId <- uploadJSONList log_cfg port token cId "test-data/ngrams/simple.json" clientEnv

            -- The #313 error is about importedTerm being duplicated
            -- in a specific case
            let getNgrams = runClientM (get_table_ngrams token cId APINgrams.Terms listId 10 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv

            checkNgrams getNgrams [json| {"version":0
                                         ,"count":1
                                         ,"data":[
                                           {"ngrams":"abelian group"
                                           ,"size":2
                                           ,"list":"MapTerm"
                                           ,"root":null
                                           ,"parent":null
                                           ,"occurrences":[]
                                           ,"children":[]
                                           }
                                           ]
                                         }
                                  |]
            let nre = NgramsRepoElement 1 MapTerm Nothing Nothing (MSet mempty)
            let patch = PM.fromList [
                           ( "new abelian group"
                           , NgramsReplace { _patch_old = Nothing
                                           , _patch_new = Just nre } )
                      ]
            _ <- liftIO $ runClientM (put_table_ngrams token cId APINgrams.Terms listId (Versioned 1 $ NgramsTablePatch $ fst patch)) clientEnv

            -- check that new term is added (with no parent)
            checkNgrams getNgrams [json| { "version": 1
                                         ,"count":2
                                         ,"data":[
                                           {"ngrams":"abelian group"
                                           ,"size":2
                                           ,"list":"MapTerm"
                                           ,"root":null
                                           ,"parent":null
                                           ,"occurrences":[]
                                           ,"children":[]
                                           },
                                           {"ngrams":"new abelian group"
                                           ,"size":1
                                           ,"list":"MapTerm"
                                           ,"root":null
                                           ,"parent":null
                                           ,"occurrences":[]
                                           ,"children":[]
                                           }
                                           ]
                                         }
                                  |]

            -- now patch it so that we have a group
            let patchChildren = PM.fromList [
                                 ( "new abelian group"
                                 , toNgramsPatch ["abelian group"] )
                               ]
            _ <- liftIO $ runClientM (put_table_ngrams token cId APINgrams.Terms listId (Versioned 32 $ NgramsTablePatch $ fst patchChildren)) clientEnv

            -- check that new term is parent of old one
            checkNgrams getNgrams [json| {"version": 2
                                         ,"count":1
                                         ,"data":[
                                           {"ngrams":"new abelian group"
                                           ,"size":1
                                           ,"list":"MapTerm"
                                           ,"root":null
                                           ,"parent":null
                                           ,"occurrences":[]
                                           ,"children":["abelian group"]
                                           },
                                           {"ngrams":"abelian group"
                                           ,"size":2
                                           ,"list":"MapTerm"
                                           ,"root": "new abelian group"
                                           ,"parent": "new abelian group"
                                           ,"occurrences":[]
                                           ,"children":[]
                                           }
                                           ]
                                         }
                                  |]

            -- finally, upload the list again, the group should be as
            -- it was before (the bug in #313 was that "abelian group"
            -- was created again as a term with no parent)
            _ <- uploadJSONList log_cfg port token cId "test-data/ngrams/simple.json" clientEnv

            -- old (imported) term shouldn't become parentless
            -- (#313 error was that we had ["new abelian group", "abelian group"] instead)
            -- In essence, this JSON needs to be exactly the same as the previous one,
            -- i.e. important doesn't change the topology.
            checkNgrams getNgrams [json| {"version": 2
                                         ,"count":1
                                         ,"data":[
                                           {"ngrams":"new abelian group"
                                           ,"size":1
                                           ,"list":"MapTerm"
                                           ,"root":null
                                           ,"parent":null
                                           ,"occurrences":[]
                                           ,"children":["abelian group"]
                                           },
                                           {"ngrams":"abelian group"
                                           ,"size":2
                                           ,"list":"MapTerm"
                                           ,"root": "new abelian group"
                                           ,"parent": "new abelian group"
                                           ,"occurrences":[]
                                           ,"children":[]
                                           }
                                           ]
                                         }
                                  |]

    describe "POST /api/v1.0/lists/:id/csv/add/form/async (CSV)" $ do

      it "parses CSV via ngramsListFromCSVData" $ \(SpecContext _testEnv _port _app _) -> do
        simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/simple.tsv")
        ngramsListFromTSVData simpleNgrams `shouldBe`
          Right (Map.fromList [ (NgramsTerms, Versioned 0 $ Map.fromList [
                                  (NgramsTerm "abelian group", NgramsRepoElement 1 MapTerm Nothing Nothing (MSet mempty))
                                , (NgramsTerm "brazorf", NgramsRepoElement 1 StopTerm Nothing Nothing (MSet mempty))
                              ])])

      it "parses TSV with UTF-8 issues" $ \(SpecContext _testEnv _port _app _) -> do
        simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/433-utf-encoding-issue.tsv")
        -- we don't care about the output, only that the file was parsed without errors (this file is garbage)
        ngramsListFromTSVData simpleNgrams `shouldSatisfy` isRight

      it "allows uploading a CSV ngrams file" $ \(SpecContext testEnv port app _) -> do
        cId <- newCorpusForUser testEnv "alice"
        let log_cfg = (test_config testEnv) ^. gc_logging
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            ([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId)) [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|]
            -- Upload the CSV doc
            simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/simple.tsv")
            let params = WithTextFile { _wtf_filetype = FType.TSV
                                      , _wtf_data = simpleNgrams
                                      , _wtf_name = "simple.tsv" }
            ji <- checkEither $ liftIO $ runClientM (add_tsv_to_list token listId params) clientEnv
            _ <- pollUntilWorkFinished log_cfg port ji

            -- Now check that we can retrieve the ngrams
            liftIO $ do
              eRes <- runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv
              eRes `shouldSatisfy` isRight
              let (Right res) = eRes
              Just res `shouldBe` JSON.decode [json| {"version":0
                                                     ,"count":1
                                                     ,"data":[
                                                       {"ngrams":"abelian group"
                                                       ,"size":1
                                                       ,"list":"MapTerm"
                                                       ,"occurrences":[],"children":[]}
                                                       ]
                                                     } |]
            -- let getStopUrl = "/node/" +| listId |+ "/ngrams?ngramsType=Terms&listType=StopTerm&list="+| listId |+"&limit=50"
              eResStop <- runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just StopTerm) Nothing Nothing Nothing Nothing) clientEnv
              eResStop `shouldSatisfy` isRight
              let (Right resStop) = eResStop
              Just resStop `shouldBe` JSON.decode [json| {"version":0
                                                         ,"count":1
                                                         ,"data":[
                                                           {"ngrams":"brazorf"
                                                           ,"size":1
                                                           ,"list":"StopTerm"
                                                           ,"occurrences":[],"children":[]}
                                                           ]
                                                         } |]

    describe "POST /api/v1.0/corpus/:id/add/form/async (JSON)" $ do

      it "allows uploading a JSON docs file" $ \(SpecContext testEnv port app _) -> do
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            void $ createFortranDocsList testEnv port clientEnv token

      it "doesn't use trashed documents for score calculation (#385)" $ \(SpecContext testEnv port app _) -> do
        let log_cfg = (test_config testEnv) ^. gc_logging
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            corpusId <- createFortranDocsList testEnv port clientEnv token
            tr1 <- liftIO $ do
              (HashedResponse _ tr1)
                <- checkEither $ runClientM (get_table token
                                                       corpusId
                                                       (Just APINgrams.Docs)
                                                       (Just 10)
                                                       (Just 0)
                                                       (Just Facet.DateDesc)
                                                       (Just $ RawQuery "fortran")
                                                       Nothing
                                            ) clientEnv
              length (tr_docs tr1) `shouldBe` 2
              pure tr1


            termsNodeId <- uploadJSONList log_cfg port token corpusId "test-data/ngrams/GarganText_NgramsTerms-nodeId-177.json" clientEnv

            liftIO $ do
              -- Now let's check the score for the \"fortran\" ngram.
              (VersionedWithCount _ _ (NgramsTable [fortran_ngram]))
                <- checkEither $ runClientM (get_table_ngrams token
                                                              corpusId
                                                              APINgrams.Terms
                                                              termsNodeId
                                                              10
                                                              (Just 0)
                                                              (Just MapTerm)
                                                              Nothing
                                                              Nothing
                                                              Nothing
                                                              Nothing
                                            ) clientEnv
              length (_ne_occurrences fortran_ngram) `shouldBe` 2

            -- At this point, we need to trash one of the two documents which contains
            -- the \"fortran\" occurrence, and this should be reflected in the Ngrams.
            trash_document token (Facet.facetDoc_id $ Prelude.head (tr_docs tr1)) corpusId

            -- Check that the document of returned documents has decreased
            liftIO $ do
              (HashedResponse _ tr2)
                <- checkEither $ runClientM (get_table token
                                                       corpusId
                                                       (Just APINgrams.Docs)
                                                       (Just 10)
                                                       (Just 0)
                                                       (Just Facet.DateDesc)
                                                       (Just $ RawQuery "fortran")
                                                       Nothing
                                            ) clientEnv
              length (tr_docs tr2) `shouldBe` 1

            liftIO $ do
              -- Now let's check the score for the \"fortran\" ngram. It must be decreased
              -- by 1, because one of the matching documents have been trashed.
              (VersionedWithCount _ _ (NgramsTable [fortran_ngram']))
                <- checkEither $ runClientM (get_table_ngrams token
                                                              corpusId
                                                              APINgrams.Terms
                                                              termsNodeId
                                                              10
                                                              (Just 0)
                                                              (Just MapTerm)
                                                              Nothing
                                                              Nothing
                                                              Nothing
                                                              Nothing
                                            ) clientEnv
              length (_ne_occurrences fortran_ngram') `shouldBe` 1

    describe "Importing and exporting nested terms" $ do
      -- As per #498, we want to test that even in the present of deep
      -- nested hierarchy of ngrams, we can import and export them and we should
      -- end up with the ngrams hierarchy we started from. In other terms, a
      -- roundtrip property should be satisfied.
      it "should roundtrip for JSON" $ \(SpecContext testEnv port app _) -> do
        cId <- newCorpusForUser testEnv "alice"
        cId2 <- newCorpusForUser testEnv "alice"
        let log_cfg = (test_config testEnv) ^. gc_logging
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            -- Import the initial terms
            let (Right initialTerms) = JSON.eitherDecode @NgramsList $ [json|
                  {
                    "Authors": {
                      "version": 11,
                      "data": {}
                    },
                    "Institutes": {
                      "version": 11,
                      "data": {}
                    },
                    "Sources": {
                      "version": 11,
                      "data": {}
                    },
                    "NgramsTerms": {
                      "version": 11,
                      "data": {
                        "boss ds-1": {
                          "root": "guitar effects",
                          "parent": "distortions",
                          "size": 1,
                          "list": "MapTerm",
                          "children": []
                        },
                        "distortions": {
                          "root": "guitar effects",
                          "parent": "guitar effects",
                          "size": 1,
                          "list": "MapTerm",
                          "children": [
                            "boss ds-1",
                            "rat"
                          ]
                        },
                        "guitar effects": {
                          "size": 1,
                          "list": "MapTerm",
                          "children": [
                            "distortions",
                            "overdrives"
                          ]
                        },
                        "guitar pedals": {
                          "root": "guitar effects",
                          "parent": "overdrives",
                          "size": 1,
                          "list": "MapTerm",
                          "children": [
                            "tube screamers"
                          ]
                        },
                        "overdrives": {
                          "root": "guitar effects",
                          "parent": "guitar effects",
                          "size": 1,
                          "list": "MapTerm",
                          "children": [
                            "guitar pedals"
                          ]
                        },
                        "rat": {
                          "root": "guitar effects",
                          "parent": "distortions",
                          "size": 1,
                          "list": "MapTerm",
                          "children": []
                        },
                        "tube screamers": {
                          "root": "guitar effects",
                          "parent": "guitar pedals",
                          "size": 1,
                          "list": "MapTerm",
                          "children": []
                        }
                      }
                    }
                  }
                |]
            listId <- uploadJSONListBS log_cfg port token cId (BL.toStrict $ JSON.encode initialTerms) clientEnv
            -- Export them.
            exported <- Servant.getResponse <$> (checkEither $ liftIO $ runClientM (get_list_json token listId) clientEnv)

            let initialNgrams  = view v_data <$> Map.lookup NgramsTerms initialTerms
            let exportedNgrams = view v_data <$> Map.lookup NgramsTerms exported
            liftIO $ exportedNgrams `shouldBe` initialNgrams

            -- now we import them again, but this time on a different corpus, so that we don't
            -- get conflicts and the occurrences count won't get messed up. Dealing with conflicts
            -- is a separate type of test.
            listId2 <- uploadJSONListBS log_cfg port token cId2 (BL.toStrict $ JSON.encode exported) clientEnv
            -- Export them again.
            exported2 <- Servant.getResponse <$> (checkEither $ liftIO $ runClientM (get_list_json token listId2) clientEnv)
            let exportedNgrams2 = view v_data <$> Map.lookup NgramsTerms exported2
            liftIO $ exportedNgrams `shouldBe` exportedNgrams2

      -- We test that if we try to import terms which, when merged with the existing,
      -- would cause a loop, GGTX is capable of rejecting the request.
      it "refuses to import terms which will lead to a loop" $ \(SpecContext testEnv port app _) -> do
        cId <- newCorpusForUser testEnv "alice"
        let log_cfg = (test_config testEnv) ^. gc_logging
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            -- Import the initial terms
            let (Right initialTerms) = JSON.eitherDecode @NgramsList $ [json|
                  {
                    "Authors": {
                      "version": 1,
                      "data": {}
                    },
                    "Institutes": {
                      "version": 1,
                      "data": {}
                    },
                    "Sources": {
                      "version": 1,
                      "data": {}
                    },
                    "NgramsTerms": {
                      "version": 1,
                      "data": {
                        "foo": {
                          "size": 1,
                          "list": "MapTerm",
                          "children": ["bar"]
                        },
                        "bar": {
                          "root": "foo",
                          "parent": "foo",
                          "size": 1,
                          "list": "MapTerm",
                          "children": [
                            "quux"
                          ]
                        },
                        "quux": {
                          "size": 1,
                          "list": "MapTerm",
                          "children": []
                        } } } }
                        |]
            listId <- uploadJSONListBS log_cfg port token cId (BL.toStrict $ JSON.encode initialTerms) clientEnv
            let (Right secondBatch) = JSON.eitherDecode @NgramsList $ [json|
                  {
                    "Authors": {
                      "version": 1,
                      "data": {}
                    },
                    "Institutes": {
                      "version": 1,
                      "data": {}
                    },
                    "Sources": {
                      "version": 1,
                      "data": {}
                    },
                    "NgramsTerms": {
                      "version": 1,
                      "data": {
                        "bar": {
                          "size": 1,
                          "list": "MapTerm",
                          "children": ["foo"]
                        }
                        } } }
                        |]
            let params = WithJsonFile { _wjf_data = TE.decodeUtf8 (BL.toStrict $ JSON.encode secondBatch)
                                      , _wjf_name = "simple_ngrams.json"
                                      }
            ji <- checkEither $ liftIO $ runClientM (add_form_to_list token listId params) clientEnv
            ji' <- pollUntilWorkFinished log_cfg port ji
            -- Unfortunately we don't have a better way then to match on the stringified exception, sigh.
            case _scst_events ji' of
              Just [ScraperEvent{..}]
                | Just msg <- _scev_message
                -> liftIO $ msg `shouldSatisfy` \txt -> "Loop detected in terms: foo -> bar -> foo" `T.isInfixOf` txt
                | otherwise
                -> fail "No suitable message in ScraperEvent."
              _ -> fail "Expected job to fail, but it didn't"

createDocsList :: FilePath
               -> TestEnv
               -> Int
               -> ClientEnv
               -> Token
               -> WaiSession () CorpusId
createDocsList testDataPath testEnv port clientEnv token = do
  folderId <- liftIO $ newPrivateFolderForUser testEnv alice
  ([corpusId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build folderId)) [aesonQQ|{"pn_typename":"NodeCorpus","pn_name":"TestCorpus"}|]
  -- Import the docsList with only two documents, both containing a \"fortran\" term.
  simpleDocs <- liftIO (TIO.readFile =<< getDataFileName testDataPath)
  let newWithForm = mkNewWithForm simpleDocs (T.pack $ takeBaseName testDataPath)
  ji <- checkEither $ liftIO $ runClientM (add_file_async token corpusId newWithForm) clientEnv
  ji' <- pollUntilWorkFinished log_cfg port ji
  liftIO $ ji' `shouldSatisfy` isJobFinished
  pure corpusId
  where
    log_cfg = (test_config testEnv) ^. gc_logging

createFortranDocsList :: TestEnv -> Int -> ClientEnv -> Token -> WaiSession () CorpusId
createFortranDocsList testEnv port =
  createDocsList "test-data/ngrams/GarganText_DocsList-nodeId-177.json" testEnv port

updateNode :: LogConfig
           -> Int
           -> ClientEnv
           -> Token
           -> NodeId
           -> WaiSession () ()
updateNode log_cfg port clientEnv token nodeId = do
  let params = UpdateNodeParamsTexts Both
  ji <- checkEither $ liftIO $ runClientM (update_node token nodeId params) clientEnv
  ji' <- pollUntilWorkFinished log_cfg port ji
  liftIO $ ji' `shouldSatisfy` isJobFinished

mkNewWithForm :: T.Text -> T.Text -> NewWithForm
mkNewWithForm content name = NewWithForm
  { _wf_filetype   = FType.JSON
  , _wf_fileformat = FType.Plain
  , _wf_data       = content
  , _wf_lang       = Just Lang.EN
  , _wf_name       = name
  , _wf_selection  = FlowSocialListWithPriority MySelfFirst
  }

add_file_async :: Token
               -> CorpusId
               -> NewWithForm
               -> ClientM JobInfo
add_file_async (toServantToken -> token) corpusId nwf =
  clientRoutes & apiWithCustomErrorScheme
               & ($ GES_new)
               & backendAPI
               & backendAPI'
               & mkBackEndAPI
               & gargAPIVersion
               & gargPrivateAPI
               & mkPrivateAPI
               & ($ token)
               & addWithTempFile
               & addWithTempFileEp
               & ($ corpusId)
               & workerAPIPost
               & (\submitForm -> submitForm nwf)


-- | Utility to trash a document by performing a raw query towards GQL. Not very type safe,
-- but it will get the job done for now.
trash_document :: Token
               -> NodeId
               -- ^ The context id to delete, i.e. the document ID.
               -> CorpusId
               -- ^ The parent corpus ID this document is attached to.
               -> WaiSession () ()
trash_document token docId cpsId =
  void $ protectedJSON @JSON.Value token "POST" gqlUrl [aesonQQ|
{
  "query": #{operation},
  "operationName": "update_node_context_category",
  "variables": {}
}|]
  where
    operation :: Prelude.String
    operation = printf "mutation update_node_context_category { update_node_context_category(context_id: %d, node_id: %d, category: 0) }" contextId corpusId

    contextId :: Int
    contextId = _NodeId docId

    corpusId :: Int
    corpusId = _NodeId cpsId
