{-|
Module      : Test.API.UpdateList
Description :
Copyright   : (c) CNRS, 2017
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}

{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}

{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ViewPatterns #-}

module Test.API.UpdateList (
    tests
  -- * Useful helpers
  , JobPollHandle(..)
  , updateNode
  , createDocsList
  , createFortranDocsList
  ) where

import Control.Lens (mapped, over)
import Control.Monad.Fail (fail)
import Data.Aeson.QQ
import Data.Aeson qualified as JSON
import Data.ByteString.Lazy qualified as BSL
import Data.Map.Strict.Patch qualified as PM
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text.IO qualified as TIO
import Data.Text qualified as T
import Fmt
import Gargantext.API.Admin.Auth.Types (Token)
import Gargantext.API.Errors
import Gargantext.API.HashedResponse
import Gargantext.API.Ngrams.List ( ngramsListFromTSVData )
import Gargantext.API.Ngrams.List.Types (WithJsonFile(..), WithTextFile(..))
import Gargantext.API.Ngrams qualified as APINgrams
import Gargantext.API.Ngrams.Types
import Gargantext.API.Node.Corpus.New.Types qualified as FType
import Gargantext.API.Node.Types
import Gargantext.API.Routes.Named
import Gargantext.API.Routes.Named.Corpus
import Gargantext.API.Routes.Named.Node
import Gargantext.API.Routes.Named.Private
import Gargantext.API.Worker (workerAPIPost)
import Gargantext.Core.Config
import Gargantext.Core qualified as Lang
import Gargantext.Core.Text.Corpus.Query (RawQuery(..))
import Gargantext.Core.Text.List.Social
import Gargantext.Core.Text.Ngrams
import Gargantext.Core.Types ( CorpusId, ListId, NodeId, _NodeId, TableResult(..))
import Gargantext.Core.Types.Individu
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Worker.Types (JobInfo)
import Gargantext.Database.Query.Facet qualified as Facet
import Gargantext.Prelude hiding (get)
import Network.Wai.Handler.Warp qualified as Wai
import Paths_gargantext (getDataFileName)
import qualified Prelude
import Servant.Client.Streaming
import System.FilePath
import Test.API.Prelude (checkEither, newCorpusForUser, newPrivateFolderForUser, alice)
import Test.API.Routes (mkUrl, gqlUrl, get_table_ngrams, put_table_ngrams, toServantToken,
                        clientRoutes, get_table, update_node, add_form_to_list, add_tsv_to_list)
import Test.API.Setup (withTestDBAndPort, dbEnvSetup, SpecContext (..))
import Test.Database.Types
import Test.Hspec
import Test.Hspec.Wai.Internal (withApplication, WaiSession)
import Test.Hspec.Wai.JSON (json)
import Test.Types (JobPollHandle(..))
import Test.Utils (pollUntilWorkFinished, protectedJSON, withValidLogin)
import Text.Printf (printf)

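-- | Create a fresh @NodeList@ under the given corpus, then upload the JSON
-- ngrams file found at @pathToNgrams@ through the async worker endpoint,
-- polling until the job has finished. Returns the id of the new list.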
uploadJSONList :: LogConfig
               -> Wai.Port
               -> Token
               -> CorpusId
               -> FilePath
               -> ClientEnv
               -> WaiSession () ListId
uploadJSONList log_cfg port token cId pathToNgrams clientEnv = do
  ([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId))
                              [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|]
  -- Upload the JSON doc
  simpleNgrams' <- liftIO (TIO.readFile =<< getDataFileName pathToNgrams)
  let (Just simpleNgrams) = JSON.decode $ BSL.fromStrict $ encodeUtf8 simpleNgrams'
  -- let jsonFileFormData = [ (T.pack "_wjf_data", simpleNgrams)
  --                        , ("_wjf_filetype", "JSON")
  --                        , ("_wjf_name", "simple_ngrams.json")
  --                        ]
  let params = WithJsonFile { _wjf_data = simpleNgrams
                            , _wjf_name = "simple_ngrams.json" }
  -- let url = "/lists/" +|listId|+ "/add/form/async"
  -- let mkPollUrl j = "/corpus/" +|listId|+ "/add/form/async/" +|_jph_id j|+ "/poll?limit=1"
  -- (j :: JobPollHandle) <- postJSONUrlEncoded token (mkUrl port url) (urlEncodeFormStable $ toForm jsonFileFormData)
  -- j' <- pollUntilFinished token port mkPollUrl j
  ji <- checkEither $ liftIO $ runClientM (add_form_to_list token listId params) clientEnv
  -- liftIO (_jph_status j' `shouldBe` "IsFinished")
  ji' <- pollUntilWorkFinished log_cfg port ji
  liftIO $ ji' `shouldBe` ji
  pure listId

tests :: Spec
tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
  describe "UpdateList API" $ do
    describe "POST /api/v1.0/lists/:id/add/form/async (JSON)" $ do

      it "allows uploading a JSON ngrams file" $ \(SpecContext testEnv port app _) -> do
        cId <- newCorpusForUser testEnv "alice"
        let log_cfg = (test_config testEnv) ^. gc_logging
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            listId <- uploadJSONList log_cfg port token cId "test-data/ngrams/simple.json" clientEnv

            -- Now check that we can retrieve the ngrams
            liftIO $ do
              eRes <- runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv
              eRes `shouldSatisfy` isRight
              let (Right res) = eRes
              Just res `shouldBe` JSON.decode [json| { "version": 0,
                                                       "count": 1,
                                                       "data": [
                                                         { "ngrams": "abelian group",
                                                           "size": 2,
                                                           "list": "MapTerm",
                                                           "occurrences": [],
                                                           "children": [] }
                                                       ]
                                                     } |]

      it "does not create duplicates when uploading JSON (#313)" $ \(SpecContext testEnv port app _) -> do
        cId <- newCorpusForUser testEnv "alice"
        let log_cfg = (test_config testEnv) ^. gc_logging
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            -- this term is imported from the .json file
            let importedTerm = NgramsTerm "abelian group"
            -- this is the new term, under which importedTerm will be grouped
            let newTerm = NgramsTerm "new abelian group"

            listId <- uploadJSONList log_cfg port token cId "test-data/ngrams/simple.json" clientEnv

            let checkNgrams expected = do
                  eng <- liftIO $ runClientM (get_table_ngrams token cId APINgrams.Terms listId 10 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv
                  case eng of
                    Left err -> fail (show err)
                    Right r ->
                      let real = over mapped (\nt -> ( nt ^. ne_ngrams
                                                     , mSetToList $ nt ^. ne_children ))
                                 (r ^. vc_data . _NgramsTable)
                      in liftIO $ Set.fromList real `shouldBe` Set.fromList expected

            -- The #313 error is about importedTerm being duplicated
            -- in a specific case
            checkNgrams [ (importedTerm, []) ]

            let nre = NgramsRepoElement 1 MapTerm Nothing Nothing (MSet mempty)
            let patch = PM.fromList [
                          ( newTerm
                          , NgramsReplace { _patch_old = Nothing
                                          , _patch_new = Just nre } )
                          ]
            _ <- liftIO $ runClientM (put_table_ngrams token cId APINgrams.Terms listId (Versioned 1 $ NgramsTablePatch $ fst patch)) clientEnv

            -- check that new term is added (with no parent)
            checkNgrams [ (newTerm, [])
                        , (importedTerm, []) ]

            -- now patch it so that we have a group
            let patchChildren = PM.fromList [
                                  ( newTerm
                                  , toNgramsPatch [importedTerm] )
                                  ]
            _ <- liftIO $ runClientM (put_table_ngrams token cId APINgrams.Terms listId (Versioned 32 $ NgramsTablePatch $ fst patchChildren)) clientEnv

            -- check that new term is parent of old one
            checkNgrams [ (newTerm, [importedTerm]) ]

            -- finally, upload the list again, the group should be as
            -- it was before (the bug in #313 was that "abelian group"
            -- was created again as a term with no parent)
            _ <- uploadJSONList log_cfg port token cId "test-data/ngrams/simple.json" clientEnv

            -- old (imported) term shouldn't become parentless
            -- (#313 error was that we had [newTerm, importedTerm] instead)
            -- NOTE: Unfortunately, I'm not able to reproduce this
            -- error here, though I tried. Something is missing, maybe
            -- some nodestory integration with tests?
            checkNgrams [ (newTerm, [importedTerm]) ]

            pure ()

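    -- Tests for the TSV upload endpoint: 'ngramsListFromTSVData' is first
    -- exercised directly on the fixture files (pure parsing), then through
    -- a full HTTP round-trip against the list endpoint.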
    describe "POST /api/v1.0/lists/:id/csv/add/form/async (CSV)" $ do

      it "parses TSV via ngramsListFromTSVData" $ \(SpecContext _testEnv _port _app _) -> do
        simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/simple.tsv")
        ngramsListFromTSVData simpleNgrams `shouldBe`
          Right (Map.fromList [ (NgramsTerms, Versioned 0 $ Map.fromList [
                                  (NgramsTerm "abelian group", NgramsRepoElement 1 MapTerm Nothing Nothing (MSet mempty))
                                , (NgramsTerm "brazorf", NgramsRepoElement 1 StopTerm Nothing Nothing (MSet mempty)) ])])

      it "parses TSV with UTF-8 issues" $ \(SpecContext _testEnv _port _app _) -> do
        simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/433-utf-encoding-issue.tsv")
        -- we don't care about the output, only that the file was parsed without errors (this file is garbage)
        ngramsListFromTSVData simpleNgrams `shouldSatisfy` isRight

      it "allows uploading a TSV ngrams file" $ \(SpecContext testEnv port app _) -> do
        cId <- newCorpusForUser testEnv "alice"
        let log_cfg = (test_config testEnv) ^. gc_logging
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            ([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId))
                                        [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|]
            -- Upload the TSV doc
            simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/simple.tsv")
            let params = WithTextFile { _wtf_filetype = FType.TSV
                                      , _wtf_data = simpleNgrams
                                      , _wtf_name = "simple.tsv" }
            ji <- checkEither $ liftIO $ runClientM (add_tsv_to_list token listId params) clientEnv
            _ <- pollUntilWorkFinished log_cfg port ji

            -- Now check that we can retrieve the ngrams
            liftIO $ do
              eRes <- runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv
              eRes `shouldSatisfy` isRight
              let (Right res) = eRes
              Just res `shouldBe` JSON.decode [json| {"version":0
                                                     ,"count":1
                                                     ,"data":[
                                                        {"ngrams":"abelian group"
                                                        ,"size":1
                                                        ,"list":"MapTerm"
                                                        ,"occurrences":[],"children":[]}
                                                      ]
                                                     } |]
              -- let getStopUrl = "/node/" +| listId |+ "/ngrams?ngramsType=Terms&listType=StopTerm&list="+| listId |+"&limit=50"
              eResStop <- runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just StopTerm) Nothing Nothing Nothing Nothing) clientEnv
              eResStop `shouldSatisfy` isRight
              let (Right resStop) = eResStop
              Just resStop `shouldBe` JSON.decode [json| {"version":0
                                                         ,"count":1
                                                         ,"data":[
                                                            {"ngrams":"brazorf"
                                                            ,"size":1
                                                            ,"list":"StopTerm"
                                                            ,"occurrences":[],"children":[]}
                                                          ]
                                                         } |]

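    -- Tests for importing whole document lists into a corpus. The second
    -- test reproduces #385: trashed documents must no longer count towards
    -- ngrams occurrence scores.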
    describe "POST /api/v1.0/corpus/:id/add/form/async (JSON)" $ do

      it "allows uploading a JSON docs file" $ \(SpecContext testEnv port app _) -> do
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            void $ createFortranDocsList testEnv port clientEnv token

      it "doesn't use trashed documents for score calculation (#385)" $ \(SpecContext testEnv port app _) -> do
        let log_cfg = (test_config testEnv) ^. gc_logging
        withApplication app $ do
          withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
            corpusId <- createFortranDocsList testEnv port clientEnv token
            tr1 <- liftIO $ do
              (HashedResponse _ tr1)
                <- checkEither $ runClientM (get_table token
                                                       corpusId
                                                       (Just APINgrams.Docs)
                                                       (Just 10)
                                                       (Just 0)
                                                       (Just Facet.DateDesc)
                                                       (Just $ RawQuery "fortran")
                                                       Nothing
                                            ) clientEnv
              length (tr_docs tr1) `shouldBe` 2
              pure tr1

            termsNodeId <- uploadJSONList log_cfg port token corpusId "test-data/ngrams/GarganText_NgramsTerms-nodeId-177.json" clientEnv

            liftIO $ do
              -- Now let's check the score for the "fortran" ngram.
              (VersionedWithCount _ _ (NgramsTable [fortran_ngram]))
                <- checkEither $ runClientM (get_table_ngrams token
                                                              corpusId
                                                              APINgrams.Terms
                                                              termsNodeId
                                                              10
                                                              (Just 0)
                                                              (Just MapTerm)
                                                              Nothing
                                                              Nothing
                                                              Nothing
                                                              Nothing
                                             ) clientEnv
              length (_ne_occurrences fortran_ngram) `shouldBe` 2

            -- At this point, we need to trash one of the two documents which contain
            -- the "fortran" occurrence, and this should be reflected in the ngrams.
            trash_document token (Facet.facetDoc_id $ Prelude.head (tr_docs tr1)) corpusId

            -- Check that the number of returned documents has decreased
            liftIO $ do
              (HashedResponse _ tr2)
                <- checkEither $ runClientM (get_table token
                                                       corpusId
                                                       (Just APINgrams.Docs)
                                                       (Just 10)
                                                       (Just 0)
                                                       (Just Facet.DateDesc)
                                                       (Just $ RawQuery "fortran")
                                                       Nothing
                                            ) clientEnv
              length (tr_docs tr2) `shouldBe` 1

            liftIO $ do
              -- Now let's check the score for the "fortran" ngram again. It must have
              -- decreased by 1, because one of the matching documents has been trashed.
              (VersionedWithCount _ _ (NgramsTable [fortran_ngram']))
                <- checkEither $ runClientM (get_table_ngrams token
                                                              corpusId
                                                              APINgrams.Terms
                                                              termsNodeId
                                                              10
                                                              (Just 0)
                                                              (Just MapTerm)
                                                              Nothing
                                                              Nothing
                                                              Nothing
                                                              Nothing
                                             ) clientEnv
              length (_ne_occurrences fortran_ngram') `shouldBe` 1

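-- | Create a private folder and a corpus for alice, then import the documents
-- found at @testDataPath@ into the corpus, polling until the worker job has
-- finished. Returns the id of the new corpus.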
createDocsList :: FilePath
               -> TestEnv
               -> Int
               -> ClientEnv
               -> Token
               -> WaiSession () CorpusId
createDocsList testDataPath testEnv port clientEnv token = do
  folderId <- liftIO $ newPrivateFolderForUser testEnv alice
  ([corpusId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build folderId))
                                [aesonQQ|{"pn_typename":"NodeCorpus","pn_name":"TestCorpus"}|]
  -- Import the docsList with only two documents, both containing a "fortran" term.
  simpleDocs <- liftIO (TIO.readFile =<< getDataFileName testDataPath)
  let newWithForm = mkNewWithForm simpleDocs (T.pack $ takeBaseName testDataPath)
  ji <- checkEither $ liftIO $ runClientM (add_file_async token corpusId newWithForm) clientEnv
  ji' <- pollUntilWorkFinished log_cfg port ji
  liftIO $ ji' `shouldBe` ji
  pure corpusId
  where
    log_cfg = (test_config testEnv) ^. gc_logging

createFortranDocsList :: TestEnv -> Int -> ClientEnv -> Token -> WaiSession () CorpusId
createFortranDocsList testEnv port =
  createDocsList "test-data/ngrams/GarganText_DocsList-nodeId-177.json" testEnv port

updateNode :: LogConfig -> Int -> ClientEnv -> Token -> NodeId -> WaiSession () ()
updateNode log_cfg port clientEnv token nodeId = do
  let params = UpdateNodeParamsTexts Both
  ji <- checkEither $ liftIO $ runClientM (update_node token nodeId params) clientEnv
  ji' <- pollUntilWorkFinished log_cfg port ji
  liftIO $ ji' `shouldBe` ji

mkNewWithForm :: T.Text -> T.Text -> NewWithForm
mkNewWithForm content name = NewWithForm
  { _wf_filetype   = FType.JSON
  , _wf_fileformat = FType.Plain
  , _wf_data       = content
  , _wf_lang       = Just Lang.EN
  , _wf_name       = name
  , _wf_selection  = FlowSocialListWithPriority MySelfFirst
  }

add_file_async :: Token
               -> CorpusId
               -> NewWithForm
               -> ClientM JobInfo
add_file_async (toServantToken -> token) corpusId nwf =
  clientRoutes & apiWithCustomErrorScheme
               & ($ GES_new)
               & backendAPI
               & backendAPI'
               & mkBackEndAPI
               & gargAPIVersion
               & gargPrivateAPI
               & mkPrivateAPI
               & ($ token)
               & addWithFormAPI
               & addWithFormEp
               & ($ corpusId)
               & workerAPIPost
               & (\submitForm -> submitForm nwf)

-- | Utility to trash a document by performing a raw query towards GQL. Not very type safe,
-- but it will get the job done for now.
trash_document :: Token
               -> NodeId
               -- ^ The context id to delete, i.e. the document ID.
               -> CorpusId
               -- ^ The parent corpus ID this document is attached to.
               -> WaiSession () ()
trash_document token docId cpsId =
  void $ protectedJSON @JSON.Value token "POST" gqlUrl [aesonQQ| {
      "query": #{operation},
      "operationName": "update_node_context_category",
      "variables": {}
    }|]
  where
    operation :: Prelude.String
    operation = printf "mutation update_node_context_category { update_node_context_category(context_id: %d, node_id: %d, category: 0) }" contextId corpusId

    contextId :: Int
    contextId = _NodeId docId

    corpusId :: Int
    corpusId = _NodeId cpsId