[test] add test for ngrams list query

parent cdfd7dc0
Pipeline #7968 failed with stages
in 27 minutes and 39 seconds
......@@ -280,8 +280,10 @@ library
Gargantext.Core.Worker.PGMQTypes
Gargantext.Core.Worker.Types
Gargantext.Database.Action.Flow
Gargantext.Database.Action.Flow.List
Gargantext.Database.Action.Flow.Types
Gargantext.Database.Action.Flow.Utils
Gargantext.Database.Action.Metrics.NgramsByContext
Gargantext.Database.Action.Metrics.TFICF
Gargantext.Database.Action.Search
Gargantext.Database.Action.User
......@@ -439,13 +441,11 @@ library
Gargantext.Database.Action.Delete
Gargantext.Database.Action.Flow.Annuaire
Gargantext.Database.Action.Flow.Extract
Gargantext.Database.Action.Flow.List
Gargantext.Database.Action.Flow.Pairing
Gargantext.Database.Action.Index
Gargantext.Database.Action.Learn
Gargantext.Database.Action.Mail
Gargantext.Database.Action.Metrics
Gargantext.Database.Action.Metrics.NgramsByContext
Gargantext.Database.Action.Node
Gargantext.Database.Action.Share
Gargantext.Database.Admin.Access
......@@ -863,11 +863,12 @@ test-suite garg-test
Test.Database.Operations
Test.Database.Operations
Test.Database.Operations.DocumentSearch
Test.Database.Operations.DocumentSearch
Test.Database.Operations.NgramsByContext
Test.Database.Operations.NodeStory
Test.Database.Operations.NodeStory
Test.Database.Operations.PublishNode
Test.Database.Operations.PublishNode
Test.Database.Operations.Types
Test.Database.Setup
Test.Database.Setup
Test.Database.Transactions
......
......@@ -10,49 +10,36 @@ module Test.Database.Operations (
, nodeStoryTests
) where
import Control.Monad.Reader
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Database.PostgreSQL.Simple
import Database.PostgreSQL.Simple.SqlQQ
import Gargantext.API.Node.Corpus.Update
import Gargantext.Core
import Gargantext.Core.Mail (EmailAddress)
import Database.PostgreSQL.Simple.SqlQQ ( sql )
import Gargantext.API.Node.Corpus.Update ( addLanguageToCorpus )
import Gargantext.Core ( Lang(IT, EN) )
import Gargantext.Core.Types.Individu
import Gargantext.Database.Action.User
import Gargantext.Database.Action.User.New
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
import Gargantext.Database.Action.User ( getUserId )
import Gargantext.Database.Action.User.New ( mkNewUser, new_user )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus(_hc_lang) )
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Transactional ( mkPGQuery, runDBQuery, runDBTx )
import Gargantext.Database.Query.Table.Node ( getCorporaWithParentId, MkCorpus(mk) )
import Gargantext.Database.Query.Tree.Root (getRootId)
import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Prelude
import Test.API.Setup (createAliceAndBob, setupEnvironment)
import Test.Database.Operations.DocumentSearch
import Test.Database.Operations.NgramsByContext qualified as NBC
import Test.Database.Operations.NodeStory
import Test.Database.Operations.PublishNode
import Test.Database.Operations.Types
import Test.Database.Setup (withTestDB)
import Test.Database.Types
import Test.HUnit hiding (assert)
import Test.HUnit ( Assertion )
import Test.Hspec
import Test.QuickCheck
import Test.QuickCheck.Monadic
-- | Keeps a log of usernames we have already generated, so that our
-- roundtrip tests won't fail.
uniqueArbitraryNewUser :: Int -> Gen (NewUser GargPassword)
uniqueArbitraryNewUser currentIx = do
ur <- (`mappend` ((show currentIx :: Text) <> "-")) <$> ascii_txt
let email = ur <> "@foo.com"
NewUser <$> pure ur <*> pure email <*> elements arbitraryPassword
where
ascii_txt :: Gen T.Text
ascii_txt = fmap (T.pack . getPrintableString) arbitrary
import Test.QuickCheck ( Property )
import Test.QuickCheck.Monadic ( monadicIO, pick, run, PropertyM )
tests :: Spec
tests = parallel $ around withTestDB $ beforeWith (\ctx -> setupEnvironment ctx >>= (const $ pure ctx)) $
tests = sequential $ around withTestDB $ beforeWith (\ctx -> setupEnvironment ctx >>= (const $ pure ctx)) $
describe "Database" $ do
describe "User creation" $ do
it "Simple write/read" writeRead01
......@@ -74,6 +61,13 @@ tests = parallel $ around withTestDB $ beforeWith (\ctx -> setupEnvironment ctx
it "Can perform more complex searches using the boolean API" corpusSearch03
it "Can correctly count doc score" corpusScore01
it "Can perform search with spaces for doc in db" corpusSearchDB01
describe "NgramsByContext Operations" $ beforeWith NBC.setupNgramsCorpus $ do
it "returns correct occurrences for ngrams" $
NBC.testGetOccByNgramsOnlyFast
it "handles empty results gracefully" $
NBC.testGetOccByNgramsOnlyFastEmptyList
it "returns valid context IDs" $
NBC.testGetOccByNgramsOnlyFastValidContextIds
beforeWith (\env -> createAliceAndBob env >>= (const $ pure env)) $
describe "Publishing a node" $ do
it "Returns the root public folder for a user" testGetUserRootPublicNode
......@@ -100,28 +94,6 @@ nodeStoryTests = sequential $
setupDBAndCorpus testsFunc = withTestDB $ \env -> do
setupEnvironment env
testsFunc env
data ExpectedActual a =
Expected a
| Actual a
deriving Show
instance Eq a => Eq (ExpectedActual a) where
(Expected a) == (Actual b) = a == b
(Actual a) == (Expected b) = a == b
_ == _ = False
testUsername' :: Text
testUsername' = "alfredo"
testUsername :: User
testUsername = UserName testUsername'
testUser :: EmailAddress
testUser = testUsername' <> "@well-typed.com"
testUserPassword :: GargPassword
testUserPassword = GargPassword "my_secret"
testCorpusName :: Text
testCorpusName = "Text_Corpus"
writeRead01 :: TestEnv -> Assertion
writeRead01 env = runTestMonad env $ do
......
{-|
Module : Test.Database.Operations.NgramsByContext
Description : Tests for ngrams occurrence queries
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Test.Database.Operations.NgramsByContext where
import Data.Aeson.QQ.Simple ( aesonQQ )
import Data.Aeson.Types ( parseEither )
import Data.HashMap.Strict qualified as HM
import Data.Text qualified as T
import Database.PostgreSQL.Simple qualified as PSQL
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types qualified as PSQL
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core ( Lang(EN) )
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types
import Gargantext.Core.Worker.Env ()
import Gargantext.Database.Action.Flow
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument )
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error (HasNodeError, nodeErrorWith)
import Gargantext.Database.Query.Tree.Root
import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Prelude
import Test.Database.Operations.Types ( testUsername )
import Test.Database.Types ( TestEnv, runTestMonad )
import Test.HUnit ( Assertion, assertFailure )
import Test.Hspec.Expectations ( shouldBe, shouldSatisfy )
-- Test documents with known ngrams
testDoc_01 :: HyperdataDocument
testDoc_01 = either errorTrace identity $ parseEither parseJSON $ [aesonQQ|
{ "doi":"test01"
, "publication_year":2023
, "language_iso2":"EN"
, "authors":"Alice Smith"
, "abstract":"This paper discusses functional programming and type systems."
, "title":"Introduction to Functional Programming"
}
|]
testDoc_02 :: HyperdataDocument
testDoc_02 = either errorTrace identity $ parseEither parseJSON $ [aesonQQ|
{ "doi":"test02"
, "publication_year":2023
, "language_iso2":"EN"
, "authors":"Bob Jones"
, "abstract":"We explore functional programming paradigms in modern languages."
, "title":"Functional Programming Paradigms"
}
|]
testDoc_03 :: HyperdataDocument
testDoc_03 = either errorTrace identity $ parseEither parseJSON $ [aesonQQ|
{ "doi":"test03"
, "publication_year":2023
, "language_iso2":"EN"
, "authors":"Carol White"
, "abstract":"Type systems ensure program correctness and safety."
, "title":"Type Systems in Programming Languages"
}
|]
getCorporaWithParentIdOrFail :: HasNodeError err => NodeId -> DBQuery err x (Node HyperdataCorpus)
getCorporaWithParentIdOrFail parentId = do
xs <- getCorporaWithParentId parentId
case xs of
[corpus] -> pure corpus
_ -> nodeErrorWith $ "getCorporaWithParentIdOrFail failed: " <> T.pack (show xs)
setupNgramsCorpus :: TestEnv -> IO TestEnv
setupNgramsCorpus env = runTestMonad env $ do
let user = testUsername
parentId <- runDBQuery $ getRootId user
[corpus] <- runDBQuery $ getCorporaWithParentId parentId
let corpusId = _node_id corpus
let docs = [testDoc_01, testDoc_02, testDoc_03]
_docIds <- addDocumentsToHyperCorpus (Nothing :: Maybe HyperdataCorpus)
(Multi EN)
corpusId
docs
userId <- runDBQuery $ getUserId user
listId <- runDBTx $ getOrMkList corpusId userId
-- Get Terms that were actually extracted
(extractedTerms :: [(Int, Text)]) <- runDBQuery $ mkPGQuery [sql|
SELECT DISTINCT ng.id, ng.terms
FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id
WHERE cng.ngrams_type = 4 -- NgramsTerms
AND cng.context_id IN (
SELECT context_id FROM nodes_contexts WHERE node_id = ?
)
|] (PSQL.Only corpusId)
-- Directly insert into node_stories
runDBTx $ void $ mkPGUpdate [sql|
INSERT INTO node_stories (node_id, ngrams_id, ngrams_type_id, version)
SELECT ?, unnest(?::int[]), 4, 1
ON CONFLICT DO NOTHING
|] (listId, PSQL.PGArray $ map fst extractedTerms)
pure env
-- Test that getOccByNgramsOnlyFast returns correct occurrences
testGetOccByNgramsOnlyFast :: TestEnv -> Assertion
testGetOccByNgramsOnlyFast env = runTestMonad env $ do
result <- runDBQuery $ do
parentId <- getRootId testUsername
corpus <- getCorporaWithParentIdOrFail parentId
let corpusId = _node_id corpus
-- Get the list node (should be created during corpus setup)
lists <- getListsWithParentId corpusId
case lists of
[] -> nodeErrorWith "No list found for corpus"
(listNode:_) -> do
let listId = _node_id listNode
-- Query occurrences for NgramsTerms type
getOccByNgramsOnlyFast corpusId listId NgramsTerms
liftIO $ do
-- Verify we got a non-empty result
result `shouldSatisfy` (not . HM.null)
-- Check that known terms appear in results
-- "functional programming" should appear in doc_01 (on doc_02 it's with "paradigm")
let functionalProg = NgramsTerm "functional programming"
case HM.lookup functionalProg result of
Nothing -> assertFailure "Expected 'functional programming' in results"
Just contexts -> do
length contexts `shouldBe` 1
contexts `shouldSatisfy` (not . null)
-- "functional programming paradigms" appears in exactly 1 doc (doc_02)
let functionalProgParadigms = NgramsTerm "functional programming paradigms"
case HM.lookup functionalProgParadigms result of
Nothing -> assertFailure "Expected 'functional programming paradigms' in results"
Just contexts -> do
length contexts `shouldBe` 1
contexts `shouldSatisfy` (not . null)
-- "type systems" should appear in doc_01 and doc_03
let typeSystems = NgramsTerm "type systems"
case HM.lookup typeSystems result of
Nothing -> pure () -- might not be extracted depending on ngrams config
Just contexts -> do
contexts `shouldSatisfy` (not . null)
-- Test that empty list returns empty results
testGetOccByNgramsOnlyFastEmptyList :: TestEnv -> Assertion
testGetOccByNgramsOnlyFastEmptyList env = runTestMonad env $ do
result <- runDBQuery $ do
parentId <- getRootId testUsername
corpus <- getCorporaWithParentIdOrFail parentId
let corpusId = _node_id corpus
lists <- getListsWithParentId corpusId
case lists of
[] -> nodeErrorWith "No list found for corpus"
(listNode:_) -> do
let listId = _node_id listNode
-- Query with empty corpus should return empty or very limited results
getOccByNgramsOnlyFast corpusId listId NgramsTerms
liftIO $ do
-- Should return a HashMap (possibly empty if no node_stories exist)
result `shouldSatisfy` HM.null
-- Test that results contain valid context IDs
testGetOccByNgramsOnlyFastValidContextIds :: TestEnv -> Assertion
testGetOccByNgramsOnlyFastValidContextIds env = runTestMonad env $ do
result <- runDBQuery $ do
parentId <- getRootId testUsername
corpus <- getCorporaWithParentIdOrFail parentId
let corpusId = _node_id corpus
lists <- getListsWithParentId corpusId
case lists of
[] -> nodeErrorWith "No list found"
(listNode:_) -> do
let listId = _node_id listNode
getOccByNgramsOnlyFast corpusId listId NgramsTerms
liftIO $ do
-- All context lists should be non-empty for terms that exist
HM.toList result `shouldSatisfy` all (\(_, contexts) -> not (null contexts))
-- Context IDs should be positive integers
let allContexts = concatMap snd $ HM.toList result
allContexts `shouldSatisfy` all (\(UnsafeMkContextId cid) -> cid > 0)
......@@ -228,7 +228,7 @@ tests = describe "Database Transactions" $ do
-- | Testing the transactional behaviour outside the classic GGTX operations.
-- We test that throwing exceptions in IO leads to rollbacks.
counterDBTests :: Spec
counterDBTests = parallel $ around withTestCounterDB $
counterDBTests = sequential $ around withTestCounterDB $
describe "Counter Transactions" $ do
describe "Opaleye count queries" $ do
it "Supports counting rows" opaCountQueries
......@@ -248,7 +248,7 @@ counterDBTests = parallel $ around withTestCounterDB $
-- | Testing the transactional behaviour inside the classic GGTX operations.
-- We test that throwing something like a 'NodeError' results in a proper rollback.
ggtxDBTests :: Spec
ggtxDBTests = parallel $ around withTestDB $ beforeWith (\ctx -> setupEnvironment ctx >>= (const $ pure ctx)) $
ggtxDBTests = sequential $ around withTestDB $ beforeWith (\ctx -> setupEnvironment ctx >>= (const $ pure ctx)) $
describe "GGTX Transactions" $ do
describe "Rollback support" $ do
it "can rollback if a ggtx error gets thrown" testGGTXErrorRollback
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment