• Alfredo Di Napoli's avatar
    Fix bug in selectCountDocs · 6ff05ee1
    Alfredo Di Napoli authored
    The refactored DB API now has a separate building block to create an Opaleye query that
    counts the number of returned results; we do that via `countRows`, exactly like the previous version.
    
    However, I have discovered a small footgun in the Opaleye API -- if you have
    two `Select` statements both calling countRows in a chain, that will always yield a value of 1,
    because the inner `countRows` will give you the actual number of results by returning
    a single row with an integer inside (i.e. the count).
    
    However, the subsequent (outer) call to `countRows` will return the number of rows
    of the previous step .. which is always going to be one!
    
    The bug was that I had left somewhere the spurious `countRows` in the query which
    would return the number of documents needed for the TFICF field, triggering the bug
    (because then we had `it` ALWAYS equal to 1.0).
    
    In the new API, while we cannot prevent the bug at the type level we can
    easily do an audit by grepping for `countRows`, making sure we have exactly one instance,
    i.e. inside `mkOpaCountQuery`.
    6ff05ee1
Operations.hs 8.87 KB
{-# LANGUAGE QuasiQuotes          #-}
{-# LANGUAGE TupleSections        #-}
{-# LANGUAGE TypeApplications     #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE TypeFamilies         #-}
{-# OPTIONS_GHC -Wno-orphans      #-}

module Test.Database.Operations (
     tests
   , nodeStoryTests
  ) where

import Control.Monad.Reader
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Database.PostgreSQL.Simple
import Database.PostgreSQL.Simple.SqlQQ
import Gargantext.API.Node.Corpus.Update
import Gargantext.Core
import Gargantext.Core.Mail (EmailAddress)
import Gargantext.Core.Types.Individu
import Gargantext.Database.Action.User
import Gargantext.Database.Action.User.New
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Tree.Root (getRootId)
import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Prelude
import Test.API.Setup (createAliceAndBob, setupEnvironment)
import Test.Database.Operations.DocumentSearch
import Test.Database.Operations.NodeStory
import Test.Database.Operations.PublishNode
import Test.Database.Setup (withTestDB)
import Test.Database.Types
import Test.Hspec
import Test.QuickCheck.Monadic
import Test.Tasty.HUnit hiding (assert)
import Test.Tasty.QuickCheck


-- | Keeps a log of usernames we have already generated, so that our
-- roundtrip tests won't fail.
uniqueArbitraryNewUser :: Int -> Gen (NewUser GargPassword)
uniqueArbitraryNewUser currentIx = do
  ur <- (`mappend` ((show currentIx :: Text) <> "-")) <$> ascii_txt
  let email = ur <> "@foo.com"
  NewUser <$> pure ur <*> pure email <*> elements arbitraryPassword
  where
   ascii_txt :: Gen T.Text
   ascii_txt = fmap (T.pack . getPrintableString) arbitrary

tests :: Spec
tests = parallel $ around withTestDB $ beforeWith (\ctx -> setupEnvironment ctx >>= (const $ pure ctx)) $
  describe "Database" $ do
    describe "User creation" $ do
      it "Simple write/read" writeRead01
      it "Read/Write roundtrip" prop_userCreationRoundtrip
    describe "With test user" $ beforeWith setupTestUser $ do
      describe "User tests" $ do
        it "Simple duplicate"  mkUserDup
      describe "Corpus creation" $ do
        it "Simple write/read" corpusReadWrite01
      describe "With test corpus" $ beforeWith setupTestCorpus $ do
        it "Can add language to Corpus" corpusAddLanguage
        describe "With test documents" $ beforeWith addCorpusDocuments $ do
          it "Can add documents to a Corpus" corpusAddDocuments
          describe "Corpus search" $ do
            it "Can stem query terms" stemmingTest
            it "Can return the number of docs in a corpus" corpusReturnCount
            it "Can perform a simple search inside documents" corpusSearch01
            it "Can perform search by author in documents" corpusSearch02
            it "Can perform more complex searches using the boolean API" corpusSearch03
            it "Can correctly count doc score" corpusScore01
            it "Can perform search with spaces for doc in db" corpusSearchDB01
    beforeWith (\env -> createAliceAndBob env >>= (const $ pure env)) $
      describe "Publishing a node" $ do
        it "Returns the root public folder for a user" testGetUserRootPublicNode
        it "Correctly detects if a node is read only"  testIsReadOnlyWorks
        it "Publishes the root and its first level children"  testPublishRecursiveFirstLevel
        it "Publishes the root and its recursive children"  testPublishRecursiveNLevel
        it "Publishes in a lenient way but it's still considered read-only"  testPublishLenientWorks
        
nodeStoryTests :: Spec
nodeStoryTests = sequential $
  -- run 'withTestDB' before _every_ test item
  around setupDBAndCorpus $
  describe "Database - node story" $ do
    describe "Node story" $ do
      it "[#281] Can create a list" createListTest
      it "[#281] Can query node story" queryNodeStoryTest
      it "[#218] Can add new terms to node story" insertNewTermsToNodeStoryTest
      it "[#281] Can add new terms (with children) to node story" insertNewTermsWithChildrenToNodeStoryTest
      it "[#281] Fixes child terms to match parents' terms" insertNodeStoryChildrenWithDifferentNgramsTypeThanParentTest
      it "[#281] Can update node story when 'setListNgrams' is called" setListNgramsUpdatesNodeStoryTest
      it "[#281] When 'setListNgrams' is called, childrens' parents are updated" setListNgramsUpdatesNodeStoryWithChildrenTest
      it "[#281] Correctly commits patches to node story - simple" commitPatchSimpleTest
  where
    setupDBAndCorpus testsFunc = withTestDB $ \env -> do
      setupEnvironment env
      testsFunc env
  
data ExpectedActual a =
    Expected a
  | Actual a
  deriving Show

instance Eq a => Eq (ExpectedActual a) where
  (Expected a) == (Actual b)   = a == b
  (Actual a)   == (Expected b) = a == b
  _ == _ = False


testUsername' :: Text
testUsername' = "alfredo"
testUsername :: User
testUsername = UserName testUsername'
testUser :: EmailAddress
testUser = testUsername' <> "@well-typed.com"
testUserPassword :: GargPassword
testUserPassword = GargPassword "my_secret"
testCorpusName :: Text
testCorpusName = "Text_Corpus"

writeRead01 :: TestEnv -> Assertion
writeRead01 env = runTestMonad env $ do
  let nur1 = mkNewUser testUser testUserPassword
  let nur2 = mkNewUser "paul@acme.com" (GargPassword "my_secret")

  uid1 <- new_user nur1
  uid2 <- new_user nur2

  liftBase $ uid1 `shouldBe` UnsafeMkUserId 2
  liftBase $ uid2 `shouldBe` UnsafeMkUserId 3

  -- Getting the users by username returns the expected IDs
  (uid1', uid2') <- runDBQuery $ (,) <$> getUserId testUsername <*> getUserId (UserName "paul")
  liftBase $ uid1' `shouldBe` UnsafeMkUserId 2
  liftBase $ uid2' `shouldBe` UnsafeMkUserId 3

-- | Create test user, to be used in subsequent tests
setupTestUser :: TestEnv -> IO TestEnv
setupTestUser env = runTestMonad env $ do
  let nur = mkNewUser testUser testUserPassword
  _ <- new_user nur
  pure env

mkUserDup :: TestEnv -> Assertion
mkUserDup env = do
  let x = runTestMonad env $ do
        let nur = mkNewUser testUser testUserPassword
        
        -- This should fail, because user 'alfredo' exists already.
        new_user nur
  --
  -- SqlError {sqlState = "23505", sqlExecStatus = FatalError
  --          , sqlErrorMsg = "duplicate key value violates unique constraint \"auth_user_username_idx1\""
  --          , sqlErrorDetail = "Key (username)=(alfredo) already exists.", sqlErrorHint = ""
  --          }
  --
  -- Postgres increments the underlying SERIAL for the user even if the request fails, see
  -- https://stackoverflow.com/questions/37204749/serial-in-postgres-is-being-increased-even-though-i-added-on-conflict-do-nothing
  -- This means that the next available ID is '3'.
  x `shouldThrow` (\SqlError{..} -> sqlErrorDetail == ("Key (username)=(" <> TE.encodeUtf8 testUsername' <> ") already exists."))

runEnv :: TestEnv -> TestMonad a -> PropertyM IO a
runEnv env act = run (runTestMonad env act)

prop_userCreationRoundtrip :: TestEnv -> Property
prop_userCreationRoundtrip env = monadicIO $ do
  nextAvailableCounter <- run (nextCounter $ test_usernameGen env)
  nur  <- pick (uniqueArbitraryNewUser nextAvailableCounter)
  uid <- runEnv env (new_user nur)
  ur' <- runEnv env (runDBQuery $ getUserId (UserName $ _nu_username nur))
  run (Expected uid `shouldBe` Actual ur')

-- | Create a test corpus, to be used in subsequent tests
setupTestCorpus :: TestEnv -> IO TestEnv
setupTestCorpus env = runTestMonad env $ runDBTx $ do
  uid      <- getUserId testUsername
  parentId <- getRootId testUsername
  _ <- mk (Just testCorpusName) (Nothing :: Maybe HyperdataCorpus) parentId uid
  pure env

-- | We test that we can create and later read-back a 'Corpus'.
corpusReadWrite01 :: TestEnv -> Assertion
corpusReadWrite01 env = do
  runTestMonad env $ do
    uid      <- runDBQuery $ getUserId testUsername
    parentId <- runDBQuery $ getRootId testUsername
    [corpusId] <- runDBTx $ mk (Just testCorpusName) (Nothing :: Maybe HyperdataCorpus) parentId uid
    [Only corpusId'] <- runDBQuery $ mkPGQuery [sql|SELECT id FROM nodes WHERE name = ?|] (Only testCorpusName)
    liftIO $ corpusId `shouldBe` UnsafeMkNodeId corpusId'
    -- Retrieve the corpus by Id
    [corpus] <- runDBQuery $ getCorporaWithParentId parentId
    liftIO $ corpusId `shouldBe` (_node_id corpus)

-- | We test that we can update the existing language for a 'Corpus'.
corpusAddLanguage :: TestEnv -> Assertion
corpusAddLanguage env = do
  runTestMonad env $ do
    parentId <- runDBQuery $ getRootId testUsername
    corpus   <- runDBQuery $ getCorporaWithParentIdOrFail parentId
    liftIO $ (_hc_lang . _node_hyperdata $ corpus) `shouldBe` Just EN -- defaults to English
    corpus' <- runDBTx $ do
      addLanguageToCorpus (_node_id corpus) IT
      getCorporaWithParentIdOrFail parentId
    liftIO $ (_hc_lang . _node_hyperdata $ corpus') `shouldBe` Just IT