Commit 80760f1f authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Add tests (and one is failing)

I need to understand why I get what I think is a weird nodeId in return
for one test.
parent 05d58045
Pipeline #7655 failed with stages
in 63 minutes and 20 seconds
......@@ -739,6 +739,7 @@ common commonTestDependencies
, cache >= 0.1.3.0
, containers ^>= 0.6.7
, epo-api-client
, exceptions
, fast-logger ^>= 3.2.2
, filepath ^>= 1.4.2.2
, fmt
......
......@@ -33,7 +33,7 @@ import Gargantext.API.Errors.TH (deriveHttpStatusCode)
import Gargantext.API.Errors.Types as Types
import Gargantext.Database.Query.Table.Node.Error hiding (nodeError)
import Gargantext.Database.Query.Tree hiding (treeError)
import Gargantext.Utils.Jobs.Monad (JobError(..))
import Gargantext.Utils.Jobs.Monad (JobError(..), mkHumanFriendly)
import Network.HTTP.Types.Status qualified as HTTP
import Servant.Server (ServerError(..), err404, err500)
......@@ -155,6 +155,8 @@ nodeErrorToFrontendError ne = case ne of
-> mkFrontendErrShow $ FE_node_creation_failed_insert_node uId pId
UserHasNegativeId uid
-> mkFrontendErrShow $ FE_node_creation_failed_user_negative_id uid
InsertDocFailed rea
-> mkFrontendErrNoDiagnostic $ FE_node_generic_exception (mkHumanFriendly rea) -- TODO(adn) specialise
NodeLookupFailed reason
-> case reason of
NodeDoesNotExist nid
......@@ -176,7 +178,7 @@ nodeErrorToFrontendError ne = case ne of
NeedsConfiguration
-> mkFrontendErrShow $ FE_node_needs_configuration
NodeError err
-> mkFrontendErrShow $ FE_node_generic_exception (T.pack $ displayException err)
-> mkFrontendErrShow $ FE_node_generic_exception err
NodeIsReadOnly nodeId reason
-> mkFrontendErrShow $ FE_node_is_read_only nodeId reason
MoveError sourceId targetId reason
......
......@@ -51,6 +51,13 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
, allDataOrigins
, do_api
-- * Useful exports for testing
, noUncommittedNgrams
, insertMasterDoc
, insertMasterDocs
, extractNgramsFromDocument
, DocumentHashId(..)
, InsertDocError(..)
)
where
......@@ -102,7 +109,7 @@ import Gargantext.Database.Query.Table.ContextNodeNgrams2 ( ContextNodeNgrams2Po
import Gargantext.Database.Query.Table.Node ( MkCorpus, insertDefaultNodeIfNotExists, getOrMkList, getNodeWith )
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
import Gargantext.Database.Query.Table.Node.Document.Insert ( ToNode(toNode), UniqParameters (..), newUniqIdHash ) -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..), DocumentHashId(..), InsertDocError(..), nodeError, NodeError (..), NodeCreationError (..))
import Gargantext.Database.Query.Table.NodeContext (selectDocNodesOnlyId)
import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser(..), getOrMkRoot, getOrMkRootWithCorpus, userFromMkCorpusUser)
......@@ -112,7 +119,7 @@ import Gargantext.Database.Types
import Gargantext.Prelude hiding (try, catch, onException, to)
import Gargantext.System.Logging ( logLocM, LogLevel(DEBUG, ERROR), MonadLogger )
import Gargantext.Utils.Jobs.Error (HumanFriendlyErrorText(..))
import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) )
import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..), mkHumanFriendly )
------------------------------------------------------------------------
-- Imports for upgrade function
......@@ -337,15 +344,13 @@ addDocumentsToHyperCorpus jobHandle mb_hyper la corpusId docs = do
uncommittedNgrams <- extractNgramsFromDocuments nlp la docs
(failures, ids) <- insertMasterDocs cfg uncommittedNgrams mb_hyper docs
forM_ failures $ \failure -> do
let msg = mkHumanFriendly failure
case failure of
NgramsNotFound _mb_hashId docId -> do
let msg = UnsafeMkHumanFriendlyErrorText $ "Couldn't find the associated ngrams for input document " <> show docId <> ", therefore the added document won't have any ngrams."
emitWarning msg jobHandle
NgramsNotFound{} -> pure ()
DocumentInsertionError err -> do
-- FIXME(adn) we should give some kind of identifier to help the user
$(logLocM) ERROR $ "Error in document insertion: " <> T.pack (displayException err)
let msg = UnsafeMkHumanFriendlyErrorText $ "Skipping document insertion due to an iternal database error"
emitWarning msg jobHandle
$(logLocM) ERROR $ "Error in document insertion: " <> err
emitWarning (UnsafeMkHumanFriendlyErrorText msg) jobHandle
runDBTx $ do
void $ Doc.add corpusId (map nodeId2ContextId ids)
pure ids
......@@ -463,11 +468,6 @@ buildSocialList l user userCorpusId listId ctype = \case
type CommittedNgrams =
HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
newtype DocumentHashId
= DocumentHashId { _DocumentHashId :: T.Text }
deriving stock (Show, Eq)
deriving newtype Ord
-- | Ngrams that have been extracted from the input 'doc' but not fully associated with
-- a persisted entity on the database.
newtype UncommittedNgrams doc = UncommittedNgrams
......@@ -475,10 +475,8 @@ newtype UncommittedNgrams doc = UncommittedNgrams
deriving stock Show
deriving newtype (Semigroup, Monoid)
data InsertDocError
= NgramsNotFound !(Maybe DocumentHashId) !DocId
| DocumentInsertionError !SomeException
deriving Show
noUncommittedNgrams :: UncommittedNgrams docs
noUncommittedNgrams = UncommittedNgrams mempty
extractNgramsFromDocument :: ( UniqParameters doc
, HasText doc
......@@ -568,18 +566,16 @@ insertMasterDocs cfg uncommittedNgrams c docs =
go (!errs, !documents) doc = do
res <- try $ runDBTx (insertMasterDoc cfg uncommittedNgrams c doc)
case res of
Left err
-> pure (DocumentInsertionError err : errs, documents)
Right (Left err)
-> pure (err : errs, documents)
Right (Right d)
Left (err :: SomeException)
-> pure (DocumentInsertionError (T.pack $ displayException err) : errs, documents)
Right d
-> pure (errs, d : documents)
insertMasterDoc :: ( HasNodeError err
, UniqParameters doc
, FlowCorpus doc
, MkCorpus c
)
, UniqParameters doc
, FlowCorpus doc
, MkCorpus c
)
=> GargConfig
-> UncommittedNgrams doc
-- ^ The ngrams extracted for /all/ the documents
......@@ -588,10 +584,11 @@ insertMasterDoc :: ( HasNodeError err
-- with the node being created.
-> Maybe c
-> doc
-> DBUpdate err (Either InsertDocError DocId)
-> DBUpdate err DocId
insertMasterDoc cfg uncommittedNgrams c h = do
(masterUserId, _, masterCorpusId) <- getOrMkRootWithCorpus cfg MkCorpusUserMaster c
documentWithId <- insertDoc masterUserId masterCorpusId (toNode masterUserId Nothing h)
_ <- Doc.add masterCorpusId [_index documentWithId]
-- TODO
......@@ -600,11 +597,13 @@ insertMasterDoc cfg uncommittedNgrams c h = do
-- this will enable global database monitoring
case commitNgramsForDocument uncommittedNgrams documentWithId of
Left failed -> pure $ Left failed
Left failed ->
-- this allows rollingback the whole tx, and we won't leave dangling nodes behind.
nodeError $ NodeCreationFailed (InsertDocFailed failed)
Right ngramsDocsMap -> do
lId <- getOrMkList masterCorpusId masterUserId
_ <- saveDocNgramsWith lId ngramsDocsMap
pure $ Right (contextId2NodeId $ _index documentWithId)
pure $ contextId2NodeId $ _index documentWithId
saveDocNgramsWith :: ListId
......
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE DerivingStrategies #-}
{-|
Module : Gargantext.Database.Types.Error
Description :
......@@ -14,6 +15,8 @@ module Gargantext.Database.Query.Table.Node.Error (
NodeError(..)
, NodeCreationError(..)
, NodeLookupError(..)
, DocumentHashId(..)
, InsertDocError(..)
-- * Classes
, HasNodeError(..)
......@@ -32,18 +35,38 @@ import Data.Aeson (object)
import Data.Text qualified as T
import Gargantext.Core.Types.Individu ( Username )
import Gargantext.Database.Admin.Types.Node (ListId, NodeId(..), ContextId, UserId, ParentId)
import Gargantext.Database.Transactional
import Gargantext.Prelude hiding (sum, head)
import Gargantext.Utils.Jobs.Error (ToHumanFriendlyError(..))
import Prelude hiding (null, id, map, sum, show)
import Prelude qualified
import Gargantext.Database.Transactional
data NodeCreationError
= UserParentAlreadyExists UserId ParentId
| UserParentDoesNotExist UserId
| UserHasNegativeId UserId
| InsertNodeFailed UserId (Maybe ParentId)
| InsertDocFailed InsertDocError
deriving (Show, Eq, Generic)
newtype DocumentHashId
= DocumentHashId { _DocumentHashId :: T.Text }
deriving stock (Show, Eq)
deriving newtype (Ord, ToJSON)
data InsertDocError
= NgramsNotFound !(Maybe DocumentHashId) !NodeId
| DocumentInsertionError !T.Text
deriving (Eq, Show, Generic)
instance ToHumanFriendlyError InsertDocError where
mkHumanFriendly = \case
NgramsNotFound mb_hashId _docId -> do
"Couldn't find the associated ngrams for document with hash " <> maybe "<no hash>" _DocumentHashId mb_hashId <> ", therefore the added document won't have any ngrams."
DocumentInsertionError _err -> do
"Skipping document insertion due to an internal database error"
instance ToJSON InsertDocError
instance ToJSON NodeCreationError
renderNodeCreationFailed :: NodeCreationError -> T.Text
......@@ -52,6 +75,7 @@ renderNodeCreationFailed = \case
UserParentDoesNotExist uid -> "user id " <> T.pack (show uid) <> " has no parent"
UserHasNegativeId uid -> "user id " <> T.pack (show uid) <> " is a negative id."
InsertNodeFailed uid pid -> "couldn't create the list for user id " <> T.pack (show uid) <> " and parent id " <> T.pack (show pid)
InsertDocFailed err -> mkHumanFriendly err
data NodeLookupError
= NodeDoesNotExist NodeId
......@@ -82,12 +106,15 @@ data NodeError = NoListFound ListId
| NotImplYet
| NoContextFound ContextId
| NeedsConfiguration
| NodeError SomeException
| NodeError T.Text
-- Left for backward compatibility, but we should remove them.
| DoesNotExist NodeId
| NodeIsReadOnly NodeId T.Text
| MoveError NodeId NodeId T.Text
| NodeNotExportable NodeId T.Text
deriving Eq
instance Exception NodeError
instance Prelude.Show NodeError
where
......@@ -100,7 +127,7 @@ instance Prelude.Show NodeError
show (NodeLookupFailed reason) = "Cannot lookup node due to: " <> T.unpack (renderNodeLookupFailed reason)
show (NoContextFound n) = "Context node does not exist (" <> show n <> ")"
show NeedsConfiguration = "Needs configuration"
show (NodeError e) = "NodeError: " <> displayException e
show (NodeError e) = "NodeError: " <> T.unpack e
show (DoesNotExist n) = "Node does not exist (" <> show n <> ")"
show (NodeIsReadOnly n reason) = "Node " <> show n <> " is read only, edits not allowed. Reason: " <> T.unpack reason
show (MoveError s t reason) = "Moving " <> show s <> " to " <> show t <> " failed: " <> T.unpack reason
......@@ -136,7 +163,7 @@ class HasNodeError e where
_NodeError :: Prism' e NodeError
errorWith :: HasNodeError e => Text -> DBTx e r a
errorWith x = nodeError (NodeError $ toException $ userError $ T.unpack x)
errorWith x = nodeError (NodeError $ x)
nodeError :: HasNodeError e => NodeError -> DBTx e r a
nodeError ne = dbFail $ _NodeError # ne
......@@ -156,4 +183,4 @@ catchNodeError f g = catchError f (\e -> maybe (throwError e) g (e ^? _NodeError
dbFailWith :: HasNodeError err => T.Text -> DBTx err r b
dbFailWith x = dbFail $ _NodeError # (NodeError $ toException $ userError $ T.unpack x)
dbFailWith x = dbFail $ _NodeError # (NodeError x)
......@@ -66,6 +66,9 @@ tests = parallel $ around withTestDB $ beforeWith (\ctx -> setupEnvironment ctx
it "Can add language to Corpus" corpusAddLanguage
describe "With test documents" $ beforeWith addCorpusDocuments $ do
it "Can add documents to a Corpus" corpusAddDocuments
it "Correctly return an error if uncommitted ngrams map is empty" corpusAddDocNgramsEmpty
it "Correctly return the same Id if a document is inserted twice" corpusAddDocAlreadyExisting
it "Can survive failures in a batch" corpusAddDocBatchSurvival
describe "Corpus search" $ do
it "Can stem query terms" stemmingTest
it "Can return the number of docs in a corpus" corpusReturnCount
......@@ -81,7 +84,7 @@ tests = parallel $ around withTestDB $ beforeWith (\ctx -> setupEnvironment ctx
it "Publishes the root and its first level children" testPublishRecursiveFirstLevel
it "Publishes the root and its recursive children" testPublishRecursiveNLevel
it "Publishes in a lenient way but it's still considered read-only" testPublishLenientWorks
nodeStoryTests :: Spec
nodeStoryTests = sequential $
-- run 'withTestDB' before _every_ test item
......
......@@ -9,14 +9,20 @@ Portability : POSIX
-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Test.Database.Operations.DocumentSearch where
import Control.Lens
import Control.Monad.Catch (try, SomeException, displayException)
import Control.Monad.Reader
import Data.Aeson.QQ.Simple
import Data.Aeson.Types
import Data.Maybe (fromJust)
import Data.Text qualified as T
import Gargantext.Core
import Gargantext.Core.Config
import Gargantext.Core.NLP (HasNLPServer(..))
import Gargantext.Core.Text.Corpus.Query qualified as API
import Gargantext.Core.Text.Terms.Mono.Stem
import Gargantext.Core.Types
......@@ -30,15 +36,15 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Facet
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Error (errorWith)
import Gargantext.Database.Query.Tree.Root
import Gargantext.Database.Query.Table.NodeContext (selectCountDocs)
import Gargantext.Database.Query.Tree.Root
import Gargantext.Database.Schema.Node (NodePoly(..))
import Network.URI (parseURI)
import Prelude
import Test.Database.Types
import Test.Hspec.Expectations
import Test.Tasty.HUnit
import Gargantext.Database.Query.Table.Node.Error
exampleDocument_01 :: HyperdataDocument
......@@ -114,6 +120,31 @@ exampleDocument_04 = either error id $ parseEither parseJSON $ [aesonQQ|
}
|]
exampleDocument_05 :: HyperdataDocument
exampleDocument_05 = either error id $ parseEither parseJSON $ [aesonQQ|
{
"bdd": "Arxiv"
, "doi": ""
, "url": "http://arxiv.org/pdf/1407.5670v1"
, "title": "GQL for Dummies"
, "source": ""
, "authors": "Ennio Annio"
, "abstract": " This article provides an introduction to GraphQL" , "institutes": ""
, "language_iso2": "EN"
, "publication_date": "2014-07-21T21:20:31Z"
, "publication_year": 2014
}
|]
exampleDocument_06 :: HyperdataDocument
exampleDocument_06 = exampleDocument_05 { _hd_title = Just "GQL Part 1" }
exampleDocument_07 :: HyperdataDocument
exampleDocument_07 = exampleDocument_05 { _hd_title = Just "GQL Part 2" }
exampleDocument_08 :: HyperdataDocument
exampleDocument_08 = exampleDocument_05 { _hd_title = Just "GQL Part 3" }
getCorporaWithParentIdOrFail :: HasNodeError err => NodeId -> DBQuery err x (Node HyperdataCorpus)
getCorporaWithParentIdOrFail parentId = do
xs <- getCorporaWithParentId parentId
......@@ -131,13 +162,78 @@ addCorpusDocuments env = runTestMonad env $ do
let lang = EN
let docs = [exampleDocument_01, exampleDocument_02, exampleDocument_03, exampleDocument_04]
_ <- addDocumentsToHyperCorpus (test_job_handle env)
ids <- addDocumentsToHyperCorpus (test_job_handle env)
(Just $ _node_hyperdata $ corpus)
(Multi lang)
corpusId
docs
liftIO $ ids `shouldBe` [ UnsafeMkNodeId 1, UnsafeMkNodeId 2, UnsafeMkNodeId 3, UnsafeMkNodeId 4 ]
pure env
corpusAddDocNgramsEmpty :: TestEnv -> Assertion
corpusAddDocNgramsEmpty env = runTestMonad env $ do
cfg <- view hasConfig
parentId <- runDBQuery $ getRootId (UserName userMaster)
[corpus] <- runDBQuery $ getCorporaWithParentId parentId
let doc = exampleDocument_05
(res :: Either SomeException DocId) <- try $ runDBTx $ insertMasterDoc cfg noUncommittedNgrams (Just $ _node_hyperdata corpus) doc
case res of
Left err
-> liftIO $ displayException err
`shouldBe`
"InternalNodeError Cannot make node due to: Couldn't find the associated ngrams for document with hash \\x0b41caa65782b9570530910d1942a21bb1859386ee4c976e7b4cd0624ee966f3, therefore the added document won't have any ngrams."
Right _ -> fail $ "did not throw" <> show res
corpusAddDocAlreadyExisting :: TestEnv -> Assertion
corpusAddDocAlreadyExisting env = runTestMonad env $ do
cfg <- view hasConfig
parentId <- runDBQuery $ getRootId (UserName userMaster)
[corpus] <- runDBQuery $ getCorporaWithParentId parentId
let doc = exampleDocument_05
let la = Mono EN
-- Let's use a bogus NLP server port
let nlp = NLPServerConfig CoreNLP (fromJust $ parseURI "http://localhost:9999")
uncommittedNgrams <- extractNgramsFromDocument nlp la doc
oldCount <- docsInCorpusCount
res <- runDBTx $ insertMasterDoc cfg uncommittedNgrams (Just $ _node_hyperdata corpus) doc
interimCount <- docsInCorpusCount
liftIO $ do
interimCount `shouldBe` oldCount + 1
res `shouldBe` UnsafeMkNodeId 5
-- The second time the node gets cached and returned.
res1 <- runDBTx $ insertMasterDoc cfg uncommittedNgrams (Just $ _node_hyperdata corpus) doc
newCount <- docsInCorpusCount
liftIO $ do
newCount `shouldBe` interimCount
res1 `shouldBe` UnsafeMkNodeId 5
-- | We test that if we add 3 documents, out of which the middle one doesn't have any keyed ngrams
-- the other two are still persisted.
corpusAddDocBatchSurvival :: TestEnv -> Assertion
corpusAddDocBatchSurvival env = runTestMonad env $ do
cfg <- view hasConfig
parentId <- runDBQuery $ getRootId (UserName userMaster)
[corpus] <- runDBQuery $ getCorporaWithParentId parentId
let la = Mono EN
nlp <- view (nlpServerGet (_tt_lang la))
oldCount <- docsInCorpusCount
let docs = [exampleDocument_06, exampleDocument_07, exampleDocument_08 ]
uNgrams1 <- extractNgramsFromDocument nlp la exampleDocument_06
uNgrams2 <- extractNgramsFromDocument nlp la exampleDocument_08
(failures, savedDocs) <- insertMasterDocs cfg (uNgrams1 <> uNgrams2) (Just $ _node_hyperdata corpus) docs
c' <- docsInCorpusCount
liftIO $ do
failures `shouldBe` [DocumentInsertionError "InternalNodeError Cannot make node due to: Couldn't find the associated ngrams for document with hash \\x666b8e7bfd7c0af37d630e1097791f7ba438a669ecb6d1cb38014edd0b7a2977, therefore the added document won't have any ngrams."]
c' `shouldBe` oldCount + 2
savedDocs `shouldBe` [ UnsafeMkNodeId 6, UnsafeMkNodeId 7 ]
corpusAddDocuments :: TestEnv -> Assertion
corpusAddDocuments env = runTestMonad env $ do
......@@ -247,11 +343,13 @@ corpusSearchDB01 env = do
liftIO $ do
length results `shouldBe` 0 -- doesn't exist, we just check that proper to_tsquery is called
docsInCorpusCount :: TestMonad Int
docsInCorpusCount = runDBQuery $ do
parentId <- getRootId (UserName userMaster)
corpus <- getCorporaWithParentIdOrFail parentId
selectCountDocs (_node_id corpus)
corpusReturnCount :: TestEnv -> Assertion
corpusReturnCount env = do
runTestMonad env $ do
count <- runDBQuery $ do
parentId <- getRootId (UserName userMaster)
corpus <- getCorporaWithParentIdOrFail parentId
selectCountDocs (_node_id corpus)
liftIO $ count @?= 4
count <- runTestMonad env $ docsInCorpusCount
liftIO $ count @?= 4
......@@ -25,8 +25,7 @@ import Data.Map qualified as Map
import Data.Pool
import Database.PostgreSQL.Simple qualified as PG
import Database.Postgres.Temp qualified as Tmp
import GHC.IO.Exception (userError)
import Gargantext hiding (to)
import Gargantext hiding (throwIO, to)
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Errors.Types
import Gargantext.API.Prelude
......@@ -96,10 +95,8 @@ runTestMonadM env = flip runReaderT env . _TestMonad
runTestMonad :: TestEnv -> TestMonadM TestEnv BackendInternalError a -> IO a
runTestMonad env = flip runReaderT env . _TestMonad
-- | Shoehorn a BackendInternalError into an IOException, suitable
-- for testing.
instance MonadError BackendInternalError (TestMonadM env BackendInternalError) where
throwError e = TestMonad $ throwError (userError $ show e)
throwError e = TestMonad $ throwIO (toException e)
catchError (TestMonad m) hdl =
TestMonad $ ReaderT $ \e -> catchError (flip runReaderT e m) (\e' -> runTestMonadM e $ hdl (InternalWorkerError e'))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment