[ngrams] more node story fixes

Added more tests for more subtle cases.
parent 2edc1dd1
Pipeline #5311 failed with stages
in 68 minutes and 40 seconds
......@@ -87,7 +87,7 @@ module Gargantext.API.Ngrams
)
where
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, non, ifolded, to, withIndex, over)
import Control.Monad.Reader
import Data.Aeson.Text qualified as DAT
import Data.Foldable
......@@ -264,7 +264,9 @@ setListNgrams listId ngramsType ns = do
. at listId . _Just
. a_state
. at ngramsType
.~ Just ns
%~ (\mns' -> case mns' of
Nothing -> Just ns
Just ns' -> Just $ ns <> ns')
) nls
saveNodeStory
......@@ -294,7 +296,7 @@ commitStatePatch listId (Versioned _p_version p) = do
archiveSaver <- view hasNodeArchiveStoryImmediateSaver
ns <- liftBase $ atomically $ readTVar var
let
a = ns ^. unNodeStory . at listId . _Just
a = ns ^. unNodeStory . at listId . non initArchive
-- apply patches from version p_version to a ^. a_version
-- TODO Check this
--q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
......@@ -376,7 +378,7 @@ tableNgramsPull listId ngramsType p_version = do
r <- liftBase $ atomically $ readTVar var
let
a = r ^. unNodeStory . at listId . _Just
a = r ^. unNodeStory . at listId . non initArchive
q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
q_table = q ^. _PatchMap . at ngramsType . _Just
......
......@@ -147,7 +147,9 @@ makeLenses ''RootParent
data NgramsRepoElement = NgramsRepoElement
{ _nre_size :: !Int
, _nre_list :: !ListType
-- root is the top-most parent of ngrams
, _nre_root :: !(Maybe NgramsTerm)
-- parent is the direct parent of this ngram
, _nre_parent :: !(Maybe NgramsTerm)
, _nre_children :: !(MSet NgramsTerm)
}
......
......@@ -71,6 +71,8 @@ module Gargantext.Core.NodeStory
, getNodesArchiveHistory
, Archive(..)
, initArchive
, archiveAdvance
, unionArchives
, a_history
, a_state
, a_version
......@@ -120,7 +122,7 @@ import Gargantext.Database.Prelude (DbCmd', HasConnectionPool(..))
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
import Gargantext.Prelude hiding (to)
import Opaleye (DefaultFromField(..), SqlJsonb, fromPGSFromField)
------------------------------------------------------------------------
......@@ -161,7 +163,7 @@ class HasNodeArchiveStoryImmediateSaver env where
is implemented already
-}
newtype NodeStory s p = NodeStory { _unNodeStory :: Map NodeId (Archive s p) }
deriving (Generic, Show, Eq, Semigroup)
deriving (Generic, Show, Eq)
instance (FromJSON s, FromJSON p) => FromJSON (NodeStory s p)
instance (ToJSON s, ToJSON p) => ToJSON (NodeStory s p)
......@@ -206,29 +208,47 @@ instance DefaultFromField SqlJsonb (Archive NgramsState' NgramsStatePatch')
combineState :: NgramsState' -> NgramsState' -> NgramsState'
combineState = Map.unionWith (<>)
instance (Semigroup s, Semigroup p) => Semigroup (Archive s p) where
(<>) (Archive { _a_history = p }) (Archive { _a_version = v'
, _a_state = s'
, _a_history = p' }) =
Archive { _a_version = v'
, _a_state = s'
, _a_history = p' <> p }
instance (Monoid s, Semigroup p) => Monoid (Archive s p) where
mempty = Archive { _a_version = 0
, _a_state = mempty
, _a_history = [] }
-- This is not a typical Semigroup instance. The state is not
-- appended, instead it is replaced with the second entry. This is
-- because state changes with each version. We have to take into
-- account the removal of terms as well.
-- instance (Semigroup s, Semigroup p) => Semigroup (Archive s p) where
-- (<>) (Archive { _a_history = p }) (Archive { _a_version = v'
-- , _a_state = s'
-- , _a_history = p' }) =
-- Archive { _a_version = v'
-- , _a_state = s'
-- , _a_history = p' <> p }
-- instance (Monoid s, Semigroup p) => Monoid (Archive s p) where
-- mempty = Archive { _a_version = 0
-- , _a_state = mempty
-- , _a_history = [] }
instance (FromJSON s, FromJSON p) => FromJSON (Archive s p) where
parseJSON = genericParseJSON $ unPrefix "_a_"
instance (ToJSON s, ToJSON p) => ToJSON (Archive s p) where
toJSON = genericToJSON $ unPrefix "_a_"
toEncoding = genericToEncoding $ unPrefix "_a_"
-- | This is the normal way to update archive state, bumping the
-- version and history. Resulting state is taken directly from new
-- archive, omitting old archive completely.
archiveAdvance :: (Semigroup s, Semigroup p) => Archive s p -> Archive s p -> Archive s p
archiveAdvance aOld aNew = aNew { _a_history = _a_history aNew <> _a_history aOld }
-- | This is to merge archive states.
unionArchives :: (Semigroup s, Semigroup p) => Archive s p -> Archive s p -> Archive s p
unionArchives aOld aNew = aNew { _a_state = _a_state aOld <> _a_state aNew
, _a_history = _a_history aNew <> _a_history aOld }
------------------------------------------------------------------------
initNodeStory :: (Monoid s, Semigroup p) => NodeId -> NodeStory s p
initNodeStory ni = NodeStory $ Map.singleton ni initArchive
initArchive :: (Monoid s, Semigroup p) => Archive s p
initArchive = mempty
initArchive = Archive { _a_version = 0
, _a_state = mempty
, _a_history = [] }
initNodeListStoryMock :: NodeListStory
initNodeListStoryMock = NodeStory $ Map.singleton nodeListId archive
......@@ -425,7 +445,7 @@ getNodeStory c nId = do
pure ()
-}
pure $ NodeStory $ Map.singleton nId $ foldl combine mempty dbData
pure $ NodeStory $ Map.singleton nId $ foldl combine initArchive dbData
where
-- NOTE (<>) for Archive doesn't concatenate states, so we have to use `combine`
combine a1 a2 = a1 & a_state %~ combineState (a2 ^. a_state)
......@@ -582,12 +602,12 @@ upsertNodeStories c nodeId newArchive = do
pure ()
-- 3. Now we need to set versions of all node state to be the same
fixNodeStoryVersion c nodeId newArchive
updateNodeStoryVersion c nodeId newArchive
-- printDebug "[upsertNodeStories] STOP nId" nId
fixNodeStoryVersion :: PGS.Connection -> NodeId -> ArchiveList -> IO ()
fixNodeStoryVersion c nodeId newArchive = do
updateNodeStoryVersion :: PGS.Connection -> NodeId -> ArchiveList -> IO ()
updateNodeStoryVersion c nodeId newArchive = do
let ngramsTypes = Map.keys $ newArchive ^. a_state
mapM_ (\nt -> runPGSExecute c query (newArchive ^. a_version, nodeId, nt)) ngramsTypes
where
......@@ -605,7 +625,9 @@ writeNodeStories c (NodeStory nls) = do
nodeStoryInc :: PGS.Connection -> NodeListStory -> NodeId -> IO NodeListStory
nodeStoryInc c ns@(NodeStory nls) nId = do
case Map.lookup nId nls of
Nothing -> getNodeStory c nId >>= pure . (ns <>)
Nothing -> do
NodeStory nls' <- getNodeStory c nId
pure $ NodeStory $ Map.unionWith archiveAdvance nls' nls
Just _ -> pure ns
nodeStoryIncrementalRead :: PGS.Connection -> Maybe NodeListStory -> [NodeId] -> IO NodeListStory
......@@ -633,8 +655,8 @@ nodeStoryIncrementalRead c (Just nls) ns = foldM (\m n -> nodeStoryInc c m n) nl
-- `list` as their parent entry.
fixChildrenTermTypes :: NodeListStory -> NodeListStory
fixChildrenTermTypes (NodeStory nls) =
NodeStory $ Map.fromList [ (nId, a & a_state %~ fixChildrenInNgramsStatePatch) |
(nId, a) <- Map.toList nls ]
NodeStory $ Map.fromList [ (nId, a & a_state %~ fixChildrenInNgramsStatePatch)
| (nId, a) <- Map.toList nls ]
fixChildrenInNgramsStatePatch :: NgramsState' -> NgramsState'
fixChildrenInNgramsStatePatch ns = archiveStateFromList $ nsParents <> nsChildrenFixed
......@@ -653,17 +675,50 @@ fixChildrenInNgramsStatePatch ns = archiveStateFromList $ nsParents <> nsChildre
)
) <$> nsChildren
-- | Sometimes, when we upload a new list, a child can be left without
-- a parent. Find such ngrams and set their 'root' and 'parent' to
-- 'Nothing'.
fixChildrenWithNoParent :: NodeListStory -> NodeListStory
fixChildrenWithNoParent (NodeStory nls) =
NodeStory $ Map.fromList [ (nId, a & a_state %~ fixChildrenWithNoParentStatePatch)
| (nId, a) <- Map.toList nls ]
fixChildrenWithNoParentStatePatch :: NgramsState' -> NgramsState'
fixChildrenWithNoParentStatePatch ns = archiveStateFromList $ nsParents <> nsChildrenFixed
where
nls = archiveStateToList ns
nsParents = filter (\(_nt, _t, nre) -> isNothing $ nre ^. nre_parent) nls
parentNtMap = Map.fromList $ (\(_nt, t, nre) -> (t, nre ^. nre_children & mSetToSet)) <$> nsParents
nsChildren = filter (\(_nt, _t, nre) -> isJust $ nre ^. nre_parent) nls
nsChildrenFixFunc (nt, t, nre) =
( nt
, t
, nre { _nre_root = root
, _nre_parent = parent }
)
where
(root, parent) = case parentNtMap ^. at (nre ^. nre_parent . _Just) . _Just . at t of
Just _ -> (nre ^. nre_root, nre ^. nre_parent)
Nothing -> (Nothing, Nothing)
nsChildrenFixed = nsChildrenFixFunc <$> nsChildren
------------------------------------
fromDBNodeStoryEnv :: Pool PGS.Connection -> IO NodeStoryEnv
fromDBNodeStoryEnv pool = do
tvar <- nodeStoryVar pool Nothing []
let saver_immediate = do
ns <- atomically $ do
ns' <- readTVar tvar
let ns'' = fixChildrenTermTypes ns'
writeTVar tvar ns''
pure ns''
ns <- atomically $
readTVar tvar
-- fix children so their 'list' is the same as their parents'
>>= pure . fixChildrenTermTypes
-- fix children that don't have a parent anymore
>>= pure . fixChildrenWithNoParent
>>= writeTVar tvar
>> readTVar tvar
withResource pool $ \c -> do
--printDebug "[mkNodeStorySaver] will call writeNodeStories, ns" ns
writeNodeStories c ns
......@@ -707,6 +762,7 @@ currentVersion listId = do
-----------------------------------------
-- | To be called from the REPL
fixNodeStoryVersions :: (HasNodeStory env err m) => m ()
fixNodeStoryVersions = do
pool <- view connPool
......
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
module Test.API.Setup where
......@@ -21,6 +22,7 @@ import Gargantext.Database.Admin.Trigger.Init
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node (getOrMkList)
import Gargantext.Prelude (printDebug)
import Gargantext.Prelude.Config
import Gargantext.System.Logging
import Network.HTTP.Client.TLS (newTlsManager)
......@@ -101,6 +103,7 @@ setupEnvironment env = flip runReaderT env $ runTestMonad $ do
(Left corpusMasterName)
(Nothing :: Maybe HyperdataCorpus)
masterListId <- getOrMkList masterCorpusId masterUserId
printDebug "[setupEnvironment] masterListId: " masterListId
void $ initLastTriggers masterListId
-- | Creates two users, Alice & Bob. Alice shouldn't be able to see
......
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE UndecidableInstances #-}
......@@ -6,13 +7,15 @@
{-# OPTIONS_GHC -Wno-orphans #-}
module Test.Database.Operations (
tests
tests
, nodeStoryTests
) where
import Control.Monad.Except
import Control.Monad.Reader
import Data.Text qualified as T
import Database.PostgreSQL.Simple
import Database.PostgreSQL.Simple.SqlQQ
import Gargantext.API.Node.Corpus.Update
import Gargantext.Core
import Gargantext.Core.Types.Individu
......@@ -20,6 +23,7 @@ import Gargantext.Database.Action.User
import Gargantext.Database.Action.User.New
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (runPGSQuery)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Tree.Root (getRootId)
import Gargantext.Database.Schema.Node (NodePoly(..))
......@@ -64,13 +68,25 @@ tests = sequential $ aroundAll withTestDB $ describe "Database" $ do
it "Can perform search by author in documents" corpusSearch02
it "Can perform more complex searches using the boolean API" corpusSearch03
it "Can correctly count doc score" corpusScore01
nodeStoryTests :: Spec
nodeStoryTests = sequential $
-- run 'withTestDB' before _every_ test item
around setupDBAndCorpus $
describe "Database - node story" $ do
describe "Node story" $ do
it "Can create a list" createListTest
it "Can add query node story" queryNodeStoryTest
it "Can query node story" queryNodeStoryTest
it "Can add new terms to node story" insertNewTermsToNodeStoryTest
it "Can add new terms (with children) to node story" insertNewTermsWithChildrenToNodeStoryTest
it "Can add fix children terms to match parents" insertNodeStoryChildrenWithDifferentNgramsTypeThanParentTest
it "Fixes child terms to match parents' terms" insertNodeStoryChildrenWithDifferentNgramsTypeThanParentTest
it "Can update node story when 'setListNgrams' is called" setListNgramsUpdatesNodeStoryTest
it "When 'setListNgrams' is called, childrens' parents are updated" setListNgramsUpdatesNodeStoryWithChildrenTest
where
setupDBAndCorpus testsFunc = withTestDB $ \env -> do
setupEnvironment env
testsFunc env
data ExpectedActual a =
Expected a
| Actual a
......@@ -133,8 +149,10 @@ corpusReadWrite01 env = do
flip runReaderT env $ runTestMonad $ do
uid <- getUserId (UserName "alfredo")
parentId <- getRootId (UserName "alfredo")
[corpusId] <- mk (Just "Test_Corpus") (Nothing :: Maybe HyperdataCorpus) parentId uid
liftIO $ corpusId `shouldBe` UnsafeMkNodeId 416
let corpusName = "Test_Corpus"
[corpusId] <- mk (Just corpusName) (Nothing :: Maybe HyperdataCorpus) parentId uid
[Only corpusId'] <- runPGSQuery [sql|SELECT id FROM nodes WHERE name = ?|] (Only corpusName)
liftIO $ corpusId `shouldBe` UnsafeMkNodeId corpusId'
-- Retrieve the corpus by Id
[corpus] <- getCorporaWithParentId parentId
liftIO $ corpusId `shouldBe` (_node_id corpus)
......
......@@ -253,3 +253,107 @@ insertNodeStoryChildrenWithDifferentNgramsTypeThanParentTest env = do
FROM node_stories
WHERE ngrams_id = ?|] (PSQL.Only tChildId)
liftIO $ childType `shouldBe` ("MapTerm" :: Text)
setListNgramsUpdatesNodeStoryTest :: TestEnv -> Assertion
setListNgramsUpdatesNodeStoryTest env = do
flip runReaderT env $ runTestMonad $ do
-- NOTE(adn) We need to create user 'gargantua'(!!) in order
-- for 'addDocumentsToHyperCorpus' to work.
let user = UserName userMaster
parentId <- getRootId user
[corpus] <- getCorporaWithParentId parentId
let corpusId = _node_id corpus
userId <- getUserId user
listId <- getOrMkList corpusId userId
v <- getNodeStoryVar [listId]
let nre = NgramsRepoElement { _nre_size = 1
, _nre_list = MapTerm
, _nre_root = Nothing
, _nre_parent = Nothing
, _nre_children = MSet Map.empty }
let terms = "HELLO"
let nls = Map.singleton (NgramsTerm terms) nre
setListNgrams listId NgramsTerms nls
liftIO $ do
ns <- atomically $ readTVar v
ns `shouldBe` (NodeStory $ Map.singleton listId
((initArchive :: ArchiveList) & a_state .~
Map.singleton NgramsTerms nls))
-- check that the ngrams are in the DB as well
ngramsMap <- selectNgramsId [terms]
liftIO $ (snd <$> Map.toList ngramsMap) `shouldBe` [terms]
let nre2 = NgramsRepoElement { _nre_size = 1
, _nre_list = MapTerm
, _nre_root = Nothing
, _nre_parent = Nothing
, _nre_children = MSet Map.empty }
let terms2 = "WORLD"
let nls2 = Map.singleton (NgramsTerm terms2) nre2
setListNgrams listId NgramsTerms nls2
liftIO $ do
ns <- atomically $ readTVar v
ns `shouldBe` (NodeStory $ Map.singleton listId
((initArchive :: ArchiveList) & a_state .~
(Map.singleton NgramsTerms $ nls <> nls2)))
setListNgramsUpdatesNodeStoryWithChildrenTest :: TestEnv -> Assertion
setListNgramsUpdatesNodeStoryWithChildrenTest env = do
flip runReaderT env $ runTestMonad $ do
-- NOTE(adn) We need to create user 'gargantua'(!!) in order
-- for 'addDocumentsToHyperCorpus' to work.
let user = UserName userMaster
parentId <- getRootId user
[corpus] <- getCorporaWithParentId parentId
let corpusId = _node_id corpus
userId <- getUserId user
listId <- getOrMkList corpusId userId
v <- getNodeStoryVar [listId]
let tParent = NgramsTerm "hello"
let tChild = NgramsTerm "world"
let terms = unNgramsTerm <$> [tParent, tChild]
let nreParent = NgramsRepoElement { _nre_size = 1
, _nre_list = MapTerm
, _nre_root = Nothing
, _nre_parent = Nothing
, _nre_children = MSet $ Map.singleton tChild () }
let nreChild = NgramsRepoElement { _nre_size = 1
, _nre_list = MapTerm
, _nre_root = Just tParent
, _nre_parent = Just tParent
, _nre_children = MSet Map.empty }
let nls = Map.fromList [(tParent, nreParent), (tChild, nreChild)]
setListNgrams listId NgramsTerms nls
liftIO $ do
ns <- atomically $ readTVar v
ns `shouldBe` (NodeStory $ Map.singleton listId
((initArchive :: ArchiveList) & a_state .~
Map.singleton NgramsTerms nls))
-- OK, now we substitute parent with no children, the parent of
-- 'nreChild' should become Nothing
let nreParentNew = nreParent { _nre_children = MSet $ Map.empty }
let nlsToInsert = Map.fromList [(tParent, nreParentNew)]
setListNgrams listId NgramsTerms nlsToInsert
let nreChildNew = nreChild { _nre_parent = Nothing
, _nre_root = Nothing }
let nlsNew = Map.fromList [(tParent, nreParentNew), (tChild, nreChildNew)]
liftIO $ do
ns <- atomically $ readTVar v
ns `shouldBe` (NodeStory $ Map.singleton listId
((initArchive :: ArchiveList) & a_state .~
Map.singleton NgramsTerms nlsNew))
......@@ -43,3 +43,4 @@ main = do
bracket startCoreNLPServer stopCoreNLPServer $ \_ -> hspec $ do
API.tests
DB.tests
DB.nodeStoryTests
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment