Commit 43191319 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Add loop detection when importing ngrams

This avoids creating pathological ngram forests.
parent 8ce014ba
......@@ -36,6 +36,7 @@ import Gargantext.Database.Query.Tree hiding (treeError)
import Gargantext.Utils.Jobs.Monad (JobError(..))
import Network.HTTP.Types.Status qualified as HTTP
import Servant.Server (ServerError(..), err404, err500)
import Gargantext.Core.NodeStory (NodeStoryError(..), renderLoop, BuildForestError (..))
$(deriveHttpStatusCode ''BackendErrorCode)
......@@ -91,6 +92,12 @@ backendErrorToFrontendError = \case
AccessPolicyErrorReason reason
-> mkFrontendErr' "A policy check failed"
$ FE_policy_check_error reason
InternalNodeStoryError nodeStoryError
-> case nodeStoryError of
NodeStoryUpsertFailed (BFE_loop_detected visited)
-- FIXME(adn) proper constructor.
-> let msg = "A loop was detected in ngrams: " <> renderLoop visited
in mkFrontendErr' msg $ FE_internal_server_error msg
frontendErrorToGQLServerError :: FrontendError -> ServerError
frontendErrorToGQLServerError fe@(FrontendError diag ty _) =
......
......@@ -25,7 +25,7 @@ Portability : POSIX
module Gargantext.API.Errors.Types (
HasServerError(..)
, serverError
-- * The main frontend error type
, FrontendError(..)
......@@ -48,9 +48,10 @@ module Gargantext.API.Errors.Types (
) where
import Control.Lens ((#), makePrisms, Prism')
import Control.Lens.Prism (prism')
import Control.Monad.Fail (fail)
import Data.Aeson.Types (typeMismatch, emptyArray)
import Data.Aeson (Value(..), (.:), (.=), object, withObject)
import Data.Aeson.Types (typeMismatch, emptyArray)
import Data.List.NonEmpty qualified as NE
import Data.Singletons.TH ( SingI(sing), SingKind(fromSing) )
import Data.Text qualified as T
......@@ -59,6 +60,7 @@ import Gargantext.API.Admin.Auth.Types (AuthenticationError)
import Gargantext.API.Errors.Class (HasAuthenticationError(..))
import Gargantext.API.Errors.TH ( deriveIsFrontendErrorData )
import Gargantext.API.Errors.Types.Backend
import Gargantext.Core.NodeStory.Types
import Gargantext.Core.Types (HasValidationError(..))
import Gargantext.Core.Types.Individu (Username)
import Gargantext.Database.Admin.Types.Node
......@@ -68,7 +70,6 @@ import Gargantext.Prelude hiding (Location, WithStacktrace)
import Gargantext.Utils.Dict (Dict(..))
import Gargantext.Utils.Jobs.Monad qualified as Jobs
import Servant (ServerError)
import Control.Lens.Prism (prism')
-- | A 'WithStacktrace' carries an error alongside its
-- 'CallStack', to be able to print the correct source location
......@@ -120,6 +121,7 @@ data BackendInternalError
| InternalValidationError !Validation
| InternalWorkerError !IOException
| AccessPolicyError !AccessPolicyErrorReason
| InternalNodeStoryError !NodeStoryError
deriving (Show, Typeable)
makePrisms ''BackendInternalError
......@@ -159,6 +161,9 @@ instance HasServerError BackendInternalError where
instance HasAuthenticationError BackendInternalError where
_AuthenticationError = _InternalAuthenticationError
instance HasNodeStoryError BackendInternalError where
_NodeStoryError = _InternalNodeStoryError
-- | An error that can be returned to the frontend. It carries a human-friendly
-- diagnostic, the 'type' of the error as well as some context-specific data.
data FrontendError where
......
......@@ -105,12 +105,12 @@ import Data.Map.Strict.Patch qualified as PM
import Data.Patch.Class (Action(act), Transformable(..), ours)
import Data.Set qualified as Set
import Data.Text (isInfixOf, toLower, unpack)
import Data.Text qualified as T
import Data.Text.Lazy.IO as DTL ( writeFile )
import Data.Tree
import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.NodeStory qualified as NodeStory
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, ContextId, HasValidationError)
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
......@@ -218,6 +218,13 @@ addListNgrams listId ngramsType nes = do
-- | TODO: incr the Version number
-- && should use patch
-- UNSAFE
-- FIXME(adinapoli): This function used to be very dangerous as it didn't
-- prevent imports from creating loops: if we had a list of imported terms with a tree
-- referencing an existing node in a forest, we could accidentally create loops. The most
-- efficient way would be to use the patch API to generate a patch for the input, apply it
-- to the current state and handle conflicts, discovering loops there. However, given that
-- it's complex to do that, for the moment we use the Forest API to detect loops, failing
-- if one is found.
setListNgrams :: NodeStoryEnv err
-> NodeId
-> NgramsType
......@@ -230,18 +237,6 @@ setListNgrams env listId ngramsType ns = do
Nothing -> Just ns
Just ns' -> Just $ ns <> ns')
saveNodeStory env listId a'
-- liftBase $ atomically $ do
-- nls <- readTVar var
-- writeTVar var $
-- ( unNodeStory
-- . at listId . _Just
-- . a_state
-- . at ngramsType
-- %~ (\mns' -> case mns' of
-- Nothing -> Just ns
-- Just ns' -> Just $ ns <> ns')
-- ) nls
-- saveNodeStory
newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
......@@ -451,63 +446,10 @@ matchingNode listType minSize maxSize searchQuery (Node inputNode children) =
&& (searchQuery (inputNode ^. ne_ngrams) || any (matchingNode listType minSize maxSize searchQuery) children)
&& matchesListType (inputNode ^. ne_list)
-- | Errors returned by 'buildForest'.
data BuildForestError
= -- We found a loop, something that shouldn't normally happen if the calling
-- code is correct by construction, but if that does happen, the value will
-- contain the full path to the cycle.
BFE_loop_detected !(Set VisitedNode)
deriving (Show, Eq)
renderLoop :: Set VisitedNode -> T.Text
renderLoop = T.intercalate " -> " . map (unNgramsTerm . _vn_term) . Set.toAscList
-- | Keeps track of the relative order in which visited a node, to be able to print cycles.
data VisitedNode =
VN { _vn_position :: !Int, _vn_term :: !NgramsTerm }
deriving (Show)
instance Eq VisitedNode where
(VN _ t1) == (VN _ t2) = t1 == t2
instance Ord VisitedNode where
compare (VN _ t1) (VN _ t2) = t1 `compare` t2
type TreeNode = (NgramsTerm, NgramsElement)
-- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree.
-- We can't use a single function to \"rule them all\" because the 'NgramsRepoElement', that
-- the 'NodeStory' uses does not have an 'ngrams' we can use as the key when building and
-- destroying a forest.
-- /IMPORTANT/: This functions returns an error in case we found a loop.
buildForest :: Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest mp = fmap (map (fmap snd)) . unfoldForestM unfoldNode $ Map.toList mp
where
unfoldNode :: TreeNode -> Either BuildForestError (TreeNode, [TreeNode])
unfoldNode (n, el) = flip evalState (1 :: Int, mempty) . runExceptT $ do
let initialChildren = getChildren (mSetToList $ _ne_children el)
go initialChildren *> pure (mkTreeNode (n, el))
where
go :: [ NgramsElement ]
-> ExceptT BuildForestError (State (Int, Set VisitedNode)) ()
go [] = pure ()
go (x:xs) = do
(pos, visited) <- get
let nt = _ne_ngrams x
case Set.member (VN pos nt) visited of
True -> throwError $ BFE_loop_detected visited
False -> do
put (pos + 1, Set.insert (VN (pos + 1) nt) visited)
go (getChildren (mSetToList $ _ne_children x) <> xs)
mkTreeNode :: TreeNode -> (TreeNode, [TreeNode])
mkTreeNode (k, el) = ((k, el), mapMaybe findChildren $ mSetToList (el ^. ne_children))
findChildren :: NgramsTerm -> Maybe TreeNode
findChildren t = Map.lookup t mp <&> \el -> (t, el)
getChildren :: [NgramsTerm] -> [NgramsElement]
getChildren = mapMaybe (`Map.lookup` mp)
buildForest = fmap (map (fmap snd)) . NodeStory.buildForest
-- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely just recostructs the original
......
......@@ -44,6 +44,7 @@ TODO:
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
module Gargantext.Core.NodeStory
( module Gargantext.Core.NodeStory.Types
......@@ -62,14 +63,18 @@ module Gargantext.Core.NodeStory
, fixNodeStoryVersions
, getParentsChildren
-- * Operations on trees and forests
, TreeNode
, BuildForestError(..)
, VisitedNode(..)
, buildForest
, pruneForest
) where
import Control.Lens ((%~), non, _Just, at, over, Lens')
import Control.Lens ((%~), non, _Just, at, over, Lens', (#))
import Data.ListZipper
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Tree
import Database.PostgreSQL.Simple qualified as PGS
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.ToField qualified as PGS
......@@ -77,11 +82,10 @@ import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory.DB
import Gargantext.Core.NodeStory.Types
import Gargantext.Core.Text.Ngrams qualified as Ngrams
import Gargantext.Database.Admin.Types.Node ( ListId, NodeId(..) )
import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Admin.Types.Node ( ListId, NodeId(..) )
import Gargantext.Database.Prelude
import Gargantext.Prelude hiding (to)
import Data.Tree
class HasNgramChildren e where
ngramsElementChildren :: Lens' e (MSet NgramsTerm)
......@@ -109,35 +113,58 @@ instance HasNgramParent NgramsElement where
-- piece of a data structure.
type ArchiveStateForest = ListZipper (Tree (NgramsTerm, NgramsRepoElement))
buildForestsFromArchiveState :: NgramsState' -> Map Ngrams.NgramsType (Forest (NgramsTerm, NgramsRepoElement))
buildForestsFromArchiveState = Map.map buildForest
type TreeNode e = (NgramsTerm, e)
buildForestsFromArchiveState :: NgramsState'
-> Either BuildForestError (Map Ngrams.NgramsType (Forest (TreeNode NgramsRepoElement)))
buildForestsFromArchiveState = traverse buildForest
destroyArchiveStateForest :: Map Ngrams.NgramsType (Forest (NgramsTerm, NgramsRepoElement)) -> NgramsState'
destroyArchiveStateForest :: Map Ngrams.NgramsType (Forest (TreeNode NgramsRepoElement)) -> NgramsState'
destroyArchiveStateForest = Map.map destroyForest
-- | Builds an ngrams forest from the input ngrams table map.
buildForest :: forall e. HasNgramChildren e => Map NgramsTerm e -> Forest (NgramsTerm, e)
buildForest mp = unfoldForest mkTreeNode (Map.toList mp)
buildForest :: forall e. HasNgramChildren e => Map NgramsTerm e -> Either BuildForestError (Forest (TreeNode e))
buildForest mp = unfoldForestM unfoldNode $ Map.toList mp
where
mkTreeNode :: (NgramsTerm, e) -> ((NgramsTerm, e), [(NgramsTerm, e)])
unfoldNode :: TreeNode e -> Either BuildForestError (TreeNode e, [TreeNode e])
unfoldNode (n, el) = flip evalState (1 :: Int, mempty) . runExceptT $ do
let initialChildren = getChildren (mSetToList $ el ^. ngramsElementChildren)
go initialChildren *> pure (mkTreeNode (n, el))
where
go :: [ TreeNode e ]
-> ExceptT BuildForestError (State (Int, Set VisitedNode)) ()
go [] = pure ()
go (x:xs) = do
(pos, visited) <- get
let nt = fst x
case Set.member (VN pos nt) visited of
True -> throwError $ BFE_loop_detected visited
False -> do
put (pos + 1, Set.insert (VN (pos + 1) nt) visited)
go (getChildren (mSetToList $ snd x ^. ngramsElementChildren) <> xs)
mkTreeNode :: TreeNode e -> (TreeNode e, [TreeNode e])
mkTreeNode (k, el) = ((k, el), mapMaybe findChildren $ mSetToList (el ^. ngramsElementChildren))
findChildren :: NgramsTerm -> Maybe (NgramsTerm, e)
findChildren :: NgramsTerm -> Maybe (TreeNode e)
findChildren t = Map.lookup t mp <&> \el -> (t, el)
getChildren :: [NgramsTerm] -> [TreeNode e]
getChildren = mapMaybe (\t -> (t,) <$> Map.lookup t mp)
-- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely just recostructs the original
-- map without loss of information. To perform operations on the forest, use the appropriate
-- functions.
destroyForest :: Forest (NgramsTerm, NgramsRepoElement) -> Map NgramsTerm NgramsRepoElement
destroyForest :: Forest (TreeNode NgramsRepoElement) -> Map NgramsTerm NgramsRepoElement
destroyForest f = Map.fromList . map (foldTree destroyTree) $ f
where
destroyTree :: (NgramsTerm, NgramsRepoElement)
-> [(NgramsTerm, NgramsRepoElement)]
-> (NgramsTerm, NgramsRepoElement)
destroyTree :: TreeNode NgramsRepoElement
-> [TreeNode NgramsRepoElement]
-> TreeNode NgramsRepoElement
destroyTree (k, rootEl) childrenEl = (k, squashElements rootEl childrenEl)
squashElements :: e -> [(NgramsTerm, e)] -> e
squashElements :: e -> [TreeNode e] -> e
squashElements r _ = r
-- | Prunes the input 'Forest' of 'NgramsElement' by keeping only the roots, i.e. the
......@@ -357,17 +384,14 @@ getParentsChildren ns = (nsParents, nsChildren)
------------------------------------
mkNodeStoryEnv :: NodeStoryEnv err
mkNodeStoryEnv :: HasNodeStoryError err => NodeStoryEnv err
mkNodeStoryEnv = do
let saver_immediate nId a = do
-- |NOTE Fixing a_state is kinda a hack. We shouldn't land
-- |with bad state in the first place.
upsertNodeStories nId $
a & a_state %~ (
destroyArchiveStateForest
. fixChildrenWithNoParent
. buildForestsFromArchiveState
)
forests <- dbCheckOrFail (first (\e -> _NodeStoryError # NodeStoryUpsertFailed e) $ buildForestsFromArchiveState $ a ^. a_state)
upsertNodeStories nId $ do
a & a_state .~ (destroyArchiveStateForest . fixChildrenWithNoParent $ forests)
let archive_saver_immediate nId a = do
insertNodeArchiveHistory nId (a ^. a_version) $ reverse $ a ^. a_history
pure $ a & a_history .~ []
......
......@@ -42,11 +42,19 @@ module Gargantext.Core.NodeStory.Types
, combineState
, ArchiveState
, ArchiveStateSet
, ArchiveStateList )
, ArchiveStateList
-- * Errors
, HasNodeStoryError(..)
, NodeStoryError(..)
, BuildForestError(..)
, VisitedNode(..)
, renderLoop
)
where
import Codec.Serialise.Class ( Serialise )
import Control.Lens (Getter, Lens')
import Control.Lens (Getter, Lens', Prism', prism')
import Data.Aeson hiding ((.=), decode)
import Data.Map.Strict qualified as Map
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
......@@ -61,6 +69,7 @@ import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Prelude hiding (to)
import Opaleye (DefaultFromField(..), SqlJsonb, fromPGSFromField)
import qualified Data.Text as T
------------------------------------------------------------------------
......@@ -183,7 +192,31 @@ $(makeAdaptorAndInstance "pNodeArchiveStory" ''NodeStoryArchivePoly)
type ArchiveList = Archive NgramsState' NgramsStatePatch'
-- | Errors returned by 'buildForest'.
data BuildForestError
= -- We found a loop, something that shouldn't normally happen if the calling
-- code is correct by construction, but if that does happen, the value will
-- contain the full path to the cycle.
BFE_loop_detected !(Set VisitedNode)
deriving (Show, Eq)
renderLoop :: Set VisitedNode -> T.Text
renderLoop = T.intercalate " -> " . map (unNgramsTerm . _vn_term) . Set.toAscList
-- | Keeps track of the relative order in which visited a node, to be able to print cycles.
data VisitedNode =
VN { _vn_position :: !Int, _vn_term :: !NgramsTerm }
deriving (Show)
instance Eq VisitedNode where
(VN _ t1) == (VN _ t2) = t1 == t2
instance Ord VisitedNode where
compare (VN _ t1) (VN _ t2) = t1 `compare` t2
data NodeStoryError =
NodeStoryUpsertFailed BuildForestError
deriving (Show, Eq)
------------------------------------------------------------------------
data NodeStoryEnv err = NodeStoryEnv
......@@ -195,6 +228,12 @@ data NodeStoryEnv err = NodeStoryEnv
-- , _nse_lock :: !FileLock -- TODO (it depends on the option: if with database or file only)
}
class HasNodeStoryError e where
_NodeStoryError :: Prism' e NodeStoryError
instance HasNodeStoryError NodeStoryError where
_NodeStoryError = prism' identity Just
type HasNodeStory env err m = ( IsDBCmd env err m, HasNodeStoryEnv env err, HasNodeError err)
class HasNodeStoryEnv env err where
......
......@@ -35,6 +35,7 @@ module Gargantext.Database.Transactional (
-- * Throwing and catching errors (which allows rollbacks)
, dbFail
, dbCheckOrFail
, catchDBTxError
, handleDBTxError
) where
......@@ -335,3 +336,7 @@ mkOpaDelete a = DBTx $ liftF (OpaDelete a id)
dbFail :: err -> DBTx err r b
dbFail = DBTx . liftF . DBFail
dbCheckOrFail :: Either err a -> DBTx err r a
dbCheckOrFail (Left e) = DBTx . liftF . DBFail $ e
dbCheckOrFail (Right r) = DBTx $ pure r
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment