Commit 616f2982 authored by Fabien Maniere's avatar Fabien Maniere

Merge branch 'adinapoli/issue-498' into 'dev'

Allow ngrams to be searched even if they appear deeply nested

See merge request !433
parents e0c188ca 30d2b444
Pipeline #7824 passed with stages
in 51 minutes and 17 seconds
...@@ -15,9 +15,6 @@ add get ...@@ -15,9 +15,6 @@ add get
-} -}
{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE BangPatterns #-} {-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE IncoherentInstances #-} {-# LANGUAGE IncoherentInstances #-}
...@@ -25,6 +22,7 @@ add get ...@@ -25,6 +22,7 @@ add get
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TupleSections #-}
module Gargantext.API.Ngrams module Gargantext.API.Ngrams
...@@ -91,6 +89,8 @@ module Gargantext.API.Ngrams ...@@ -91,6 +89,8 @@ module Gargantext.API.Ngrams
, filterNgramsNodes , filterNgramsNodes
-- * Operations on a forest -- * Operations on a forest
, BuildForestError(..)
, renderLoop
, buildForest , buildForest
, destroyForest , destroyForest
, pruneForest , pruneForest
...@@ -105,12 +105,12 @@ import Data.Map.Strict.Patch qualified as PM ...@@ -105,12 +105,12 @@ import Data.Map.Strict.Patch qualified as PM
import Data.Patch.Class (Action(act), Transformable(..), ours) import Data.Patch.Class (Action(act), Transformable(..), ours)
import Data.Set qualified as Set import Data.Set qualified as Set
import Data.Text (isInfixOf, toLower, unpack) import Data.Text (isInfixOf, toLower, unpack)
import Data.Text qualified as T
import Data.Text.Lazy.IO as DTL ( writeFile ) import Data.Text.Lazy.IO as DTL ( writeFile )
import Data.Tree import Data.Tree
import Gargantext.API.Ngrams.Tools (getNodeStory) import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory hiding (buildForest) import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.NodeStory qualified as NodeStory
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType) import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, ContextId, HasValidationError) import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, ContextId, HasValidationError)
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..)) import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
...@@ -173,10 +173,6 @@ saveNodeStory env nId a = do ...@@ -173,10 +173,6 @@ saveNodeStory env nId a = do
let saver = view hasNodeStoryImmediateSaver env let saver = view hasNodeStoryImmediateSaver env
saver nId a saver nId a
listTypeConflictResolution :: ListType -> ListType -> ListType
listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
ngramsStatePatchConflictResolution :: NgramsType ngramsStatePatchConflictResolution :: NgramsType
-> NgramsTerm -> NgramsTerm
-> ConflictResolutionNgramsPatch -> ConflictResolutionNgramsPatch
...@@ -190,14 +186,6 @@ ngramsStatePatchConflictResolution _ngramsType _ngramsTerm ...@@ -190,14 +186,6 @@ ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
-- Current state:
-- Insertions are not considered as patches,
-- they do not extend history,
-- they do not bump version.
insertNewOnly :: a -> Maybe b -> a
insertNewOnly m = maybe m (const $ errorTrace "insertNewOnly: impossible")
-- TODO error handling
{- unused {- unused
-- TODO refactor with putListNgrams -- TODO refactor with putListNgrams
copyListNgrams :: RepoCmdM env err m copyListNgrams :: RepoCmdM env err m
...@@ -438,13 +426,10 @@ filterNgramsNodes :: Maybe ListType ...@@ -438,13 +426,10 @@ filterNgramsNodes :: Maybe ListType
-> Maybe MinSize -> Maybe MinSize
-> Maybe MaxSize -> Maybe MaxSize
-> (NgramsTerm -> Bool) -> (NgramsTerm -> Bool)
-> Map NgramsTerm NgramsElement -> Forest NgramsElement
-> Map NgramsTerm NgramsElement -> Forest NgramsElement
filterNgramsNodes listTy minSize maxSize searchFn tblMap = filterNgramsNodes listTy minSize maxSize searchFn =
flip Map.mapMaybe tblMap $ \e -> filter (matchingNode listTy minSize maxSize searchFn)
case matchingNode listTy minSize maxSize searchFn e of
False -> Nothing
True -> Just e
-- | Returns 'True' if the input 'NgramsElement' satisfies the search criteria -- | Returns 'True' if the input 'NgramsElement' satisfies the search criteria
-- mandated by 'NgramsSearchQuery'. -- mandated by 'NgramsSearchQuery'.
...@@ -452,9 +437,9 @@ matchingNode :: Maybe ListType ...@@ -452,9 +437,9 @@ matchingNode :: Maybe ListType
-> Maybe MinSize -> Maybe MinSize
-> Maybe MaxSize -> Maybe MaxSize
-> (NgramsTerm -> Bool) -> (NgramsTerm -> Bool)
-> NgramsElement -> Tree NgramsElement
-> Bool -> Bool
matchingNode listType minSize maxSize searchQuery inputNode = matchingNode listType minSize maxSize searchQuery (Node inputNode children) =
let nodeSize = inputNode ^. ne_size let nodeSize = inputNode ^. ne_size
matchesListType = maybe (const True) (==) listType matchesListType = maybe (const True) (==) listType
respectsMinSize = maybe (const True) ((<=) . getMinSize) minSize respectsMinSize = maybe (const True) ((<=) . getMinSize) minSize
...@@ -462,15 +447,67 @@ matchingNode listType minSize maxSize searchQuery inputNode = ...@@ -462,15 +447,67 @@ matchingNode listType minSize maxSize searchQuery inputNode =
in respectsMinSize nodeSize in respectsMinSize nodeSize
&& respectsMaxSize nodeSize && respectsMaxSize nodeSize
&& searchQuery (inputNode ^. ne_ngrams) -- Search for the query either in the root or in the children.
&& (searchQuery (inputNode ^. ne_ngrams) || any (matchingNode listType minSize maxSize searchQuery) children)
&& matchesListType (inputNode ^. ne_list) && matchesListType (inputNode ^. ne_list)
-- | Errors returned by 'buildForest'.
data BuildForestError
= -- We found a loop, something that shouldn't normally happen if the calling
-- code is correct by construction, but if that does happen, the value will
-- contain the full path to the cycle.
BFE_loop_detected !(Set VisitedNode)
deriving (Show, Eq)
renderLoop :: Set VisitedNode -> T.Text
renderLoop = T.intercalate " -> " . map (unNgramsTerm . _vn_term) . Set.toAscList
-- | Keeps track of the relative order in which visited a node, to be able to print cycles.
data VisitedNode =
VN { _vn_position :: !Int, _vn_term :: !NgramsTerm }
deriving (Show)
instance Eq VisitedNode where
(VN _ t1) == (VN _ t2) = t1 == t2
instance Ord VisitedNode where
compare (VN _ t1) (VN _ t2) = t1 `compare` t2
type TreeNode = (NgramsTerm, NgramsElement)
-- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree. -- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree.
-- We can't use a single function to \"rule them all\" because the 'NgramsRepoElement', that -- We can't use a single function to \"rule them all\" because the 'NgramsRepoElement', that
-- the 'NodeStory' uses does not have an 'ngrams' we can use as the key when building and -- the 'NodeStory' uses does not have an 'ngrams' we can use as the key when building and
-- destroying a forest. -- destroying a forest.
buildForest :: Map NgramsTerm NgramsElement -> Forest NgramsElement -- /IMPORTANT/: This functions returns an error in case we found a loop.
buildForest = map (fmap snd) . NodeStory.buildForest buildForest :: Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest mp = fmap (map (fmap snd)) . unfoldForestM unfoldNode $ Map.toList mp
where
unfoldNode :: TreeNode -> Either BuildForestError (TreeNode, [TreeNode])
unfoldNode (n, el) = flip evalState (1 :: Int, mempty) . runExceptT $ do
let initialChildren = getChildren (mSetToList $ _ne_children el)
go initialChildren *> pure (mkTreeNode (n, el))
where
go :: [ NgramsElement ]
-> ExceptT BuildForestError (State (Int, Set VisitedNode)) ()
go [] = pure ()
go (x:xs) = do
(pos, visited) <- get
let nt = _ne_ngrams x
case Set.member (VN pos nt) visited of
True -> throwError $ BFE_loop_detected visited
False -> do
put (pos + 1, Set.insert (VN (pos + 1) nt) visited)
go (getChildren (mSetToList $ _ne_children x) <> xs)
mkTreeNode :: TreeNode -> (TreeNode, [TreeNode])
mkTreeNode (k, el) = ((k, el), mapMaybe findChildren $ mSetToList (el ^. ne_children))
findChildren :: NgramsTerm -> Maybe TreeNode
findChildren t = Map.lookup t mp <&> \el -> (t, el)
getChildren :: [NgramsTerm] -> [NgramsElement]
getChildren = mapMaybe (`Map.lookup` mp)
-- | Folds an Ngrams forest back to a table map. -- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely just recostructs the original -- This function doesn't aggregate information, but merely just recostructs the original
...@@ -497,15 +534,21 @@ destroyForest f = Map.fromList . map (foldTree destroyTree) $ f ...@@ -497,15 +534,21 @@ destroyForest f = Map.fromList . map (foldTree destroyTree) $ f
searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement) searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> NgramsSearchQuery -> NgramsSearchQuery
-- ^ The search query on the retrieved data -- ^ The search query on the retrieved data
-> VersionedWithCount NgramsTable -> Either BuildForestError (VersionedWithCount NgramsTable)
searchTableNgrams versionedTableMap NgramsSearchQuery{..} = searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
let tableMap = versionedTableMap ^. v_data let tableMap = versionedTableMap ^. v_data
filteredData = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery tableMap in case buildForest tableMap of
forestRoots = Set.fromList . Map.elems . destroyForest . buildForest $ filteredData Left err -> Left err
tableMapSorted = versionedTableMap Right fs ->
& v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ forestRoots) let forestRoots = Set.fromList
. Map.elems
in toVersionedWithCount (Set.size forestRoots) tableMapSorted . destroyForest
. filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery
$ fs
tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ forestRoots)
in Right $ toVersionedWithCount (Set.size forestRoots) tableMapSorted
where where
-- Sorts the input 'NgramsElement' list. -- Sorts the input 'NgramsElement' list.
...@@ -564,8 +607,11 @@ getTableNgrams :: NodeStoryEnv err ...@@ -564,8 +607,11 @@ getTableNgrams :: NodeStoryEnv err
getTableNgrams env nodeId listId tabType searchQuery = do getTableNgrams env nodeId listId tabType searchQuery = do
let ngramsType = ngramsTypeFromTabType tabType let ngramsType = ngramsTypeFromTabType tabType
versionedInput <- getNgramsTable' env nodeId listId ngramsType versionedInput <- getNgramsTable' env nodeId listId ngramsType
pure $ searchTableNgrams versionedInput searchQuery -- FIXME(adn) In case of a loop at the moment we just return the
-- empty result set, but we should probably bubble the error upstream.
pure $ case searchTableNgrams versionedInput searchQuery of
Left _err -> VersionedWithCount 0 0 (NgramsTable mempty)
Right x -> x
-- | Helper function to get the ngrams table with scores. -- | Helper function to get the ngrams table with scores.
getNgramsTable' :: NodeStoryEnv err getNgramsTable' :: NodeStoryEnv err
...@@ -610,11 +656,6 @@ setNgramsTableScores nId listId ngramsType table = do ...@@ -610,11 +656,6 @@ setNgramsTableScores nId listId ngramsType table = do
-- TODO: find a better place for the code above, All APIs stay here -- TODO: find a better place for the code above, All APIs stay here
needsScores :: Maybe OrderBy -> Bool
needsScores (Just ScoreAsc) = True
needsScores (Just ScoreDesc) = True
needsScores _ = False
getTableNgramsCorpusHandler :: (IsDBCmd err env m, HasNodeStoryEnv err env) getTableNgramsCorpusHandler :: (IsDBCmd err env m, HasNodeStoryEnv err env)
=> NodeId => NodeId
-> TabType -> TabType
......
...@@ -208,7 +208,7 @@ instance ToSchema NgramsElement where ...@@ -208,7 +208,7 @@ instance ToSchema NgramsElement where
instance NFData NgramsElement where instance NFData NgramsElement where
------------------------------------------------------------------------ ------------------------------------------------------------------------
newtype NgramsTable = NgramsTable [NgramsElement] newtype NgramsTable = NgramsTable { getNgramsTable :: [NgramsElement] }
deriving stock (Ord, Eq, Generic, Show) deriving stock (Ord, Eq, Generic, Show)
deriving newtype (ToJSON, FromJSON) deriving newtype (ToJSON, FromJSON)
deriving anyclass (ToExpr) deriving anyclass (ToExpr)
......
...@@ -14,18 +14,22 @@ Portability : POSIX ...@@ -14,18 +14,22 @@ Portability : POSIX
{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
module Test.Instances module Test.Instances where
where
import Control.Lens hiding (elements)
import Data.List.NonEmpty qualified as NE import Data.List.NonEmpty qualified as NE
import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM import Data.Map.Strict.Patch qualified as PM
import Data.Patch.Class (Replace, replace) import Data.Patch.Class (Replace, replace)
import Data.Set qualified as Set
import Data.Text qualified as T import Data.Text qualified as T
import Data.Validity (Validation(..), ValidationChain (..), prettyValidation) import Data.Validity (Validation(..), ValidationChain (..), prettyValidation)
import EPO.API.Client.Types qualified as EPO import EPO.API.Client.Types qualified as EPO
import Gargantext.API.Admin.Auth.Types (AuthenticatedUser, ForgotPasswordAsyncParams) import Gargantext.API.Admin.Auth.Types (AuthenticatedUser, ForgotPasswordAsyncParams)
import Gargantext.API.Admin.Orchestrator.Types qualified as Orch import Gargantext.API.Admin.Orchestrator.Types qualified as Orch
import Gargantext.API.Errors.Types qualified as Errors import Gargantext.API.Errors.Types qualified as Errors
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types qualified as Ngrams import Gargantext.API.Ngrams.Types qualified as Ngrams
import Gargantext.API.Node.Contact.Types (AddContactParams) import Gargantext.API.Node.Contact.Types (AddContactParams)
import Gargantext.API.Node.Corpus.Annuaire (AnnuaireWithForm) import Gargantext.API.Node.Corpus.Annuaire (AnnuaireWithForm)
...@@ -33,14 +37,14 @@ import Gargantext.API.Node.Corpus.New (ApiInfo) ...@@ -33,14 +37,14 @@ import Gargantext.API.Node.Corpus.New (ApiInfo)
import Gargantext.API.Node.Corpus.New.Types (FileFormat, FileType) import Gargantext.API.Node.Corpus.New.Types (FileFormat, FileType)
import Gargantext.API.Node.Corpus.Types (Datafield) import Gargantext.API.Node.Corpus.Types (Datafield)
import Gargantext.API.Node.Corpus.Types qualified as CT import Gargantext.API.Node.Corpus.Types qualified as CT
import Gargantext.API.Node.DocumentsFromWriteNodes.Types qualified as DFWN
import Gargantext.API.Node.DocumentUpload.Types (DocumentUpload) import Gargantext.API.Node.DocumentUpload.Types (DocumentUpload)
import Gargantext.API.Node.DocumentsFromWriteNodes.Types qualified as DFWN
import Gargantext.API.Node.FrameCalcUpload.Types qualified as FCU import Gargantext.API.Node.FrameCalcUpload.Types qualified as FCU
import Gargantext.API.Node.Get (GetNodeParams) import Gargantext.API.Node.Get (GetNodeParams)
import Gargantext.API.Node.New.Types (PostNode(..)) import Gargantext.API.Node.New.Types (PostNode(..))
import Gargantext.API.Node.Share.Types (ShareNodeParams(..)) import Gargantext.API.Node.Share.Types (ShareNodeParams(..))
import Gargantext.API.Node.Update.Types qualified as NU
import Gargantext.API.Node.Types (NewWithForm, NewWithTempFile(..), RenameNode(..), WithQuery) import Gargantext.API.Node.Types (NewWithForm, NewWithTempFile(..), RenameNode(..), WithQuery)
import Gargantext.API.Node.Update.Types qualified as NU
import Gargantext.API.Public.Types (PublicData(..)) import Gargantext.API.Public.Types (PublicData(..))
import Gargantext.API.Routes.Named.Publish (PublishRequest(..)) import Gargantext.API.Routes.Named.Publish (PublishRequest(..))
import Gargantext.API.Routes.Named.Remote (RemoteExportRequest(..)) import Gargantext.API.Routes.Named.Remote (RemoteExportRequest(..))
...@@ -61,12 +65,12 @@ import Gargantext.Database.Admin.Types.Node (NodeId(..), UserId(..), NodeType(.. ...@@ -61,12 +65,12 @@ import Gargantext.Database.Admin.Types.Node (NodeId(..), UserId(..), NodeType(..
import Gargantext.Database.Query.Facet (OrderBy(..)) import Gargantext.Database.Query.Facet (OrderBy(..))
import Gargantext.Prelude hiding (replace, Location) import Gargantext.Prelude hiding (replace, Location)
import Servant.Client.Core.BaseUrl (BaseUrl(..), Scheme(Http)) import Servant.Client.Core.BaseUrl (BaseUrl(..), Scheme(Http))
import Text.Parsec.Error (ParseError, Message(..), newErrorMessage)
import Text.Parsec.Pos
import Test.QuickCheck import Test.QuickCheck
import Test.QuickCheck.Arbitrary.Generic import Test.QuickCheck.Arbitrary.Generic
import Test.QuickCheck.Instances.Text () import Test.QuickCheck.Instances.Text ()
import Test.QuickCheck.Instances.Vector () import Test.QuickCheck.Instances.Vector ()
import Text.Parsec.Error (ParseError, Message(..), newErrorMessage)
import Text.Parsec.Pos
instance Arbitrary AuthenticatedUser where arbitrary = genericArbitrary instance Arbitrary AuthenticatedUser where arbitrary = genericArbitrary
...@@ -500,14 +504,14 @@ instance Arbitrary Ngrams.NgramsElement where ...@@ -500,14 +504,14 @@ instance Arbitrary Ngrams.NgramsElement where
-- because we still want to simulate potential hierarchies, i.e. forests of ngrams. -- because we still want to simulate potential hierarchies, i.e. forests of ngrams.
-- so we sample the ngrams terms from a selection, and we restrict the number of max -- so we sample the ngrams terms from a selection, and we restrict the number of max
-- children for each 'NgramsElement' to the size parameter to not have very large trees. -- children for each 'NgramsElement' to the size parameter to not have very large trees.
arbitrary = do arbitrary = sized $ \n -> do
_ne_ngrams <- arbitrary _ne_ngrams <- arbitrary
_ne_size <- arbitrary _ne_size <- getPositive <$> arbitrary -- it doesn't make sense to have a negative size
_ne_list <- arbitrary _ne_list <- arbitrary
_ne_occurrences <- arbitrary _ne_occurrences <- resize n arbitrary
_ne_root <- arbitrary `suchThat` (maybe True (\x -> x /= _ne_ngrams)) -- can't be root of itself _ne_root <- arbitrary `suchThat` (maybe True (\x -> x /= _ne_ngrams)) -- can't be root of itself
_ne_parent <- arbitrary `suchThat` (maybe True (\x -> x /= _ne_ngrams)) -- can't be parent of itself _ne_parent <- arbitrary `suchThat` (maybe True (\x -> x /= _ne_ngrams)) -- can't be parent of itself
_ne_children <- Ngrams.mSetFromList <$> (sized (\n -> vectorOf n arbitrary `suchThat` (\x -> _ne_ngrams `notElem` x))) -- can't be cyclic _ne_children <- Ngrams.mSetFromList <$> (vectorOf n arbitrary `suchThat` (\x -> _ne_ngrams `notElem` x)) -- can't be cyclic
pure Ngrams.NgramsElement{..} pure Ngrams.NgramsElement{..}
instance Arbitrary Ngrams.NgramsTable where instance Arbitrary Ngrams.NgramsTable where
...@@ -738,3 +742,53 @@ instance Arbitrary Job where ...@@ -738,3 +742,53 @@ instance Arbitrary Job where
recomputeGraphGen = RecomputeGraph <$> arbitrary recomputeGraphGen = RecomputeGraph <$> arbitrary
updateNodeGen = UpdateNode <$> arbitrary <*> arbitrary updateNodeGen = UpdateNode <$> arbitrary <*> arbitrary
uploadDocumentGen = UploadDocument <$> arbitrary <*> arbitrary uploadDocumentGen = UploadDocument <$> arbitrary <*> arbitrary
-- | An 'AcyclicTableMap' models a map that associates an 'NgramsTerm' with its 'NgramsElement'.
-- In particular, there are a few preconditions that govers its QuickCheck generator:
--
-- 1. The key matches the '_ne_ngrams' field of each 'NgramsElement';
-- 2. Acyclic: loops are removed.
-- The 'AcyclicTableMap' also carries a random element of the map, which can be
-- used in other tests requiring more specific orchestrations (for example query tests, where
-- we need to be searching for an element in the collection).
data AcyclicTableMap =
AcyclicTableMap
{ getTableMap :: Map NgramsTerm NgramsElement
, randomMapElement :: NgramsElement
}
deriving (Show, Eq)
-- | Generate a hierarchy of ngrams element /WITH NO LOOPS/.
-- Furthermore, we need to ensure that when we generate elements, we are not
-- accidentally creating duplicates, because ggtx won't allow us (in the frontend, at least)
-- to create (say) a MapTerm called \"foo\" if we have already a candidate term with that name.
genCorpusWithMatchingElement :: Gen AcyclicTableMap
genCorpusWithMatchingElement = do
-- cap the depth of the tree, to not make the tests too slow
depth <- choose (1, 5)
let mkEntry = do
trm <- arbitrary
el <- over ne_children (breakLoop trm) <$> (resize depth arbitrary)
pure (trm, el { _ne_ngrams = trm })
-- Let's build the map first, so that duplicates will be overwritten.
fullMap <- (Map.fromList <$> vectorOf depth mkEntry) `suchThat` (\x -> isRight (buildForest x)) -- exclude loops
let (hd NE.:| _) = NE.fromList $ Map.elems fullMap
pure $ AcyclicTableMap fullMap hd
where
breakLoop :: NgramsTerm -> MSet NgramsTerm -> MSet NgramsTerm
breakLoop t = mSetFromSet . Set.delete t . mSetToSet
instance Arbitrary AcyclicTableMap where
arbitrary = genCorpusWithMatchingElement
shrink = shrinkTree
shrinkTree :: AcyclicTableMap -> [AcyclicTableMap]
shrinkTree (AcyclicTableMap mp el) =
[ AcyclicTableMap (Map.insert k shrunk mp) (if k == _ne_ngrams el then shrunk else el)
| (k, ne) <- Map.toList mp
, shrunkChildren <- shrinkSet (_ne_children ne)
, let shrunk = ne { _ne_children = shrunkChildren }
]
shrinkSet :: Ord a => MSet a -> [MSet a]
shrinkSet s = map mSetFromList (shrinkList (const []) (mSetToList s))
This diff is collapsed.
...@@ -13,7 +13,7 @@ import Data.Map.Strict (Map) ...@@ -13,7 +13,7 @@ import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Text qualified as T import Data.Text qualified as T
import Data.Tree import Data.Tree
import Gargantext.API.Ngrams (filterNgramsNodes, buildForest, destroyForest, pruneForest) import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types qualified as NT import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core import Gargantext.Core
...@@ -26,8 +26,8 @@ import Gargantext.Database.Schema.Context ...@@ -26,8 +26,8 @@ import Gargantext.Database.Schema.Context
import Test.HUnit import Test.HUnit
import Test.Hspec import Test.Hspec
import Test.Hspec.QuickCheck (prop) import Test.Hspec.QuickCheck (prop)
import Test.Instances () import Test.Instances (AcyclicTableMap(..))
import Test.Ngrams.Query (mkMapTerm) import Test.Ngrams.Query (mkMapTerm, hierarchicalTableMap)
import Test.QuickCheck import Test.QuickCheck
import Test.QuickCheck qualified as QC import Test.QuickCheck qualified as QC
import Text.RawString.QQ (r) import Text.RawString.QQ (r)
...@@ -113,19 +113,9 @@ tests = describe "Ngrams" $ do ...@@ -113,19 +113,9 @@ tests = describe "Ngrams" $ do
describe "hierarchical grouping" $ do describe "hierarchical grouping" $ do
it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery
hierarchicalTableMap :: Map NgramsTerm NgramsElement
hierarchicalTableMap = Map.fromList [
("vehicle", mkMapTerm "vehicle" & ne_children .~ mSetFromList ["car"])
, ("car", mkMapTerm "car" & ne_root .~ Just "vehicle"
& ne_parent .~ Just "vehicle"
& ne_children .~ mSetFromList ["ford"])
, ("ford", mkMapTerm "ford" & ne_root .~ Just "vehicle"
& ne_parent .~ Just "car")
]
testFilterNgramsNodesEmptyQuery :: Assertion testFilterNgramsNodesEmptyQuery :: Assertion
testFilterNgramsNodesEmptyQuery = do testFilterNgramsNodesEmptyQuery = do
let input = hierarchicalTableMap let input = buildForestOrFail hierarchicalTableMap
let actual = filterNgramsNodes (Just MapTerm) Nothing Nothing (const True) input let actual = filterNgramsNodes (Just MapTerm) Nothing Nothing (const True) input
actual @?= input actual @?= input
...@@ -149,6 +139,11 @@ newtype ASCIIForest = ASCIIForest String ...@@ -149,6 +139,11 @@ newtype ASCIIForest = ASCIIForest String
instance Show ASCIIForest where instance Show ASCIIForest where
show (ASCIIForest x) = x show (ASCIIForest x) = x
buildForestOrFail :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrFail mp = case buildForest mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x
compareForestVisually :: Forest NgramsElement -> String -> Property compareForestVisually :: Forest NgramsElement -> String -> Property
compareForestVisually f expected = compareForestVisually f expected =
let actual = init $ drawForest (map (fmap renderEl) f) let actual = init $ drawForest (map (fmap renderEl) f)
...@@ -173,7 +168,7 @@ testBuildNgramsTree_01 = ...@@ -173,7 +168,7 @@ testBuildNgramsTree_01 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"]) let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_parent .~ Just "foo") , ( "bar", mkMapTerm "bar" & ne_parent .~ Just "foo")
] ]
in (buildForest t1) `compareForestVisually` [r| in (buildForestOrFail t1) `compareForestVisually` [r|
bar bar
foo foo
...@@ -183,7 +178,7 @@ testBuildNgramsTree_01 = ...@@ -183,7 +178,7 @@ testBuildNgramsTree_01 =
testBuildNgramsTree_02 :: Property testBuildNgramsTree_02 :: Property
testBuildNgramsTree_02 = testBuildNgramsTree_02 =
buildForest hierarchicalTableMap `compareForestVisually` [r| buildForestOrFail hierarchicalTableMap `compareForestVisually` [r|
car car
| |
`- ford `- ford
...@@ -246,7 +241,7 @@ testBuildNgramsTree_03 = ...@@ -246,7 +241,7 @@ testBuildNgramsTree_03 =
) )
] ]
in pruneForest (buildForest input) `compareForestVisually` [r| in pruneForest (buildForestOrFail input) `compareForestVisually` [r|
animalia animalia
| |
`- chordata `- chordata
...@@ -271,25 +266,17 @@ testBuildNgramsTree_03 = ...@@ -271,25 +266,17 @@ testBuildNgramsTree_03 =
|] |]
newtype TableMapLockStep = TableMapLockStep { getTableMap :: Map NgramsTerm NgramsElement }
deriving (Show, Eq)
instance Arbitrary TableMapLockStep where
arbitrary = do
pairs <- map (\(k,v) -> (k, v & ne_ngrams .~ k)) <$> arbitrary
pure $ TableMapLockStep (Map.fromList pairs)
-- /PRECONDITION/: The '_ne_ngrams' field always matches the 'NgramsTerm', key of the map. -- /PRECONDITION/: The '_ne_ngrams' field always matches the 'NgramsTerm', key of the map.
buildDestroyForestRoundtrips :: TableMapLockStep -> Property buildDestroyForestRoundtrips :: AcyclicTableMap -> Property
buildDestroyForestRoundtrips (TableMapLockStep mp) = buildDestroyForestRoundtrips (AcyclicTableMap mp _) =
(destroyForest . buildForest $ mp) === mp (destroyForest . buildForestOrFail $ mp) === mp
testPruningNgramsForest_01 :: Property testPruningNgramsForest_01 :: Property
testPruningNgramsForest_01 = testPruningNgramsForest_01 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"]) let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_parent .~ Just "foo") , ( "bar", mkMapTerm "bar" & ne_parent .~ Just "foo")
] ]
in (pruneForest $ buildForest t1) `compareForestVisually` [r| in (pruneForest $ buildForestOrFail t1) `compareForestVisually` [r|
foo foo
| |
`- bar `- bar
...@@ -297,7 +284,7 @@ testPruningNgramsForest_01 = ...@@ -297,7 +284,7 @@ testPruningNgramsForest_01 =
testPruningNgramsForest_02 :: Property testPruningNgramsForest_02 :: Property
testPruningNgramsForest_02 = testPruningNgramsForest_02 =
(pruneForest $ buildForest hierarchicalTableMap) `compareForestVisually` [r| (pruneForest $ buildForestOrFail hierarchicalTableMap) `compareForestVisually` [r|
vehicle vehicle
| |
`- car `- car
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment