Commit f036a436 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Allow terms to be searched even if they appear nested

parent 204c2052
......@@ -25,6 +25,7 @@ add get
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TupleSections #-}
module Gargantext.API.Ngrams
......@@ -91,6 +92,8 @@ module Gargantext.API.Ngrams
, filterNgramsNodes
-- * Operations on a forest
, BuildForestError(..)
, renderLoop
, buildForest
, destroyForest
, pruneForest
......@@ -105,12 +108,12 @@ import Data.Map.Strict.Patch qualified as PM
import Data.Patch.Class (Action(act), Transformable(..), ours)
import Data.Set qualified as Set
import Data.Text (isInfixOf, toLower, unpack)
import Data.Text qualified as T
import Data.Text.Lazy.IO as DTL ( writeFile )
import Data.Tree
import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.NodeStory qualified as NodeStory
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, ContextId, HasValidationError)
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
......@@ -451,7 +454,7 @@ matchingNode :: Maybe ListType
-> (NgramsTerm -> Bool)
-> Tree NgramsElement
-> Bool
matchingNode listType minSize maxSize searchQuery (Node inputNode _children) =
matchingNode listType minSize maxSize searchQuery (Node inputNode children) =
let nodeSize = inputNode ^. ne_size
matchesListType = maybe (const True) (==) listType
respectsMinSize = maybe (const True) ((<=) . getMinSize) minSize
......@@ -459,15 +462,67 @@ matchingNode listType minSize maxSize searchQuery (Node inputNode _children) =
in respectsMinSize nodeSize
&& respectsMaxSize nodeSize
&& searchQuery (inputNode ^. ne_ngrams)
-- Search for the query either in the root or in the children.
&& (searchQuery (inputNode ^. ne_ngrams) || any (matchingNode listType minSize maxSize searchQuery) children)
&& matchesListType (inputNode ^. ne_list)
-- | Errors returned by 'buildForest'.
data BuildForestError
= -- We found a loop, something that shouldn't normally happen if the calling
-- code is correct by construction, but if that does happen, the value will
-- contain the full path to the cycle.
BFE_loop_detected !(Set VisitedNode)
deriving (Show, Eq)
renderLoop :: Set VisitedNode -> T.Text
renderLoop = T.intercalate " -> " . map (unNgramsTerm . _vn_term) . Set.toAscList
-- | Keeps track of the relative order in which visited a node, to be able to print cycles.
data VisitedNode =
VN { _vn_position :: !Int, _vn_term :: !NgramsTerm }
deriving (Show)
instance Eq VisitedNode where
(VN _ t1) == (VN _ t2) = t1 == t2
instance Ord VisitedNode where
compare (VN _ t1) (VN _ t2) = t1 `compare` t2
type TreeNode = (NgramsTerm, NgramsElement)
-- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree.
-- We can't use a single function to \"rule them all\" because the 'NgramsRepoElement', that
-- the 'NodeStory' uses does not have an 'ngrams' we can use as the key when building and
-- destroying a forest.
buildForest :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForest = map (fmap snd) . NodeStory.buildForest
-- /IMPORTANT/: This functions returns an error in case we found a loop.
buildForest :: Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest mp = fmap (map (fmap snd)) . unfoldForestM unfoldNode $ Map.toList mp
where
unfoldNode :: TreeNode -> Either BuildForestError (TreeNode, [TreeNode])
unfoldNode (n, el) = flip evalState (1 :: Int, mempty) . runExceptT $ do
let initialChildren = getChildren (mSetToList $ _ne_children el)
go initialChildren *> pure (mkTreeNode (n, el))
where
go :: [ NgramsElement ]
-> ExceptT BuildForestError (State (Int, Set VisitedNode)) ()
go [] = pure ()
go (x:xs) = do
(pos, visited) <- get
let nt = _ne_ngrams x
case Set.member (VN pos nt) visited of
True -> throwError $ BFE_loop_detected visited
False -> do
put (pos + 1, Set.insert (VN (pos + 1) nt) visited)
go (getChildren (mSetToList $ _ne_children x) <> xs)
mkTreeNode :: TreeNode -> (TreeNode, [TreeNode])
mkTreeNode (k, el) = ((k, el), mapMaybe findChildren $ mSetToList (el ^. ne_children))
findChildren :: NgramsTerm -> Maybe TreeNode
findChildren t = Map.lookup t mp <&> \el -> (t, el)
getChildren :: [NgramsTerm] -> [NgramsElement]
getChildren = mapMaybe (`Map.lookup` mp)
-- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely just recostructs the original
......@@ -494,19 +549,21 @@ destroyForest f = Map.fromList . map (foldTree destroyTree) $ f
searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> NgramsSearchQuery
-- ^ The search query on the retrieved data
-> VersionedWithCount NgramsTable
-> Either BuildForestError (VersionedWithCount NgramsTable)
searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
let tableMap = versionedTableMap ^. v_data
forestRoots = Set.fromList
. Map.elems
. destroyForest
. filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery
. buildForest
$ tableMap
tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ forestRoots)
in toVersionedWithCount (Set.size forestRoots) tableMapSorted
in case buildForest tableMap of
Left err -> Left err
Right fs ->
let forestRoots = Set.fromList
. Map.elems
. destroyForest
. filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery
$ fs
tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ forestRoots)
in Right $ toVersionedWithCount (Set.size forestRoots) tableMapSorted
where
-- Sorts the input 'NgramsElement' list.
......@@ -565,8 +622,11 @@ getTableNgrams :: NodeStoryEnv err
getTableNgrams env nodeId listId tabType searchQuery = do
let ngramsType = ngramsTypeFromTabType tabType
versionedInput <- getNgramsTable' env nodeId listId ngramsType
pure $ searchTableNgrams versionedInput searchQuery
-- FIXME(adn) In case of a loop at the moment we just return the
-- empty result set, but we should probably bubble the error upstream.
pure $ case searchTableNgrams versionedInput searchQuery of
Left _err -> VersionedWithCount 0 0 (NgramsTable mempty)
Right x -> x
-- | Helper function to get the ngrams table with scores.
getNgramsTable' :: NodeStoryEnv err
......
......@@ -500,14 +500,14 @@ instance Arbitrary Ngrams.NgramsElement where
-- because we still want to simulate potential hierarchies, i.e. forests of ngrams.
-- so we sample the ngrams terms from a selection, and we restrict the number of max
-- children for each 'NgramsElement' to the size parameter to not have very large trees.
arbitrary = do
arbitrary = sized $ \n -> do
_ne_ngrams <- arbitrary
_ne_size <- getPositive <$> arbitrary -- it doesn't make sense to have a negative size
_ne_list <- arbitrary
_ne_occurrences <- arbitrary
_ne_occurrences <- resize n arbitrary
_ne_root <- arbitrary `suchThat` (maybe True (\x -> x /= _ne_ngrams)) -- can't be root of itself
_ne_parent <- arbitrary `suchThat` (maybe True (\x -> x /= _ne_ngrams)) -- can't be parent of itself
_ne_children <- Ngrams.mSetFromList <$> (sized (\n -> vectorOf n arbitrary `suchThat` (\x -> _ne_ngrams `notElem` x))) -- can't be cyclic
_ne_children <- Ngrams.mSetFromList <$> (vectorOf n arbitrary `suchThat` (\x -> _ne_ngrams `notElem` x)) -- can't be cyclic
pure Ngrams.NgramsElement{..}
instance Arbitrary Ngrams.NgramsTable where
......
This diff is collapsed.
......@@ -125,7 +125,7 @@ hierarchicalTableMap = Map.fromList [
testFilterNgramsNodesEmptyQuery :: Assertion
testFilterNgramsNodesEmptyQuery = do
let input = buildForest hierarchicalTableMap
let input = buildForestOrFail hierarchicalTableMap
let actual = filterNgramsNodes (Just MapTerm) Nothing Nothing (const True) input
actual @?= input
......@@ -149,6 +149,11 @@ newtype ASCIIForest = ASCIIForest String
instance Show ASCIIForest where
show (ASCIIForest x) = x
buildForestOrFail :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrFail mp = case buildForest mp of
Left err -> error (show err)
Right x -> x
compareForestVisually :: Forest NgramsElement -> String -> Property
compareForestVisually f expected =
let actual = init $ drawForest (map (fmap renderEl) f)
......@@ -173,7 +178,7 @@ testBuildNgramsTree_01 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_parent .~ Just "foo")
]
in (buildForest t1) `compareForestVisually` [r|
in (buildForestOrFail t1) `compareForestVisually` [r|
bar
foo
......@@ -183,7 +188,7 @@ testBuildNgramsTree_01 =
testBuildNgramsTree_02 :: Property
testBuildNgramsTree_02 =
buildForest hierarchicalTableMap `compareForestVisually` [r|
buildForestOrFail hierarchicalTableMap `compareForestVisually` [r|
car
|
`- ford
......@@ -246,7 +251,7 @@ testBuildNgramsTree_03 =
)
]
in pruneForest (buildForest input) `compareForestVisually` [r|
in pruneForest (buildForestOrFail input) `compareForestVisually` [r|
animalia
|
`- chordata
......@@ -282,14 +287,14 @@ instance Arbitrary TableMapLockStep where
-- /PRECONDITION/: The '_ne_ngrams' field always matches the 'NgramsTerm', key of the map.
buildDestroyForestRoundtrips :: TableMapLockStep -> Property
buildDestroyForestRoundtrips (TableMapLockStep mp) =
(destroyForest . buildForest $ mp) === mp
(destroyForest . buildForestOrFail $ mp) === mp
testPruningNgramsForest_01 :: Property
testPruningNgramsForest_01 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_parent .~ Just "foo")
]
in (pruneForest $ buildForest t1) `compareForestVisually` [r|
in (pruneForest $ buildForestOrFail t1) `compareForestVisually` [r|
foo
|
`- bar
......@@ -297,7 +302,7 @@ testPruningNgramsForest_01 =
testPruningNgramsForest_02 :: Property
testPruningNgramsForest_02 =
(pruneForest $ buildForest hierarchicalTableMap) `compareForestVisually` [r|
(pruneForest $ buildForestOrFail hierarchicalTableMap) `compareForestVisually` [r|
vehicle
|
`- car
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment