Commit 616f2982 authored by Fabien Maniere's avatar Fabien Maniere

Merge branch 'adinapoli/issue-498' into 'dev'

Allow ngrams to be searched even if they appear deeply nested

See merge request !433
parents e0c188ca 30d2b444
Pipeline #7824 passed with stages
in 51 minutes and 17 seconds
......@@ -15,9 +15,6 @@ add get
-}
{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE IncoherentInstances #-}
......@@ -25,6 +22,7 @@ add get
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TupleSections #-}
module Gargantext.API.Ngrams
......@@ -91,6 +89,8 @@ module Gargantext.API.Ngrams
, filterNgramsNodes
-- * Operations on a forest
, BuildForestError(..)
, renderLoop
, buildForest
, destroyForest
, pruneForest
......@@ -105,12 +105,12 @@ import Data.Map.Strict.Patch qualified as PM
import Data.Patch.Class (Action(act), Transformable(..), ours)
import Data.Set qualified as Set
import Data.Text (isInfixOf, toLower, unpack)
import Data.Text qualified as T
import Data.Text.Lazy.IO as DTL ( writeFile )
import Data.Tree
import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.NodeStory qualified as NodeStory
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, ContextId, HasValidationError)
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
......@@ -173,10 +173,6 @@ saveNodeStory env nId a = do
let saver = view hasNodeStoryImmediateSaver env
saver nId a
listTypeConflictResolution :: ListType -> ListType -> ListType
listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
ngramsStatePatchConflictResolution :: NgramsType
-> NgramsTerm
-> ConflictResolutionNgramsPatch
......@@ -190,14 +186,6 @@ ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
-- Current state:
-- Insertions are not considered as patches,
-- they do not extend history,
-- they do not bump version.
insertNewOnly :: a -> Maybe b -> a
insertNewOnly m = maybe m (const $ errorTrace "insertNewOnly: impossible")
-- TODO error handling
{- unused
-- TODO refactor with putListNgrams
copyListNgrams :: RepoCmdM env err m
......@@ -438,13 +426,10 @@ filterNgramsNodes :: Maybe ListType
-> Maybe MinSize
-> Maybe MaxSize
-> (NgramsTerm -> Bool)
-> Map NgramsTerm NgramsElement
-> Map NgramsTerm NgramsElement
filterNgramsNodes listTy minSize maxSize searchFn tblMap =
flip Map.mapMaybe tblMap $ \e ->
case matchingNode listTy minSize maxSize searchFn e of
False -> Nothing
True -> Just e
-> Forest NgramsElement
-> Forest NgramsElement
filterNgramsNodes listTy minSize maxSize searchFn =
filter (matchingNode listTy minSize maxSize searchFn)
-- | Returns 'True' if the input 'NgramsElement' satisfies the search criteria
-- mandated by 'NgramsSearchQuery'.
......@@ -452,9 +437,9 @@ matchingNode :: Maybe ListType
-> Maybe MinSize
-> Maybe MaxSize
-> (NgramsTerm -> Bool)
-> NgramsElement
-> Tree NgramsElement
-> Bool
matchingNode listType minSize maxSize searchQuery inputNode =
matchingNode listType minSize maxSize searchQuery (Node inputNode children) =
let nodeSize = inputNode ^. ne_size
matchesListType = maybe (const True) (==) listType
respectsMinSize = maybe (const True) ((<=) . getMinSize) minSize
......@@ -462,15 +447,67 @@ matchingNode listType minSize maxSize searchQuery inputNode =
in respectsMinSize nodeSize
&& respectsMaxSize nodeSize
&& searchQuery (inputNode ^. ne_ngrams)
-- Search for the query either in the root or in the children.
&& (searchQuery (inputNode ^. ne_ngrams) || any (matchingNode listType minSize maxSize searchQuery) children)
&& matchesListType (inputNode ^. ne_list)
-- | Errors returned by 'buildForest'.
data BuildForestError
= -- We found a loop, something that shouldn't normally happen if the calling
-- code is correct by construction, but if that does happen, the value will
-- contain the full path to the cycle.
BFE_loop_detected !(Set VisitedNode)
deriving (Show, Eq)
renderLoop :: Set VisitedNode -> T.Text
renderLoop = T.intercalate " -> " . map (unNgramsTerm . _vn_term) . Set.toAscList
-- | Keeps track of the relative order in which visited a node, to be able to print cycles.
data VisitedNode =
VN { _vn_position :: !Int, _vn_term :: !NgramsTerm }
deriving (Show)
instance Eq VisitedNode where
(VN _ t1) == (VN _ t2) = t1 == t2
instance Ord VisitedNode where
compare (VN _ t1) (VN _ t2) = t1 `compare` t2
type TreeNode = (NgramsTerm, NgramsElement)
-- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree.
-- We can't use a single function to \"rule them all\" because the 'NgramsRepoElement', that
-- the 'NodeStory' uses does not have an 'ngrams' we can use as the key when building and
-- destroying a forest.
buildForest :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForest = map (fmap snd) . NodeStory.buildForest
-- /IMPORTANT/: This functions returns an error in case we found a loop.
buildForest :: Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest mp = fmap (map (fmap snd)) . unfoldForestM unfoldNode $ Map.toList mp
where
unfoldNode :: TreeNode -> Either BuildForestError (TreeNode, [TreeNode])
unfoldNode (n, el) = flip evalState (1 :: Int, mempty) . runExceptT $ do
let initialChildren = getChildren (mSetToList $ _ne_children el)
go initialChildren *> pure (mkTreeNode (n, el))
where
go :: [ NgramsElement ]
-> ExceptT BuildForestError (State (Int, Set VisitedNode)) ()
go [] = pure ()
go (x:xs) = do
(pos, visited) <- get
let nt = _ne_ngrams x
case Set.member (VN pos nt) visited of
True -> throwError $ BFE_loop_detected visited
False -> do
put (pos + 1, Set.insert (VN (pos + 1) nt) visited)
go (getChildren (mSetToList $ _ne_children x) <> xs)
mkTreeNode :: TreeNode -> (TreeNode, [TreeNode])
mkTreeNode (k, el) = ((k, el), mapMaybe findChildren $ mSetToList (el ^. ne_children))
findChildren :: NgramsTerm -> Maybe TreeNode
findChildren t = Map.lookup t mp <&> \el -> (t, el)
getChildren :: [NgramsTerm] -> [NgramsElement]
getChildren = mapMaybe (`Map.lookup` mp)
-- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely just recostructs the original
......@@ -497,15 +534,21 @@ destroyForest f = Map.fromList . map (foldTree destroyTree) $ f
searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> NgramsSearchQuery
-- ^ The search query on the retrieved data
-> VersionedWithCount NgramsTable
-> Either BuildForestError (VersionedWithCount NgramsTable)
searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
let tableMap = versionedTableMap ^. v_data
filteredData = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery tableMap
forestRoots = Set.fromList . Map.elems . destroyForest . buildForest $ filteredData
in case buildForest tableMap of
Left err -> Left err
Right fs ->
let forestRoots = Set.fromList
. Map.elems
. destroyForest
. filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery
$ fs
tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ forestRoots)
in toVersionedWithCount (Set.size forestRoots) tableMapSorted
in Right $ toVersionedWithCount (Set.size forestRoots) tableMapSorted
where
-- Sorts the input 'NgramsElement' list.
......@@ -564,8 +607,11 @@ getTableNgrams :: NodeStoryEnv err
getTableNgrams env nodeId listId tabType searchQuery = do
let ngramsType = ngramsTypeFromTabType tabType
versionedInput <- getNgramsTable' env nodeId listId ngramsType
pure $ searchTableNgrams versionedInput searchQuery
-- FIXME(adn) In case of a loop at the moment we just return the
-- empty result set, but we should probably bubble the error upstream.
pure $ case searchTableNgrams versionedInput searchQuery of
Left _err -> VersionedWithCount 0 0 (NgramsTable mempty)
Right x -> x
-- | Helper function to get the ngrams table with scores.
getNgramsTable' :: NodeStoryEnv err
......@@ -610,11 +656,6 @@ setNgramsTableScores nId listId ngramsType table = do
-- TODO: find a better place for the code above, All APIs stay here
needsScores :: Maybe OrderBy -> Bool
needsScores (Just ScoreAsc) = True
needsScores (Just ScoreDesc) = True
needsScores _ = False
getTableNgramsCorpusHandler :: (IsDBCmd err env m, HasNodeStoryEnv err env)
=> NodeId
-> TabType
......
......@@ -208,7 +208,7 @@ instance ToSchema NgramsElement where
instance NFData NgramsElement where
------------------------------------------------------------------------
newtype NgramsTable = NgramsTable [NgramsElement]
newtype NgramsTable = NgramsTable { getNgramsTable :: [NgramsElement] }
deriving stock (Ord, Eq, Generic, Show)
deriving newtype (ToJSON, FromJSON)
deriving anyclass (ToExpr)
......
......@@ -14,18 +14,22 @@ Portability : POSIX
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Test.Instances
where
module Test.Instances where
import Control.Lens hiding (elements)
import Data.List.NonEmpty qualified as NE
import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM
import Data.Patch.Class (Replace, replace)
import Data.Set qualified as Set
import Data.Text qualified as T
import Data.Validity (Validation(..), ValidationChain (..), prettyValidation)
import EPO.API.Client.Types qualified as EPO
import Gargantext.API.Admin.Auth.Types (AuthenticatedUser, ForgotPasswordAsyncParams)
import Gargantext.API.Admin.Orchestrator.Types qualified as Orch
import Gargantext.API.Errors.Types qualified as Errors
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types qualified as Ngrams
import Gargantext.API.Node.Contact.Types (AddContactParams)
import Gargantext.API.Node.Corpus.Annuaire (AnnuaireWithForm)
......@@ -33,14 +37,14 @@ import Gargantext.API.Node.Corpus.New (ApiInfo)
import Gargantext.API.Node.Corpus.New.Types (FileFormat, FileType)
import Gargantext.API.Node.Corpus.Types (Datafield)
import Gargantext.API.Node.Corpus.Types qualified as CT
import Gargantext.API.Node.DocumentsFromWriteNodes.Types qualified as DFWN
import Gargantext.API.Node.DocumentUpload.Types (DocumentUpload)
import Gargantext.API.Node.DocumentsFromWriteNodes.Types qualified as DFWN
import Gargantext.API.Node.FrameCalcUpload.Types qualified as FCU
import Gargantext.API.Node.Get (GetNodeParams)
import Gargantext.API.Node.New.Types (PostNode(..))
import Gargantext.API.Node.Share.Types (ShareNodeParams(..))
import Gargantext.API.Node.Update.Types qualified as NU
import Gargantext.API.Node.Types (NewWithForm, NewWithTempFile(..), RenameNode(..), WithQuery)
import Gargantext.API.Node.Update.Types qualified as NU
import Gargantext.API.Public.Types (PublicData(..))
import Gargantext.API.Routes.Named.Publish (PublishRequest(..))
import Gargantext.API.Routes.Named.Remote (RemoteExportRequest(..))
......@@ -61,12 +65,12 @@ import Gargantext.Database.Admin.Types.Node (NodeId(..), UserId(..), NodeType(..
import Gargantext.Database.Query.Facet (OrderBy(..))
import Gargantext.Prelude hiding (replace, Location)
import Servant.Client.Core.BaseUrl (BaseUrl(..), Scheme(Http))
import Text.Parsec.Error (ParseError, Message(..), newErrorMessage)
import Text.Parsec.Pos
import Test.QuickCheck
import Test.QuickCheck.Arbitrary.Generic
import Test.QuickCheck.Instances.Text ()
import Test.QuickCheck.Instances.Vector ()
import Text.Parsec.Error (ParseError, Message(..), newErrorMessage)
import Text.Parsec.Pos
instance Arbitrary AuthenticatedUser where arbitrary = genericArbitrary
......@@ -500,14 +504,14 @@ instance Arbitrary Ngrams.NgramsElement where
-- because we still want to simulate potential hierarchies, i.e. forests of ngrams.
-- so we sample the ngrams terms from a selection, and we restrict the number of max
-- children for each 'NgramsElement' to the size parameter to not have very large trees.
arbitrary = do
arbitrary = sized $ \n -> do
_ne_ngrams <- arbitrary
_ne_size <- arbitrary
_ne_size <- getPositive <$> arbitrary -- it doesn't make sense to have a negative size
_ne_list <- arbitrary
_ne_occurrences <- arbitrary
_ne_occurrences <- resize n arbitrary
_ne_root <- arbitrary `suchThat` (maybe True (\x -> x /= _ne_ngrams)) -- can't be root of itself
_ne_parent <- arbitrary `suchThat` (maybe True (\x -> x /= _ne_ngrams)) -- can't be parent of itself
_ne_children <- Ngrams.mSetFromList <$> (sized (\n -> vectorOf n arbitrary `suchThat` (\x -> _ne_ngrams `notElem` x))) -- can't be cyclic
_ne_children <- Ngrams.mSetFromList <$> (vectorOf n arbitrary `suchThat` (\x -> _ne_ngrams `notElem` x)) -- can't be cyclic
pure Ngrams.NgramsElement{..}
instance Arbitrary Ngrams.NgramsTable where
......@@ -738,3 +742,53 @@ instance Arbitrary Job where
recomputeGraphGen = RecomputeGraph <$> arbitrary
updateNodeGen = UpdateNode <$> arbitrary <*> arbitrary
uploadDocumentGen = UploadDocument <$> arbitrary <*> arbitrary
-- | An 'AcyclicTableMap' models a map that associates an 'NgramsTerm' with its 'NgramsElement'.
-- In particular, there are a few preconditions that govers its QuickCheck generator:
--
-- 1. The key matches the '_ne_ngrams' field of each 'NgramsElement';
-- 2. Acyclic: loops are removed.
-- The 'AcyclicTableMap' also carries a random element of the map, which can be
-- used in other tests requiring more specific orchestrations (for example query tests, where
-- we need to be searching for an element in the collection).
data AcyclicTableMap =
AcyclicTableMap
{ getTableMap :: Map NgramsTerm NgramsElement
, randomMapElement :: NgramsElement
}
deriving (Show, Eq)
-- | Generate a hierarchy of ngrams element /WITH NO LOOPS/.
-- Furthermore, we need to ensure that when we generate elements, we are not
-- accidentally creating duplicates, because ggtx won't allow us (in the frontend, at least)
-- to create (say) a MapTerm called \"foo\" if we have already a candidate term with that name.
genCorpusWithMatchingElement :: Gen AcyclicTableMap
genCorpusWithMatchingElement = do
-- cap the depth of the tree, to not make the tests too slow
depth <- choose (1, 5)
let mkEntry = do
trm <- arbitrary
el <- over ne_children (breakLoop trm) <$> (resize depth arbitrary)
pure (trm, el { _ne_ngrams = trm })
-- Let's build the map first, so that duplicates will be overwritten.
fullMap <- (Map.fromList <$> vectorOf depth mkEntry) `suchThat` (\x -> isRight (buildForest x)) -- exclude loops
let (hd NE.:| _) = NE.fromList $ Map.elems fullMap
pure $ AcyclicTableMap fullMap hd
where
breakLoop :: NgramsTerm -> MSet NgramsTerm -> MSet NgramsTerm
breakLoop t = mSetFromSet . Set.delete t . mSetToSet
instance Arbitrary AcyclicTableMap where
arbitrary = genCorpusWithMatchingElement
shrink = shrinkTree
shrinkTree :: AcyclicTableMap -> [AcyclicTableMap]
shrinkTree (AcyclicTableMap mp el) =
[ AcyclicTableMap (Map.insert k shrunk mp) (if k == _ne_ngrams el then shrunk else el)
| (k, ne) <- Map.toList mp
, shrunkChildren <- shrinkSet (_ne_children ne)
, let shrunk = ne { _ne_children = shrunkChildren }
]
shrinkSet :: Ord a => MSet a -> [MSet a]
shrinkSet s = map mSetFromList (shrinkList (const []) (mSetToList s))
{-# LANGUAGE TypeApplications #-}
module Test.Ngrams.Query (tests, mkMapTerm) where
{-# OPTIONS_GHC -Wno-deprecations #-}
module Test.Ngrams.Query (
tests
, mkMapTerm
, hierarchicalTableMap
) where
import Control.Lens
import Control.Monad
import Data.Coerce
import Data.Map.Strict qualified as Map
......@@ -9,16 +15,29 @@ import Data.Patch.Class qualified as Patch
import Data.String
import Data.Text qualified as T
import Data.Validity qualified as Validity
import Text.Collate qualified as Unicode
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main
import Gargantext.Core.Types.Query
import Gargantext.Prelude
import Test.Ngrams.Query.PaginationCorpus
import Test.HUnit
import Test.Hspec
import Test.Hspec.QuickCheck
import Test.Ngrams.Query.PaginationCorpus
import Test.QuickCheck
import Test.Utils ((@??=))
import Text.Collate qualified as Unicode
import Test.Instances
hierarchicalTableMap :: Map NgramsTerm NgramsElement
hierarchicalTableMap = Map.fromList [
("vehicle", mkMapTerm "vehicle" & ne_children .~ mSetFromList ["car"])
, ("car", mkMapTerm "car" & ne_root .~ Just "vehicle"
& ne_parent .~ Just "vehicle"
& ne_children .~ mSetFromList ["ford"])
, ("ford", mkMapTerm "ford" & ne_root .~ Just "vehicle"
& ne_parent .~ Just "car")
]
tests :: Spec
......@@ -56,6 +75,8 @@ unitTests = describe "Query tests" $ do
it "Simple query (listType = StopTerm)" testFlat04
-- -- Full text search
it "Simple query (search with match)" testFlat05
prop "Searching something that is there should always succeed" testForestSearchProp
it "Searching for nested terms should succeed" testSearchNestedTerms
-- -- Pagination
it "Simple pagination on all terms" test_pagination_allTerms
it "Simple pagination on MapTerm" test_pagination01
......@@ -74,7 +95,7 @@ unitTests = describe "Query tests" $ do
testFlat01 :: Assertion
testFlat01 = do
let res = searchTableNgrams mockFlatCorpus searchQuery
res @?= VersionedWithCount 0 2 ( NgramsTable [curryElem, elbaElem] )
res @?= Right (VersionedWithCount 0 2 ( NgramsTable [curryElem, elbaElem] ))
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5
......@@ -91,7 +112,7 @@ testFlat01 = do
testFlat02 :: Assertion
testFlat02 = do
let res = searchTableNgrams mockFlatCorpus searchQuery
res @?= VersionedWithCount 0 2 ( NgramsTable [elbaElem, curryElem] )
res @?= Right (VersionedWithCount 0 2 ( NgramsTable [elbaElem, curryElem] ))
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5
......@@ -112,7 +133,7 @@ testSortDiacriticsDucet = do
testNaturalSortAsceding :: Assertion
testNaturalSortAsceding = do
let res = searchTableNgrams frenchCorpus searchQuery
res @??= VersionedWithCount 0 4 ( NgramsTable $ map mkMapTerm [ "âge", "étude", "période", "vue" ])
res @?= Right (VersionedWithCount 0 4 ( NgramsTable $ map mkMapTerm [ "âge", "étude", "période", "vue" ]))
where
frenchCorpus :: Versioned (Map NgramsTerm NgramsElement)
......@@ -136,7 +157,7 @@ testNaturalSortAsceding = do
testNaturalSortDescending :: Assertion
testNaturalSortDescending = do
let res = searchTableNgrams frenchCorpus searchQuery
res @??= VersionedWithCount 0 4 ( NgramsTable $ map mkMapTerm [ "vue", "période", "étude", "âge" ])
res @?= Right (VersionedWithCount 0 4 ( NgramsTable $ map mkMapTerm [ "vue", "période", "étude", "âge" ]))
where
frenchCorpus :: Versioned (Map NgramsTerm NgramsElement)
......@@ -161,7 +182,7 @@ testNaturalSortDescending = do
testFlat03 :: Assertion
testFlat03 = do
let res = searchTableNgrams mockFlatCorpus searchQuery
res @?= VersionedWithCount 0 2 ( NgramsTable [elbaElem, curryElem] )
res @?= Right (VersionedWithCount 0 2 ( NgramsTable [elbaElem, curryElem] ))
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5
......@@ -179,7 +200,7 @@ testFlat03 = do
testFlat04 :: Assertion
testFlat04 = do
let res = searchTableNgrams mockFlatCorpus searchQuery
res @?= VersionedWithCount 0 0 ( NgramsTable [] )
res @?= Right (VersionedWithCount 0 0 ( NgramsTable [] ))
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5
......@@ -196,7 +217,7 @@ testFlat04 = do
testFlat05 :: Assertion
testFlat05 = do
let res = searchTableNgrams mockFlatCorpus searchQuery
res @?= VersionedWithCount 0 1 ( NgramsTable [curryElem] )
res @?= Right (VersionedWithCount 0 1 ( NgramsTable [curryElem] ))
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5
......@@ -208,12 +229,50 @@ testFlat05 = do
, _nsq_searchQuery = mockQueryFn (Just "curry")
}
-- | Property that tests that if we make a search for a given term that we know it's
-- present in the list, we need to get it back, either directly (i.e. a single match) or
-- indirectly (i.e. present in the list of results, because it's included in a hierarchy of nodes).
testForestSearchProp :: Property
testForestSearchProp = forAll arbitrary $ \(AcyclicTableMap ngramsTable el) -> do
case searchTableNgrams (Versioned 0 ngramsTable) (searchQuery el) of
Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err)
Right res -> res ^. vc_data `shouldSatisfy` (elem (_ne_ngrams el) . map _ne_ngrams . getNgramsTable)
where
searchQuery term = NgramsSearchQuery {
_nsq_limit = Limit 5
, _nsq_offset = Nothing
, _nsq_listType = Just $ _ne_list term -- search using the list of the candidate
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just TermDesc
, _nsq_searchQuery = mockQueryFn (Just $ unNgramsTerm $ _ne_ngrams term)
}
-- | In this test we check that if we have nested terms, they will still show up in search.
-- In this test we have a nested hierarchy of a level-2 tree, and we search for the children,
-- and it still shows up.
testSearchNestedTerms :: Assertion
testSearchNestedTerms = do
case searchTableNgrams (Versioned 0 hierarchicalTableMap) searchQuery of
Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err)
Right res -> res ^. vc_data `shouldSatisfy` (elem "ford" . map _ne_ngrams . getNgramsTable)
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5
, _nsq_offset = Nothing
, _nsq_listType = Nothing
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just TermDesc
, _nsq_searchQuery = mockQueryFn (Just "ford")
}
-- Pagination tests
test_pagination_allTerms :: Assertion
test_pagination_allTerms = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 10 ( NgramsTable [ haskellElem
res @?= Right (VersionedWithCount 0 10 ( NgramsTable [ haskellElem
, sideEffectsElem
, concHaskellElem
, implementationElem
......@@ -221,7 +280,7 @@ test_pagination_allTerms = do
, languagesElem
, javaElem
, termsElem
] )
] ))
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 8
......@@ -238,7 +297,7 @@ test_pagination_allTerms = do
test_pagination01 :: Assertion
test_pagination01 = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [implementationElem, languagesElem, termsElem, proofElem] )
res @?= Right (VersionedWithCount 0 4 ( NgramsTable [implementationElem, languagesElem, termsElem, proofElem] ))
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5
......@@ -253,7 +312,7 @@ test_pagination01 = do
test_pagination02 :: Assertion
test_pagination02 = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [implementationElem, languagesElem, termsElem] )
res @?= Right (VersionedWithCount 0 4 ( NgramsTable [implementationElem, languagesElem, termsElem] ))
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 3
......@@ -268,7 +327,7 @@ test_pagination02 = do
test_pagination02_offset :: Assertion
test_pagination02_offset = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [termsElem, proofElem] )
res @?= Right (VersionedWithCount 0 4 ( NgramsTable [termsElem, proofElem] ))
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 2
......@@ -283,7 +342,7 @@ test_pagination02_offset = do
test_pagination03 :: Assertion
test_pagination03 = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [sideEffectsElem, ooElem, javaElem] )
res @?= Right (VersionedWithCount 0 4 ( NgramsTable [sideEffectsElem, ooElem, javaElem] ))
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 3
......@@ -298,7 +357,7 @@ test_pagination03 = do
test_pagination03_offset :: Assertion
test_pagination03_offset = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [javaElem, pascalElem] )
res @?= Right (VersionedWithCount 0 4 ( NgramsTable [javaElem, pascalElem] ))
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 2
......@@ -313,7 +372,7 @@ test_pagination03_offset = do
test_pagination04 :: Assertion
test_pagination04 = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 2 ( NgramsTable [haskellElem] )
res @?= Right (VersionedWithCount 0 2 ( NgramsTable [haskellElem] ))
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 1
......@@ -327,7 +386,9 @@ test_pagination04 = do
test_paginationQuantum :: Assertion
test_paginationQuantum = do
let res = searchTableNgrams quantumComputingCorpus searchQuery
case searchTableNgrams quantumComputingCorpus searchQuery of
Left err -> fail (show err)
Right res -> do
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
length elems @?= 10
forM_ elems $ \term ->
......@@ -345,7 +406,9 @@ test_paginationQuantum = do
test_paginationQuantum_02 :: Assertion
test_paginationQuantum_02 = do
let res = searchTableNgrams quantumComputingCorpus searchQuery
case searchTableNgrams quantumComputingCorpus searchQuery of
Left err -> fail (show err)
Right res -> do
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
assertBool ("found only " <> show (length elems) <> " in: " <> show elems) (length elems == 10)
where
......
......@@ -13,7 +13,7 @@ import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map
import Data.Text qualified as T
import Data.Tree
import Gargantext.API.Ngrams (filterNgramsNodes, buildForest, destroyForest, pruneForest)
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core
......@@ -26,8 +26,8 @@ import Gargantext.Database.Schema.Context
import Test.HUnit
import Test.Hspec
import Test.Hspec.QuickCheck (prop)
import Test.Instances ()
import Test.Ngrams.Query (mkMapTerm)
import Test.Instances (AcyclicTableMap(..))
import Test.Ngrams.Query (mkMapTerm, hierarchicalTableMap)
import Test.QuickCheck
import Test.QuickCheck qualified as QC
import Text.RawString.QQ (r)
......@@ -113,19 +113,9 @@ tests = describe "Ngrams" $ do
describe "hierarchical grouping" $ do
it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery
hierarchicalTableMap :: Map NgramsTerm NgramsElement
hierarchicalTableMap = Map.fromList [
("vehicle", mkMapTerm "vehicle" & ne_children .~ mSetFromList ["car"])
, ("car", mkMapTerm "car" & ne_root .~ Just "vehicle"
& ne_parent .~ Just "vehicle"
& ne_children .~ mSetFromList ["ford"])
, ("ford", mkMapTerm "ford" & ne_root .~ Just "vehicle"
& ne_parent .~ Just "car")
]
testFilterNgramsNodesEmptyQuery :: Assertion
testFilterNgramsNodesEmptyQuery = do
let input = hierarchicalTableMap
let input = buildForestOrFail hierarchicalTableMap
let actual = filterNgramsNodes (Just MapTerm) Nothing Nothing (const True) input
actual @?= input
......@@ -149,6 +139,11 @@ newtype ASCIIForest = ASCIIForest String
instance Show ASCIIForest where
show (ASCIIForest x) = x
buildForestOrFail :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrFail mp = case buildForest mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x
compareForestVisually :: Forest NgramsElement -> String -> Property
compareForestVisually f expected =
let actual = init $ drawForest (map (fmap renderEl) f)
......@@ -173,7 +168,7 @@ testBuildNgramsTree_01 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_parent .~ Just "foo")
]
in (buildForest t1) `compareForestVisually` [r|
in (buildForestOrFail t1) `compareForestVisually` [r|
bar
foo
......@@ -183,7 +178,7 @@ testBuildNgramsTree_01 =
testBuildNgramsTree_02 :: Property
testBuildNgramsTree_02 =
buildForest hierarchicalTableMap `compareForestVisually` [r|
buildForestOrFail hierarchicalTableMap `compareForestVisually` [r|
car
|
`- ford
......@@ -246,7 +241,7 @@ testBuildNgramsTree_03 =
)
]
in pruneForest (buildForest input) `compareForestVisually` [r|
in pruneForest (buildForestOrFail input) `compareForestVisually` [r|
animalia
|
`- chordata
......@@ -271,25 +266,17 @@ testBuildNgramsTree_03 =
|]
newtype TableMapLockStep = TableMapLockStep { getTableMap :: Map NgramsTerm NgramsElement }
deriving (Show, Eq)
instance Arbitrary TableMapLockStep where
arbitrary = do
pairs <- map (\(k,v) -> (k, v & ne_ngrams .~ k)) <$> arbitrary
pure $ TableMapLockStep (Map.fromList pairs)
-- /PRECONDITION/: The '_ne_ngrams' field always matches the 'NgramsTerm', key of the map.
buildDestroyForestRoundtrips :: TableMapLockStep -> Property
buildDestroyForestRoundtrips (TableMapLockStep mp) =
(destroyForest . buildForest $ mp) === mp
buildDestroyForestRoundtrips :: AcyclicTableMap -> Property
buildDestroyForestRoundtrips (AcyclicTableMap mp _) =
(destroyForest . buildForestOrFail $ mp) === mp
testPruningNgramsForest_01 :: Property
testPruningNgramsForest_01 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_parent .~ Just "foo")
]
in (pruneForest $ buildForest t1) `compareForestVisually` [r|
in (pruneForest $ buildForestOrFail t1) `compareForestVisually` [r|
foo
|
`- bar
......@@ -297,7 +284,7 @@ testPruningNgramsForest_01 =
testPruningNgramsForest_02 :: Property
testPruningNgramsForest_02 =
(pruneForest $ buildForest hierarchicalTableMap) `compareForestVisually` [r|
(pruneForest $ buildForestOrFail hierarchicalTableMap) `compareForestVisually` [r|
vehicle
|
`- car
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment