Commit ca82be78 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

destroyForest . buildForest == id roundtrip property

parent 4b0e60f2
......@@ -472,21 +472,17 @@ buildForest mp = unfoldForest mkTreeNode (Map.toList mp)
findChildren t = Map.lookup t mp <&> \el -> (t, el)
-- | Folds an Ngrams forest back to a table map.
-- FIXME(adn) propagate the root information.
-- This function doesn't aggregate information, but merely just recostructs the original
-- map without loss of information. To perform operations on the forest, use the appropriate
-- functions.
destroyForest :: Forest NgramsElement -> Map NgramsTerm NgramsElement
destroyForest f = Map.fromList . map (foldTree destroyTree) $ f
where
destroyTree :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> (NgramsTerm, NgramsElement)
destroyTree rootEl childrenEl = (_ne_ngrams rootEl, squashElements rootEl childrenEl)
-- Given a list of children, generate a single node that has as the parent
-- the children, as the score the sum of the individual elements.
squashElements :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> NgramsElement
squashElements r c =
r { _ne_size = _ne_size r <> sum (map (_ne_size . snd) c)
, _ne_occurrences = _ne_occurrences r <> (mconcat $ map (_ne_occurrences . snd) c)
, _ne_children = mSetFromList $ map fst c
}
squashElements r _ = r
-- | TODO Errors management
-- TODO: polymorphic for Annuaire or Corpus or ...
......
......@@ -12,6 +12,7 @@ Portability : POSIX
{-# OPTIONS_GHC -fconstraint-solver-iterations=10 #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Test.Instances
where
......@@ -382,15 +383,133 @@ instance Arbitrary DET.WSRequest where
, pure DET.WSDeauthorize ]
arbitraryNgramsTerm :: Gen Ngrams.NgramsTerm
arbitraryNgramsTerm = elements
[ "time"
, "year"
, "people"
, "way"
, "day"
, "man"
, "thing"
, "woman"
, "life"
, "child"
, "world"
, "school"
, "state"
, "family"
, "student"
, "group"
, "country"
, "problem"
, "hand"
, "part"
, "place"
, "case"
, "week"
, "company"
, "system"
, "program"
, "question"
, "work"
, "government"
, "number"
, "night"
, "point"
, "home"
, "water"
, "room"
, "mother"
, "area"
, "money"
, "story"
, "fact"
, "month"
, "lot"
, "right"
, "study"
, "book"
, "eye"
, "job"
, "word"
, "business"
, "issue"
, "side"
, "kind"
, "head"
, "house"
, "service"
, "friend"
, "father"
, "power"
, "hour"
, "game"
, "line"
, "end"
, "member"
, "law"
, "car"
, "city"
, "community"
, "name"
, "president"
, "team"
, "minute"
, "idea"
, "kid"
, "body"
, "information"
, "back"
, "parent"
, "face"
, "others"
, "level"
, "office"
, "door"
, "health"
, "person"
, "art"
, "war"
, "history"
, "party"
, "result"
, "change"
, "morning"
, "reason"
, "research"
, "girl"
, "guy"
, "moment"
, "air"
, "teacher"
, "force"
, "education"
]
-- Ngrams
instance Arbitrary a => Arbitrary (Ngrams.MSet a)
-- We cannot pick some completely arbitrary values for the ngrams terms,
-- see the rationale in the instance for 'NgramsElement'.
instance Arbitrary Ngrams.NgramsTerm where
arbitrary = Ngrams.NgramsTerm <$>
-- we take into accoutn the fact, that tojsonkey strips the text
(arbitrary `suchThat` (\t -> t == T.strip t))
arbitrary = arbitraryNgramsTerm
instance Arbitrary Ngrams.TabType where arbitrary = arbitraryBoundedEnum
instance Arbitrary Ngrams.NgramsElement where
arbitrary = elements [Ngrams.newNgramsElement Nothing "sport"]
-- We cannot pick some completely arbitrary values for the ngrams elements
-- because we still want to simulate potential hierarchies, i.e. forests of ngrams.
-- so we sample the ngrams terms from a selection, and we restrict the number of max
-- children for each 'NgramsElement' to the size parameter to not have very large trees.
arbitrary = do
_ne_ngrams <- arbitrary
_ne_size <- arbitrary
_ne_list <- arbitrary
_ne_occurrences <- arbitrary
_ne_root <- arbitrary `suchThat` (maybe True (\x -> x /= _ne_ngrams)) -- can't be root of itself
_ne_parent <- arbitrary `suchThat` (maybe True (\x -> x /= _ne_ngrams)) -- can't be parent of itself
_ne_children <- Ngrams.mSetFromList <$> (sized (\n -> vectorOf n arbitrary `suchThat` (\x -> _ne_ngrams `notElem` x))) -- can't be cyclic
pure Ngrams.NgramsElement{..}
instance Arbitrary Ngrams.NgramsTable where
arbitrary = pure ngramsMockTable
instance Arbitrary Ngrams.OrderBy where arbitrary = arbitraryBoundedEnum
......
......@@ -8,7 +8,7 @@ import Prelude
import Control.Lens
import Data.Map.Strict qualified as Map
import Data.Text qualified as T
import Gargantext.API.Ngrams (filterNgramsNodes, buildForest)
import Gargantext.API.Ngrams (filterNgramsNodes, buildForest, destroyForest)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core
......@@ -28,6 +28,7 @@ import Data.Tree
import Text.RawString.QQ (r)
import Data.Char (isSpace)
import Data.Map.Strict (Map)
import Test.Hspec.QuickCheck (prop)
genScientificText :: Gen T.Text
......@@ -103,6 +104,7 @@ tests = describe "Ngrams" $ do
describe "ngram forests" $ do
it "building a simple tree works" testBuildNgramsTree_01
it "building a complex tree works" testBuildNgramsTree_02
prop "destroyForest . buildForest === id" buildDestroyForestRoundtrips
describe "hierarchical grouping" $ do
it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery
......@@ -189,3 +191,16 @@ testBuildNgramsTree_02 =
|
`- ford
|]
newtype TableMapLockStep = TableMapLockStep { getTableMap :: Map NgramsTerm NgramsElement }
deriving (Show, Eq)
instance Arbitrary TableMapLockStep where
arbitrary = do
pairs <- map (\(k,v) -> (k, v & ne_ngrams .~ k)) <$> arbitrary
pure $ TableMapLockStep (Map.fromList pairs)
-- /PRECONDITION/: The '_ne_ngrams' field always matches the 'NgramsTerm', key of the map.
buildDestroyForestRoundtrips :: TableMapLockStep -> Property
buildDestroyForestRoundtrips (TableMapLockStep mp) =
(destroyForest . buildForest $ mp) === mp
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment