Commit fca2b73e authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Clean up a bit the NgramsTree API

I think we can use the NgramsTree and NgramsForest types to our
advantage for the 'searchTableNgrams' & co, so I have revamped that
module to put it to good use.
parent 0314bf55
......@@ -10,10 +10,17 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Ngrams.NgramsTree
where
( -- * Types
NgramsForest(..)
, NgramsTree
, GeneralisedNgramsTree(..)
-- * Construction
, toNgramsTree
, toNgramsForest
) where
import Data.Aeson
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
......@@ -23,41 +30,73 @@ import Data.Tree ( Tree(Node), unfoldForest )
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main ( ListType(..) )
import Gargantext.Database.Admin.Types.Node ( NodeId )
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Prelude
import Test.QuickCheck ( Arbitrary(arbitrary) )
type Children = Text
type Root = Text
-- | Ngrams forms a forest, i.e. a set of trees, each tree represents a strong grouping
-- between terms. Each tree has a root and some children, of arbitrary depth. We use
-- this data structure internally to represent ngrams tables in a principled way, and later
-- we \"render\" them back into an 'NgramsTable' and/or a set of ngrams elements, where
-- each 'NgramElement' is a standalone tree.
--
-- Properties:
--
-- * Aciclic: each tree is a DAG, and therefore there must be no cycles within the tree,
-- and no cycles between trees in the forest.
--
newtype NgramsForest =
NgramsForest { getNgramsForest :: [NgramsTree] }
deriving (Show, Eq, Ord)
type NgramsTree = GeneralisedNgramsTree Text Int
data NgramsTree = NgramsTree { mt_label :: Text
, mt_value :: Double
, mt_children :: [NgramsTree]
}
deriving (Generic, Show, Eq)
-- | Models a general ngram tree polymorphic over a label 'l' and a measure 'm'.
data GeneralisedNgramsTree l m =
GeneralisedNgramsTree { mt_label :: l
, mt_value :: m
, mt_children :: [NgramsTree]
}
deriving (Generic, Show, Eq, Ord)
toNgramsTree :: Tree (NgramsTerm,Double) -> NgramsTree
toNgramsTree (Node (NgramsTerm l,v) xs) = NgramsTree l v (map toNgramsTree xs)
instance (ToJSON l, ToJSON m) => ToJSON (GeneralisedNgramsTree l m) where
toJSON (GeneralisedNgramsTree l m children) =
object [ "label" .= toJSON l
, "value" .= toJSON m
, "children" .= toJSON children
]
deriveJSON (unPrefix "mt_") ''NgramsTree
instance (FromJSON l, FromJSON m) => FromJSON (GeneralisedNgramsTree l m) where
parseJSON = withObject "NgramsTree" $ \o -> do
mt_label <- o .: "label"
mt_value <- o .: "value"
mt_children <- o .: "children"
pure $ GeneralisedNgramsTree{..}
instance ToSchema NgramsTree where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "mt_")
instance Arbitrary NgramsTree
where
arbitrary = NgramsTree <$> arbitrary <*> arbitrary <*> arbitrary
arbitrary = GeneralisedNgramsTree <$> arbitrary <*> arbitrary <*> arbitrary
--
-- Constructing trees and forests
--
toNgramsTree :: Tree (NgramsTerm,Int) -> NgramsTree
toNgramsTree (Node (NgramsTerm l,v) xs) = GeneralisedNgramsTree l v (map toNgramsTree xs)
toTree :: ListType
-> HashMap NgramsTerm (Set NodeId)
-> HashMap NgramsTerm NgramsRepoElement
-> [NgramsTree]
toTree lt vs m = map toNgramsTree $ unfoldForest buildNode roots
toNgramsForest :: ListType
-> HashMap NgramsTerm (Set NodeId)
-> HashMap NgramsTerm NgramsRepoElement
-> NgramsForest
toNgramsForest lt vs m = NgramsForest $ map toNgramsTree $ unfoldForest buildNode roots
where
buildNode r = maybe ((r, value r),[])
(\x -> ((r, value r), mSetToList $ _nre_children x))
(HashMap.lookup r m)
value l = maybe 0 (fromIntegral . Set.size) $ HashMap.lookup l vs
value l = maybe 0 Set.size $ HashMap.lookup l vs
rootsCandidates :: [NgramsTerm]
rootsCandidates = catMaybes
......
......@@ -17,7 +17,7 @@ import Data.List qualified as List
import Data.Map.Strict (toList)
import Data.Set qualified as Set
import Data.Vector qualified as V
import Gargantext.API.Ngrams.NgramsTree ( toTree, NgramsTree )
import Gargantext.API.Ngrams.NgramsTree ( toNgramsForest, NgramsTree, getNgramsForest )
import Gargantext.API.Ngrams.Tools ( filterListWithRoot, getListNgrams, getRepo, mapTermListRoot )
import Gargantext.API.Ngrams.Types ( NgramsTerm(NgramsTerm) )
import Gargantext.Core.NodeStory.Types ( NodeStoryEnv )
......@@ -91,4 +91,4 @@ treeData env cId nt lt = do
cs' <- HashMap.map (Set.map contextId2NodeId) <$> getContextsByNgramsOnlyUser cId (ls' <> ls) nt terms
m <- getListNgrams env ls nt
pure $ V.fromList $ toTree lt cs' m
pure $ V.fromList $ getNgramsForest $ toNgramsForest lt cs' m
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment