Commit fca2b73e authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Clean up a bit the NgramsTree API

I think we can use the NgramsTree and NgramsForest types to our
advantage for the 'searchTableNgrams' & co, so I have revamped that
module to put it to good use.
parent 0314bf55
...@@ -10,10 +10,17 @@ Portability : POSIX ...@@ -10,10 +10,17 @@ Portability : POSIX
-} -}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Ngrams.NgramsTree module Gargantext.API.Ngrams.NgramsTree
where ( -- * Types
NgramsForest(..)
, NgramsTree
, GeneralisedNgramsTree(..)
-- * Construction
, toNgramsTree
, toNgramsForest
) where
import Data.Aeson
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List import Data.List qualified as List
...@@ -23,41 +30,73 @@ import Data.Tree ( Tree(Node), unfoldForest ) ...@@ -23,41 +30,73 @@ import Data.Tree ( Tree(Node), unfoldForest )
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main ( ListType(..) ) import Gargantext.Core.Types.Main ( ListType(..) )
import Gargantext.Database.Admin.Types.Node ( NodeId ) import Gargantext.Database.Admin.Types.Node ( NodeId )
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger) import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Prelude import Gargantext.Prelude
import Test.QuickCheck ( Arbitrary(arbitrary) ) import Test.QuickCheck ( Arbitrary(arbitrary) )
type Children = Text -- | Ngrams forms a forest, i.e. a set of trees, each tree represents a strong grouping
type Root = Text -- between terms. Each tree has a root and some children, of arbitrary depth. We use
-- this data structure internally to represent ngrams tables in a principled way, and later
-- we \"render\" them back into an 'NgramsTable' and/or a set of ngrams elements, where
-- each 'NgramElement' is a standalone tree.
--
-- Properties:
--
-- * Aciclic: each tree is a DAG, and therefore there must be no cycles within the tree,
-- and no cycles between trees in the forest.
--
newtype NgramsForest =
NgramsForest { getNgramsForest :: [NgramsTree] }
deriving (Show, Eq, Ord)
type NgramsTree = GeneralisedNgramsTree Text Int
data NgramsTree = NgramsTree { mt_label :: Text -- | Models a general ngram tree polymorphic over a label 'l' and a measure 'm'.
, mt_value :: Double data GeneralisedNgramsTree l m =
, mt_children :: [NgramsTree] GeneralisedNgramsTree { mt_label :: l
} , mt_value :: m
deriving (Generic, Show, Eq) , mt_children :: [NgramsTree]
}
deriving (Generic, Show, Eq, Ord)
toNgramsTree :: Tree (NgramsTerm,Double) -> NgramsTree instance (ToJSON l, ToJSON m) => ToJSON (GeneralisedNgramsTree l m) where
toNgramsTree (Node (NgramsTerm l,v) xs) = NgramsTree l v (map toNgramsTree xs) toJSON (GeneralisedNgramsTree l m children) =
object [ "label" .= toJSON l
, "value" .= toJSON m
, "children" .= toJSON children
]
deriveJSON (unPrefix "mt_") ''NgramsTree instance (FromJSON l, FromJSON m) => FromJSON (GeneralisedNgramsTree l m) where
parseJSON = withObject "NgramsTree" $ \o -> do
mt_label <- o .: "label"
mt_value <- o .: "value"
mt_children <- o .: "children"
pure $ GeneralisedNgramsTree{..}
instance ToSchema NgramsTree where instance ToSchema NgramsTree where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "mt_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "mt_")
instance Arbitrary NgramsTree instance Arbitrary NgramsTree
where where
arbitrary = NgramsTree <$> arbitrary <*> arbitrary <*> arbitrary arbitrary = GeneralisedNgramsTree <$> arbitrary <*> arbitrary <*> arbitrary
--
-- Constructing trees and forests
--
toNgramsTree :: Tree (NgramsTerm,Int) -> NgramsTree
toNgramsTree (Node (NgramsTerm l,v) xs) = GeneralisedNgramsTree l v (map toNgramsTree xs)
toTree :: ListType toNgramsForest :: ListType
-> HashMap NgramsTerm (Set NodeId) -> HashMap NgramsTerm (Set NodeId)
-> HashMap NgramsTerm NgramsRepoElement -> HashMap NgramsTerm NgramsRepoElement
-> [NgramsTree] -> NgramsForest
toTree lt vs m = map toNgramsTree $ unfoldForest buildNode roots toNgramsForest lt vs m = NgramsForest $ map toNgramsTree $ unfoldForest buildNode roots
where where
buildNode r = maybe ((r, value r),[]) buildNode r = maybe ((r, value r),[])
(\x -> ((r, value r), mSetToList $ _nre_children x)) (\x -> ((r, value r), mSetToList $ _nre_children x))
(HashMap.lookup r m) (HashMap.lookup r m)
value l = maybe 0 (fromIntegral . Set.size) $ HashMap.lookup l vs value l = maybe 0 Set.size $ HashMap.lookup l vs
rootsCandidates :: [NgramsTerm] rootsCandidates :: [NgramsTerm]
rootsCandidates = catMaybes rootsCandidates = catMaybes
......
...@@ -17,7 +17,7 @@ import Data.List qualified as List ...@@ -17,7 +17,7 @@ import Data.List qualified as List
import Data.Map.Strict (toList) import Data.Map.Strict (toList)
import Data.Set qualified as Set import Data.Set qualified as Set
import Data.Vector qualified as V import Data.Vector qualified as V
import Gargantext.API.Ngrams.NgramsTree ( toTree, NgramsTree ) import Gargantext.API.Ngrams.NgramsTree ( toNgramsForest, NgramsTree, getNgramsForest )
import Gargantext.API.Ngrams.Tools ( filterListWithRoot, getListNgrams, getRepo, mapTermListRoot ) import Gargantext.API.Ngrams.Tools ( filterListWithRoot, getListNgrams, getRepo, mapTermListRoot )
import Gargantext.API.Ngrams.Types ( NgramsTerm(NgramsTerm) ) import Gargantext.API.Ngrams.Types ( NgramsTerm(NgramsTerm) )
import Gargantext.Core.NodeStory.Types ( NodeStoryEnv ) import Gargantext.Core.NodeStory.Types ( NodeStoryEnv )
...@@ -91,4 +91,4 @@ treeData env cId nt lt = do ...@@ -91,4 +91,4 @@ treeData env cId nt lt = do
cs' <- HashMap.map (Set.map contextId2NodeId) <$> getContextsByNgramsOnlyUser cId (ls' <> ls) nt terms cs' <- HashMap.map (Set.map contextId2NodeId) <$> getContextsByNgramsOnlyUser cId (ls' <> ls) nt terms
m <- getListNgrams env ls nt m <- getListNgrams env ls nt
pure $ V.fromList $ toTree lt cs' m pure $ V.fromList $ getNgramsForest $ toNgramsForest lt cs' m
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment