Commit eb9455f7 authored by Quentin Lobbé's avatar Quentin Lobbé

fix the coocurencies

parent 901125c8
...@@ -81,7 +81,8 @@ data Conf = ...@@ -81,7 +81,8 @@ data Conf =
, phyloLevel :: Int , phyloLevel :: Int
, viewLevel :: Int , viewLevel :: Int
, fisSupport :: Int , fisSupport :: Int
, fisClique :: Int , fisClique :: Int
, minSizeBranch :: Int
} deriving (Show,Generic) } deriving (Show,Generic)
instance FromJSON Conf instance FromJSON Conf
...@@ -172,7 +173,7 @@ main = do ...@@ -172,7 +173,7 @@ main = do
(Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (phyloLevel conf) (Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf)) (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
let phylo = toPhylo query corpus roots termList let phylo = toPhylo query corpus roots termList
......
...@@ -149,7 +149,6 @@ data PhyloGroup = ...@@ -149,7 +149,6 @@ data PhyloGroup =
, _phylo_groupLabel :: Text , _phylo_groupLabel :: Text
, _phylo_groupNgrams :: [Int] , _phylo_groupNgrams :: [Int]
, _phylo_groupMeta :: Map Text Double , _phylo_groupMeta :: Map Text Double
, _phylo_groupCooc :: Map (Int, Int) Double
, _phylo_groupBranchId :: Maybe PhyloBranchId , _phylo_groupBranchId :: Maybe PhyloBranchId
, _phylo_groupPeriodParents :: [Pointer] , _phylo_groupPeriodParents :: [Pointer]
...@@ -280,13 +279,19 @@ data HammingParams = HammingParams ...@@ -280,13 +279,19 @@ data HammingParams = HammingParams
-- | Filter constructors -- | Filter constructors
data Filter = SmallBranch SBParams deriving (Generic, Show, Eq) data Filter = LonelyBranch LBParams
| SizeBranch SBParams
deriving (Generic, Show, Eq)
-- | Parameters for SmallBranch filter -- | Parameters for LonelyBranch filter
data LBParams = LBParams
{ _lb_periodsInf :: Int
, _lb_periodsSup :: Int
, _lb_minNodes :: Int } deriving (Generic, Show, Eq)
-- | Parameters for SizeBranch filter
data SBParams = SBParams data SBParams = SBParams
{ _sb_periodsInf :: Int { _sb_minSize :: Int } deriving (Generic, Show, Eq)
, _sb_periodsSup :: Int
, _sb_minNodes :: Int } deriving (Generic, Show, Eq)
---------------- ----------------
...@@ -483,6 +488,8 @@ $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams ) ...@@ -483,6 +488,8 @@ $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
$(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams ) $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
$(deriveJSON (unPrefix "_rc_" ) ''RCParams ) $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
$(deriveJSON (unPrefix "_wlj_" ) ''WLJParams ) $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
--
$(deriveJSON (unPrefix "_lb_" ) ''LBParams )
$(deriveJSON (unPrefix "_sb_" ) ''SBParams ) $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
-- --
$(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild ) $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
......
...@@ -77,7 +77,7 @@ type GetPhylo = QueryParam "listId" ListId ...@@ -77,7 +77,7 @@ type GetPhylo = QueryParam "listId" ListId
getPhylo :: PhyloId -> GargServer GetPhylo getPhylo :: PhyloId -> GargServer GetPhylo
getPhylo _phyloId _lId l f b l' ms x y z ts s o e d b' = do getPhylo _phyloId _lId l f b l' ms x y z ts s o e d b' = do
let let
fs' = maybe (Just []) (\p -> Just [p]) $ SmallBranch <$> (SBParams <$> x <*> y <*> z) fs' = maybe (Just []) (\p -> Just [p]) $ LonelyBranch <$> (LBParams <$> x <*> y <*> z)
so = (,) <$> s <*> o so = (,) <$> s <*> o
q = initPhyloQueryView l f b l' ms fs' ts so e d b' q = initPhyloQueryView l f b l' ms fs' ts so e d b'
-- | TODO remove phylo for real data here -- | TODO remove phylo for real data here
...@@ -149,6 +149,7 @@ instance ToSchema PhyloPeriod ...@@ -149,6 +149,7 @@ instance ToSchema PhyloPeriod
instance ToSchema PhyloQueryBuild instance ToSchema PhyloQueryBuild
instance ToSchema PhyloView instance ToSchema PhyloView
instance ToSchema RCParams instance ToSchema RCParams
instance ToSchema LBParams
instance ToSchema SBParams instance ToSchema SBParams
instance ToSchema Software instance ToSchema Software
instance ToSchema WLJParams instance ToSchema WLJParams
......
...@@ -17,7 +17,7 @@ Portability : POSIX ...@@ -17,7 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Cluster module Gargantext.Viz.Phylo.Aggregates.Cluster
where where
import Data.List (null,tail,concat) import Data.List (null,tail,concat,sort,intersect)
import Data.Map (Map) import Data.Map (Map)
import Data.Tuple (fst) import Data.Tuple (fst)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -25,6 +25,7 @@ import Gargantext.Viz.Phylo ...@@ -25,6 +25,7 @@ import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Metrics.Clustering import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Aggregates.Cooc
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Vector.Storable as VS import qualified Data.Vector.Storable as VS
...@@ -32,6 +33,13 @@ import Debug.Trace (trace) ...@@ -32,6 +33,13 @@ import Debug.Trace (trace)
import Numeric.Statistics (percentile) import Numeric.Statistics (percentile)
-- | Optimisation to filter only relevant candidates
getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
$ filter (\(g,g') -> g /= g')
$ listToDirectedCombi gs
-- | To transform a Graph into Clusters -- | To transform a Graph into Clusters
graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster] graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
graphToClusters clust (nodes,edges) = case clust of graphToClusters clust (nodes,edges) = case clust of
...@@ -41,12 +49,12 @@ graphToClusters clust (nodes,edges) = case clust of ...@@ -41,12 +49,12 @@ graphToClusters clust (nodes,edges) = case clust of
-- | To transform a list of PhyloGroups into a Graph of Proximity -- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph :: Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge]) groupsToGraph :: Proximity -> [PhyloGroup] -> Map (Int, Int) Double -> ([GroupNode],[GroupEdge])
groupsToGraph prox gs = case prox of groupsToGraph prox gs cooc = case prox of
WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), weightedLogJaccard sens (getGroupCooc x) (getGroupCooc y))) WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
$ listToDirectedCombi gs) $ getCandidates gs)
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
$ listToDirectedCombi gs) $ getCandidates gs)
_ -> undefined _ -> undefined
...@@ -73,7 +81,7 @@ phyloToClusters lvl clus p = Map.fromList ...@@ -73,7 +81,7 @@ phyloToClusters lvl clus p = Map.fromList
-------------------------------------- --------------------------------------
graphs :: [([GroupNode],[GroupEdge])] graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox) graphs = traceGraph lvl (getThreshold prox)
$ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p)) periods $ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p) (getCooc [prd] p)) periods
-------------------------------------- --------------------------------------
prox :: Proximity prox :: Proximity
prox = getProximity clus prox = getProximity clus
...@@ -96,7 +104,7 @@ traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (l ...@@ -96,7 +104,7 @@ traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (l
<> show (percentile 75 (VS.fromList lst)) <> " (75%) " <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
where where
lst = map snd $ concat $ map snd g lst = sort $ map snd $ concat $ map snd g
traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])] traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
...@@ -107,5 +115,5 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> ...@@ -107,5 +115,5 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <>
<> show (percentile 75 (VS.fromList lst)) <> " (75%) " <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
where where
lst = map snd $ concat $ map snd g lst = sort $ map snd $ concat $ map snd g
...@@ -17,8 +17,8 @@ Portability : POSIX ...@@ -17,8 +17,8 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Cooc module Gargantext.Viz.Phylo.Aggregates.Cooc
where where
import Data.List (union,concat) import Data.List (union,concat,nub)
import Data.Map (Map, elems, adjust) import Data.Map (Map,elems,adjust,filterWithKey)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
...@@ -26,12 +26,12 @@ import qualified Data.Map as Map ...@@ -26,12 +26,12 @@ import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
-- | To transform the Fis into a coocurency Matrix in a Phylo but as a triangle -- | To transform the Fis into a full coocurency Matrix in a Phylo
fisToCooc' :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double fisToCooc :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double
fisToCooc' m p = map (/docs) fisToCooc m p = map (/docs)
$ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc $ foldl (\mem x -> adjust (+1) x mem) cooc
$ concat $ concat
$ map (\x -> listToUnDirectedCombiWith (\y -> getIdxInRoots y p) $ (Set.toList . getClique) x) $ map (\x -> listToDirectedCombiWith (\y -> getIdxInRoots y p) $ (Set.toList . getClique) x)
$ (concat . elems) m $ (concat . elems) m
where where
-------------------------------------- --------------------------------------
...@@ -42,28 +42,45 @@ fisToCooc' m p = map (/docs) ...@@ -42,28 +42,45 @@ fisToCooc' m p = map (/docs)
docs = fromIntegral $ foldl (\mem x -> mem + (getSupport x)) 0 $ (concat . elems) m docs = fromIntegral $ foldl (\mem x -> mem + (getSupport x)) 0 $ (concat . elems) m
-------------------------------------- --------------------------------------
cooc :: Map (Int, Int) (Double) cooc :: Map (Int, Int) (Double)
cooc = Map.fromList $ map (\x -> (x,0)) (listToUnDirectedCombiWith (\y -> getIdxInRoots y p) fisNgrams) cooc = Map.fromList $ map (\x -> (x,0)) (listToDirectedCombiWith (\y -> getIdxInRoots y p) fisNgrams)
-------------------------------------- --------------------------------------
-- | To transform the Fis into a full coocurency Matrix in a Phylo
fisToCooc :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double -- | To transform a tuple of group's information into a coocurency Matrix
fisToCooc m p = map (/docs) toCooc :: [([Int],Double)] -> Map (Int, Int) Double
$ foldl (\mem x -> adjust (+1) x mem) cooc toCooc l = map (/docs)
$ concat $ foldl (\mem x -> adjust (+1) x mem) cooc
$ map (\x -> listToDirectedCombiWith (\y -> getIdxInRoots y p) $ (Set.toList . getClique) x) $ concat
$ (concat . elems) m $ map (\x -> listToDirectedCombi $ fst x) l
where where
-------------------------------------- --------------------------------------
fisNgrams :: [Ngrams] idx :: [Int]
fisNgrams = foldl (\mem x -> union mem $ (Set.toList . getClique) x) [] $ (concat . elems) m idx = nub $ concat $ map fst l
-------------------------------------- --------------------------------------
docs :: Double docs :: Double
docs = fromIntegral $ foldl (\mem x -> mem + (getSupport x)) 0 $ (concat . elems) m docs = sum $ map snd l
-------------------------------------- --------------------------------------
cooc :: Map (Int, Int) (Double) cooc :: Map (Int, Int) (Double)
cooc = Map.fromList $ map (\x -> (x,0)) (listToDirectedCombiWith (\y -> getIdxInRoots y p) fisNgrams) cooc = Map.fromList $ map (\x -> (x,0)) $ listToDirectedCombi idx
--------------------------------------
-- | To reduce a coocurency Matrix to some keys
getSubCooc :: [Int] -> Map (Int, Int) Double -> Map (Int, Int) Double
getSubCooc idx cooc = filterWithKey (\k _ -> (elem (fst k) idx)
&& (elem (snd k) idx)) cooc
-- | To get a coocurency Matrix related to a given list of Periods
getCooc :: [PhyloPeriodId] -> Phylo -> Map (Int, Int) Double
getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)) gs
where
-------------------------------------- --------------------------------------
-- | Here we need to go back to the level 1 (aka : the Fis level)
gs :: [PhyloGroup]
gs = filter (\g -> elem (getGroupPeriod g) prds ) $ getGroupsWithLevel 1 p
--------------------------------------
-- phyloCooc :: Map (Int, Int) Double -- phyloCooc :: Map (Int, Int) Double
......
...@@ -77,7 +77,7 @@ queryViewEx = "level=3" ...@@ -77,7 +77,7 @@ queryViewEx = "level=3"
phyloQueryView :: PhyloQueryView phyloQueryView :: PhyloQueryView
phyloQueryView = PhyloQueryView 2 Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True phyloQueryView = PhyloQueryView 4 Merge False 1 [BranchAge] [defaultSizeBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
-------------------------------------------------- --------------------------------------------------
...@@ -104,7 +104,7 @@ queryEx = "title=Cesar et Cleôpatre" ...@@ -104,7 +104,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)" phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.1 10) 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 10) 5 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.1 10)
......
...@@ -21,13 +21,12 @@ module Gargantext.Viz.Phylo.LevelMaker ...@@ -21,13 +21,12 @@ module Gargantext.Viz.Phylo.LevelMaker
import Control.Lens hiding (both, Level) import Control.Lens hiding (both, Level)
import Data.List ((++), sort, concat, nub, zip, last) import Data.List ((++), sort, concat, nub, zip, last)
import Data.Map (Map, (!), empty, restrictKeys, filterWithKey, singleton, union) import Data.Map (Map, (!), empty, singleton)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple.Extra import Data.Tuple.Extra
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Aggregates.Cluster import Gargantext.Viz.Phylo.Aggregates.Cluster
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.Aggregates.Document import Gargantext.Viz.Phylo.Aggregates.Document
import Gargantext.Viz.Phylo.Aggregates.Fis import Gargantext.Viz.Phylo.Aggregates.Fis
import Gargantext.Viz.Phylo.BranchMaker import Gargantext.Viz.Phylo.BranchMaker
...@@ -61,7 +60,7 @@ instance PhyloLevelMaker PhyloCluster ...@@ -61,7 +60,7 @@ instance PhyloLevelMaker PhyloCluster
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2") | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
-------------------------------------- --------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup] -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l toPhyloGroups lvl (d,d') l m _ = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m) $ zip [1..] l
-------------------------------------- --------------------------------------
...@@ -74,7 +73,7 @@ instance PhyloLevelMaker PhyloFis ...@@ -74,7 +73,7 @@ instance PhyloLevelMaker PhyloFis
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1") | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
-------------------------------------- --------------------------------------
-- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup] -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l m p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis m p) $ zip [1..] l toPhyloGroups lvl (d,d') l _ p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
-------------------------------------- --------------------------------------
...@@ -95,26 +94,20 @@ instance PhyloLevelMaker Document ...@@ -95,26 +94,20 @@ instance PhyloLevelMaker Document
-- | To transform a Cluster into a Phylogroup -- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster] -> Phylo -> PhyloGroup clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m p = clusterToGroup prd lvl idx lbl groups _m =
PhyloGroup ((prd, lvl), idx) lbl ngrams empty cooc Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups) PhyloGroup ((prd, lvl), idx) lbl ngrams empty Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups)
where where
-------------------------------------- --------------------------------------
ngrams :: [Int] ngrams :: [Int]
ngrams = (sort . nub . concat) $ map getGroupNgrams groups ngrams = (sort . nub . concat) $ map getGroupNgrams groups
-------------------------------------- --------------------------------------
cooc :: Map (Int, Int) Double
cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
$ foldl union empty
$ map getGroupCooc
$ getGroupsWithFilters 1 prd p
--------------------------------------
-- | To transform a Clique into a PhyloGroup -- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map (Date, Date) [PhyloFis] -> Phylo -> PhyloGroup cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis m p = cliqueToGroup prd lvl idx lbl fis p =
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) cooc Nothing [] [] [] [] PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing [] [] [] []
where where
-------------------------------------- --------------------------------------
ngrams :: [Int] ngrams :: [Int]
...@@ -122,16 +115,12 @@ cliqueToGroup prd lvl idx lbl fis m p = ...@@ -122,16 +115,12 @@ cliqueToGroup prd lvl idx lbl fis m p =
$ Set.toList $ Set.toList
$ getClique fis $ getClique fis
-------------------------------------- --------------------------------------
cooc :: Map (Int, Int) Double
cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
$ fisToCooc (restrictKeys m $ Set.fromList [prd]) p
--------------------------------------
-- | To transform a list of Ngrams into a PhyloGroup -- | To transform a list of Ngrams into a PhyloGroup
ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
ngramsToGroup prd lvl idx lbl ngrams p = ngramsToGroup prd lvl idx lbl ngrams p =
PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing [] [] [] [] PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing [] [] [] []
-- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
......
...@@ -25,6 +25,7 @@ import Gargantext.Prelude ...@@ -25,6 +25,7 @@ import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Aggregates.Cooc
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Maybe as Maybe import qualified Data.Maybe as Maybe
import qualified Data.Map as Map import qualified Data.Map as Map
...@@ -71,7 +72,8 @@ linkGroupToGroups (lvl,lvl') current targets ...@@ -71,7 +72,8 @@ linkGroupToGroups (lvl,lvl') current targets
-- | To set the LevelLink of all the PhyloGroups of a Phylo -- | To set the LevelLink of all the PhyloGroups of a Phylo
setLevelLinks :: (Level,Level) -> Phylo -> Phylo setLevelLinks :: (Level,Level) -> Phylo -> Phylo
setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLevel g == lvl setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLevel g == lvl
then linkGroupToGroups (lvl,lvl') g (filter (\g' -> getGroupPeriod g' == getGroupPeriod g) gs') then linkGroupToGroups (lvl,lvl') g (filterCandidates g
$ filter (\g' -> getGroupPeriod g' == getGroupPeriod g) gs')
else g) gs) p else g) gs) p
where where
-------------------------------------- --------------------------------------
...@@ -85,12 +87,10 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLeve ...@@ -85,12 +87,10 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLeve
-- | To apply the corresponding proximity function based on a given Proximity -- | To apply the corresponding proximity function based on a given Proximity
applyProximity :: Proximity -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double) applyProximity :: Proximity -> PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> (PhyloGroupId, Double)
applyProximity prox g1 g2 = case prox of applyProximity prox g1 g2 cooc = case prox of
-- WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2),weightedLogJaccard s (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1))) WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
-- Hamming (HammingParams _) -> ((getGroupId g2),hamming (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1))) Hamming (HammingParams _) -> ((getGroupId g2), hamming (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getGroupCooc g1) (getGroupCooc g2))
Hamming (HammingParams _) -> ((getGroupId g2), hamming (getGroupCooc g1) (getGroupCooc g2))
_ -> panic ("[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined") _ -> panic ("[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined")
...@@ -113,21 +113,24 @@ getNextPeriods to' id l = case to' of ...@@ -113,21 +113,24 @@ getNextPeriods to' id l = case to' of
-- | To find the best candidates regarding a given proximity -- | To find the best candidates regarding a given proximity
findBestCandidates' :: Filiation -> Int -> Int -> Proximity -> [PhyloPeriodId] -> [PhyloGroup] -> PhyloGroup -> ([Pointer],[Double]) findBestCandidates' :: Filiation -> Int -> Int -> Proximity -> [PhyloPeriodId] -> [PhyloGroup] -> PhyloGroup -> Phylo -> ([Pointer],[Double])
findBestCandidates' fil depth limit prox prds gs g findBestCandidates' fil depth limit prox prds gs g p
| depth > limit || null next = ([],[]) | depth > limit || null next = ([],[])
| (not . null) bestScores = (take 2 bestScores, map snd scores) | (not . null) bestScores = (take 2 bestScores, map snd scores)
| otherwise = findBestCandidates' fil (depth + 1) limit prox prds gs g | otherwise = findBestCandidates' fil (depth + 1) limit prox prds gs g p
where where
-------------------------------------- --------------------------------------
next :: [PhyloPeriodId] next :: [PhyloPeriodId]
next = take depth prds next = take depth prds
-------------------------------------- --------------------------------------
cooc :: Map (Int, Int) Double
cooc = getCooc next p
--------------------------------------
candidates :: [PhyloGroup] candidates :: [PhyloGroup]
candidates = filter (\g' -> elem (getGroupPeriod g') next) gs candidates = filter (\g' -> elem (getGroupPeriod g') next) gs
-------------------------------------- --------------------------------------
scores :: [(PhyloGroupId, Double)] scores :: [(PhyloGroupId, Double)]
scores = map (\g' -> applyProximity prox g g') candidates scores = map (\g' -> applyProximity prox g g' cooc) candidates
-------------------------------------- --------------------------------------
bestScores :: [(PhyloGroupId, Double)] bestScores :: [(PhyloGroupId, Double)]
bestScores = reverse bestScores = reverse
...@@ -177,7 +180,7 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) sc ...@@ -177,7 +180,7 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) sc
scores = sort $ concat $ map (snd . snd) candidates scores = sort $ concat $ map (snd . snd) candidates
-------------------------------------- --------------------------------------
candidates :: [(PhyloGroupId,([Pointer],[Double]))] candidates :: [(PhyloGroupId,([Pointer],[Double]))]
candidates = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g)) gs candidates = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) gs
-------------------------------------- --------------------------------------
gs :: [PhyloGroup] gs :: [PhyloGroup]
gs = getGroupsWithLevel lvl p gs = getGroupsWithLevel lvl p
......
...@@ -237,11 +237,6 @@ getGroupId :: PhyloGroup -> PhyloGroupId ...@@ -237,11 +237,6 @@ getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId = _phylo_groupId getGroupId = _phylo_groupId
-- | To get the Cooc Matrix of a PhyloGroup
getGroupCooc :: PhyloGroup -> Map (Int,Int) Double
getGroupCooc = _phylo_groupCooc
-- | To get the level out of the id of a PhyloGroup -- | To get the level out of the id of a PhyloGroup
getGroupLevel :: PhyloGroup -> Int getGroupLevel :: PhyloGroup -> Int
getGroupLevel = snd . fst . getGroupId getGroupLevel = snd . fst . getGroupId
...@@ -373,7 +368,6 @@ initGroup ngrams lbl idx lvl from' to' p = PhyloGroup ...@@ -373,7 +368,6 @@ initGroup ngrams lbl idx lvl from' to' p = PhyloGroup
lbl lbl
(sort $ map (\x -> getIdxInRoots x p) ngrams) (sort $ map (\x -> getIdxInRoots x p) ngrams)
(Map.empty) (Map.empty)
(Map.empty)
Nothing Nothing
[] [] [] [] [] [] [] []
...@@ -709,11 +703,14 @@ initFis (def True -> kmf) (def 1 -> min') (def 1 -> thr) = FisParams kmf min' th ...@@ -709,11 +703,14 @@ initFis (def True -> kmf) (def 1 -> min') (def 1 -> thr) = FisParams kmf min' th
initHamming :: Maybe Double -> HammingParams initHamming :: Maybe Double -> HammingParams
initHamming (def 0.01 -> sens) = HammingParams sens initHamming (def 0.01 -> sens) = HammingParams sens
initSmallBranch' :: Maybe Int -> Maybe Int -> Maybe Int -> SBParams initLonelyBranch :: Maybe Int -> Maybe Int -> Maybe Int -> LBParams
initSmallBranch' (def 2 -> periodsInf) (def 2 -> periodsSup) (def 1 -> minNodes) = SBParams periodsInf periodsSup minNodes initLonelyBranch (def 2 -> periodsInf) (def 2 -> periodsSup) (def 1 -> minNodes) = LBParams periodsInf periodsSup minNodes
initSmallBranch :: Maybe Int -> Maybe Int -> Maybe Int -> SBParams initSizeBranch :: Maybe Int -> SBParams
initSmallBranch (def 0 -> periodsInf) (def 0 -> periodsSup) (def 1 -> minNodes) = SBParams periodsInf periodsSup minNodes initSizeBranch (def 1 -> minSize) = SBParams minSize
initLonelyBranch' :: Maybe Int -> Maybe Int -> Maybe Int -> LBParams
initLonelyBranch' (def 0 -> periodsInf) (def 0 -> periodsSup) (def 1 -> minNodes) = LBParams periodsInf periodsSup minNodes
initLouvain :: Maybe Proximity -> LouvainParams initLouvain :: Maybe Proximity -> LouvainParams
initLouvain (def defaultWeightedLogJaccard -> proxi) = LouvainParams proxi initLouvain (def defaultWeightedLogJaccard -> proxi) = LouvainParams proxi
...@@ -760,8 +757,11 @@ defaultRelatedComponents = RelatedComponents (initRelatedComponents Nothing) ...@@ -760,8 +757,11 @@ defaultRelatedComponents = RelatedComponents (initRelatedComponents Nothing)
-- Filters -- Filters
defaultSmallBranch :: Filter defaultLonelyBranch :: Filter
defaultSmallBranch = SmallBranch (initSmallBranch Nothing Nothing Nothing) defaultLonelyBranch = LonelyBranch (initLonelyBranch Nothing Nothing Nothing)
defaultSizeBranch :: Filter
defaultSizeBranch = SizeBranch (initSizeBranch Nothing)
-- Params -- Params
......
...@@ -48,9 +48,9 @@ cleanNodesEdges v v' = v' & pv_nodes %~ (filter (\n -> not $ elem (getNodeId n) ...@@ -48,9 +48,9 @@ cleanNodesEdges v v' = v' & pv_nodes %~ (filter (\n -> not $ elem (getNodeId n)
-------------------------------------- --------------------------------------
-- | To filter all the SmallBranches (ie: isolated one in time & with a small number of nodes) of a PhyloView -- | To filter all the LonelyBranches (ie: isolated one in time & with a small number of nodes) of a PhyloView
filterSmallBranch :: Int -> Int -> Int -> [PhyloPeriodId] -> PhyloView -> PhyloView filterLonelyBranch :: Int -> Int -> Int -> [PhyloPeriodId] -> PhyloView -> PhyloView
filterSmallBranch inf sup min' prds v = cleanNodesEdges v v' filterLonelyBranch inf sup min' prds v = cleanNodesEdges v v'
where where
-------------------------------------- --------------------------------------
v' :: PhyloView v' :: PhyloView
...@@ -61,15 +61,24 @@ filterSmallBranch inf sup min' prds v = cleanNodesEdges v v' ...@@ -61,15 +61,24 @@ filterSmallBranch inf sup min' prds v = cleanNodesEdges v v'
-------------------------------------- --------------------------------------
isLone :: [PhyloNode] -> [PhyloPeriodId] -> Bool isLone :: [PhyloNode] -> [PhyloPeriodId] -> Bool
isLone ns prds' = (length ns <= min') isLone ns prds' = (length ns <= min')
&& notElem (head' "filterSmallBranch1" prds') (take inf prds) && notElem (head' "filterLonelyBranch1" prds') (take inf prds)
&& notElem (head' "filterSmallBranch2" prds') (take sup $ reverse prds) && notElem (head' "filterLonelyBranch2" prds') (take sup $ reverse prds)
--------------------------------------
-- | To filter all the branches with a minimal size in a PhyloView
filterSizeBranch :: Int -> PhyloView -> PhyloView
filterSizeBranch min' v = cleanNodesEdges v v'
where
--------------------------------------
v' :: PhyloView
v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) > min'))
-------------------------------------- --------------------------------------
-- | To process a list of QueryFilter to a PhyloView -- | To process a list of QueryFilter to a PhyloView
processFilters :: [Filter] -> Phylo -> PhyloView -> PhyloView processFilters :: [Filter] -> Phylo -> PhyloView -> PhyloView
processFilters fs p v = foldl (\v' f -> case f of processFilters fs p v = foldl (\v' f -> case f of
SmallBranch (SBParams inf sup min') -> filterSmallBranch inf sup min' LonelyBranch (LBParams inf sup min') -> filterLonelyBranch inf sup min' (getPhyloPeriods p) v'
(getPhyloPeriods p) v' SizeBranch (SBParams min') -> filterSizeBranch min' v'
-- _ -> panic "[ERR][Viz.Phylo.View.Filters.processFilters] filter not found" -- _ -> panic "[ERR][Viz.Phylo.View.Filters.processFilters] filter not found"
) v fs ) v fs
...@@ -22,9 +22,11 @@ import Data.List (concat,nub,groupBy,sortOn,sort) ...@@ -22,9 +22,11 @@ import Data.List (concat,nub,groupBy,sortOn,sort)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple (fst, snd) import Data.Tuple (fst, snd)
import Data.Vector (Vector) import Data.Vector (Vector)
import Data.Map (Map)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Aggregates.Cooc
import qualified Data.Map as Map import qualified Data.Map as Map
...@@ -46,11 +48,14 @@ freqToLabel thr ngs l = ngramsToLabel ngs $ mostFreqNgrams thr l ...@@ -46,11 +48,14 @@ freqToLabel thr ngs l = ngramsToLabel ngs $ mostFreqNgrams thr l
-- | To get the (nth `div` 2) most cooccuring Ngrams in a PhyloGroup -- | To get the (nth `div` 2) most cooccuring Ngrams in a PhyloGroup
mostOccNgrams :: Int -> PhyloGroup -> [Int] mostOccNgrams :: Int -> Phylo -> PhyloGroup -> [Int]
mostOccNgrams thr group = (nub . concat ) mostOccNgrams thr p g = (nub . concat )
$ map (\((f,s),_d) -> [f,s]) $ map (\((f,s),_d) -> [f,s])
$ take (thr `div` 2) $ take (thr `div` 2)
$ reverse $ sortOn snd $ Map.toList $ getGroupCooc group $ reverse $ sortOn snd $ Map.toList cooc
where
cooc :: Map (Int, Int) Double
cooc = getSubCooc (getGroupNgrams g) $ getCooc [getGroupPeriod g] p
-- | To alter the peak of a PhyloBranch -- | To alter the peak of a PhyloBranch
...@@ -75,7 +80,7 @@ nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView ...@@ -75,7 +80,7 @@ nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelCooc v thr p = over (pv_nodes nodeLabelCooc v thr p = over (pv_nodes
. traverse) . traverse)
(\n -> let lbl = ngramsToLabel (getFoundationsRoots p) (\n -> let lbl = ngramsToLabel (getFoundationsRoots p)
$ mostOccNgrams thr $ mostOccNgrams thr p
$ head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p $ head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p
in n & pn_label .~ lbl) v in n & pn_label .~ lbl) v
......
...@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.View.ViewMaker ...@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.View.ViewMaker
where where
import Control.Lens hiding (makeLenses, both, Level) import Control.Lens hiding (makeLenses, both, Level)
import Data.List (concat,nub,(++)) import Data.List (concat,nub,(++),sort)
import Data.Text (Text) import Data.Text (Text)
import Data.Map (Map, empty, elems, unionWithKey, fromList) import Data.Map (Map, empty, elems, unionWithKey, fromList)
import Data.Tuple (fst, snd) import Data.Tuple (fst, snd)
...@@ -32,6 +32,9 @@ import Gargantext.Viz.Phylo.View.Metrics ...@@ -32,6 +32,9 @@ import Gargantext.Viz.Phylo.View.Metrics
import Gargantext.Viz.Phylo.View.Sort import Gargantext.Viz.Phylo.View.Sort
import Gargantext.Viz.Phylo.View.Taggers import Gargantext.Viz.Phylo.View.Taggers
import qualified Data.Vector.Storable as VS
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
-- | To init a PhyloBranch -- | To init a PhyloBranch
initPhyloBranch :: PhyloBranchId -> Text -> PhyloBranch initPhyloBranch :: PhyloBranchId -> Text -> PhyloBranch
...@@ -139,7 +142,8 @@ addChildNodes shouldDo lvl lvlMin vb fl p v = ...@@ -139,7 +142,8 @@ addChildNodes shouldDo lvl lvlMin vb fl p v =
-- | To transform a PhyloQuery into a PhyloView -- | To transform a PhyloQuery into a PhyloView
toPhyloView :: PhyloQueryView -> Phylo -> PhyloView toPhyloView :: PhyloQueryView -> Phylo -> PhyloView
toPhyloView q p = processDisplay (q ^. qv_display) (q ^. qv_export) toPhyloView q p = traceView
$ processDisplay (q ^. qv_display) (q ^. qv_export)
$ processSort (q ^. qv_sort ) p $ processSort (q ^. qv_sort ) p
$ processTaggers (q ^. qv_taggers) p $ processTaggers (q ^. qv_taggers) p
$ processFilters (q ^. qv_filters) p $ processFilters (q ^. qv_filters) p
...@@ -160,3 +164,20 @@ getPhyloTitle p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p ...@@ -160,3 +164,20 @@ getPhyloTitle p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
-- | To get the desc of a Phylo -- | To get the desc of a Phylo
getPhyloDescription :: Phylo -> Text getPhyloDescription :: Phylo -> Text
getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
-----------------
-- | Taggers | --
-----------------
traceView :: PhyloView -> PhyloView
traceView pv = trace ("------------\n--| View |--\n------------\n\n"
<> "view level : " <> show (pv ^. pv_level) <> "\n"
<> show (length $ pv ^. pv_branches) <> " exported branches with " <> show (length $ pv ^. pv_nodes) <> " groups\n"
<> "groups by branches : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") pv
where
lst = sort $ map (fromIntegral . length . snd) $ getNodesByBranches pv
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment