Commit f5393047 authored by qlobbe's avatar qlobbe

fix the weightedlogjaccard

parent f49465e4
Pipeline #614 failed with stage
...@@ -133,8 +133,8 @@ defaultConfig = ...@@ -133,8 +133,8 @@ defaultConfig =
, phyloName = pack "Default Phylo" , phyloName = pack "Default Phylo"
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 0 0.1 , phyloProximity = WeightedLogJaccard 10 0 0.1
, phyloSynchrony = ByProximityThreshold 0.1 0 SiblingBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold 0.2 0 SiblingBranches MergeAllGroups
, phyloQuality = Quality 0.1 1 , phyloQuality = Quality 10 1
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, clique = Fis 1 5 , clique = Fis 1 5
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2] , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
......
...@@ -171,11 +171,15 @@ exportToDot phylo export = ...@@ -171,11 +171,15 @@ exportToDot phylo export =
, Ratio FillRatio , Ratio FillRatio
, Style [SItem Filled []],Color [toWColor White]] , Style [SItem Filled []],Color [toWColor White]]
-- | home made attributes -- | home made attributes
<> [(toAttr (fromStrict "nbDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs)) <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo)) ,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo)) ,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo)) ,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo)) ,(toAttr (fromStrict "quaGranularity") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
]) ])
......
...@@ -91,7 +91,7 @@ cliqueToGroup fis thr pId lvl idx fdt coocs = ...@@ -91,7 +91,7 @@ cliqueToGroup fis thr pId lvl idx fdt coocs =
(fis ^. phyloClique_support) (fis ^. phyloClique_support)
ngrams ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
(1,[0]) (1,[0]) -- | branchid (lvl,[path in the branching tree])
(singleton "thr" [thr]) (singleton "thr" [thr])
[] [] [] [] [] [] [] []
......
...@@ -227,6 +227,8 @@ sumCooc cooc cooc' = unionWith (+) cooc cooc' ...@@ -227,6 +227,8 @@ sumCooc cooc cooc' = unionWith (+) cooc cooc'
getTrace :: Cooc -> Double getTrace :: Cooc -> Double
getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
coocToDiago :: Cooc -> Cooc
coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc
-- | To build the local cooc matrix of each phylogroup -- | To build the local cooc matrix of each phylogroup
ngramsToCooc :: [Int] -> [Cooc] -> Cooc ngramsToCooc :: [Int] -> [Cooc] -> Cooc
......
...@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.SynchronicClustering where ...@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.SynchronicClustering where
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard) import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Viz.Phylo.PhyloExport (processDynamics) import Gargantext.Viz.Phylo.PhyloExport (processDynamics)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, init, all, group, maximum, groupBy) import Data.List ((++), null, intersect, nub, concat, sort, sortOn, init, all, group, maximum, groupBy)
...@@ -56,26 +56,16 @@ mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids ...@@ -56,26 +56,16 @@ mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches' groups = groupsToBranches' groups =
-- | run the related component algorithm -- | run the related component algorithm
let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs')) let egos = map (\g -> [getGroupId g]
$ sortOn (\gs -> fst $ fst $ head' "egos" gs) ++ (map fst $ g ^. phylo_groupPeriodParents)
$ map (\g -> [getGroupId g] ++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups
++ (map fst $ g ^. phylo_groupPeriodParents) graph = relatedComponents egos
++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups
-- | first find the related components by inside each ego's period
graph' = map relatedComponents egos
-- | then run it for the all the periods
graph = relatedComponents $ concat (graph' `using` parList rdeepseq)
-- | update each group's branch id -- | update each group's branch id
in map (\ids -> in map (\ids ->
-- intervenir ici
let groups' = elems $ restrictKeys groups (Set.fromList ids) let groups' = elems $ restrictKeys groups (Set.fromList ids)
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups' bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl + 1,bId))) groups') graph in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl + 1,bId))) groups') graph
-- toBranchId :: PhyloGroup -> PhyloBranchId
-- toBranchId child = ((child ^. phylo_groupLevel) + 1, snd (child ^. phylo_groupBranchId))
getLastThr :: [PhyloGroup] -> Double getLastThr :: [PhyloGroup] -> Double
getLastThr childs = maximum $ concat $ map (\g -> (g ^. phylo_groupMeta) ! "thr") childs getLastThr childs = maximum $ concat $ map (\g -> (g ^. phylo_groupMeta) ! "thr") childs
...@@ -157,8 +147,8 @@ toDiamonds groups = foldl' (\acc groups' -> ...@@ -157,8 +147,8 @@ toDiamonds groups = foldl' (\acc groups' ->
$ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups $ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups
groupsToEdges :: Proximity -> Synchrony -> Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] groupsToEdges :: Proximity -> Synchrony -> Double -> Map Int Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox sync docs groups = groupsToEdges prox sync nbDocs diago groups =
case sync of case sync of
ByProximityThreshold thr sens _ strat -> ByProximityThreshold thr sens _ strat ->
filter (\(_,w) -> w >= thr) filter (\(_,w) -> w >= thr)
...@@ -174,8 +164,7 @@ groupsToEdges prox sync docs groups = ...@@ -174,8 +164,7 @@ groupsToEdges prox sync docs groups =
toEdges sens edges = toEdges sens edges =
case prox of case prox of
WeightedLogJaccard _ _ _ -> map (\(g,g') -> WeightedLogJaccard _ _ _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard sens docs ((g,g'), weightedLogJaccard' sens nbDocs diago
(g ^. phylo_groupCooc) (g' ^. phylo_groupCooc)
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
_ -> undefined _ -> undefined
...@@ -191,15 +180,16 @@ toParentId :: PhyloGroup -> PhyloGroupId ...@@ -191,15 +180,16 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex) toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex)
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> [PhyloGroup] -> [PhyloGroup] reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reduceGroups prox sync docs branch = reduceGroups prox sync docs diagos branch =
-- | 1) reduce a branch as a set of periods & groups -- | 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++) let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems) in (concat . concat . elems)
$ mapWithKey (\prd groups -> $ mapWithKey (\prd groups ->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold -- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) groups let diago = reduceDiagos $ filterDiago diagos [prd]
edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups
in map (\comp -> in map (\comp ->
-- | 4) add to each groups their futur level parent group -- | 4) add to each groups their futur level parent group
let parentId = toParentId (head' "parentId" comp) let parentId = toParentId (head' "parentId" comp)
...@@ -233,7 +223,8 @@ synchronicClustering phylo = ...@@ -233,7 +223,8 @@ synchronicClustering phylo =
let prox = phyloProximity $ getConfig phylo let prox = phyloProximity $ getConfig phylo
sync = phyloSynchrony $ getConfig phylo sync = phyloSynchrony $ getConfig phylo
docs = phylo ^. phylo_timeDocs docs = phylo ^. phylo_timeDocs
newBranches = map (\branch -> reduceGroups prox sync docs branch) diagos = map coocToDiago $ phylo ^. phylo_timeCooc
newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics $ map processDynamics
$ adjustClustering sync (getPhyloThresholdStep phylo) $ adjustClustering sync (getPhyloThresholdStep phylo)
$ phyloToLastBranches $ phyloToLastBranches
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment