Commit b0826576 authored by qlobbe's avatar qlobbe

add new synchronic clustering

parent 56636731
...@@ -39,6 +39,7 @@ import Gargantext.Viz.AdaptativePhylo ...@@ -39,6 +39,7 @@ import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloMaker (toPhylo) import Gargantext.Viz.Phylo.PhyloMaker (toPhylo)
import Gargantext.Viz.Phylo.PhyloTools (printIOMsg, printIOComment) import Gargantext.Viz.Phylo.PhyloTools (printIOMsg, printIOComment)
import Gargantext.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile) import Gargantext.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
-- import Gargantext.Viz.Phylo.SynchronicClustering (synchronicDistance')
import GHC.IO (FilePath) import GHC.IO (FilePath)
import Prelude (Either(..)) import Prelude (Either(..))
...@@ -156,6 +157,14 @@ main = do ...@@ -156,6 +157,14 @@ main = do
let phylo = toPhylo corpus mapList config let phylo = toPhylo corpus mapList config
-- | probes
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_synchronic_distance_cumu_jaccard.txt")
-- $ synchronicDistance' phylo 1
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_inflexion_points.txt")
-- $ inflexionPoints phylo 1
printIOMsg "End of reconstruction, start the export" printIOMsg "End of reconstruction, start the export"
let dot = toPhyloExport phylo let dot = toPhyloExport phylo
......
...@@ -73,6 +73,7 @@ library: ...@@ -73,6 +73,7 @@ library:
- Gargantext.Viz.Phylo.Tools - Gargantext.Viz.Phylo.Tools
- Gargantext.Viz.Phylo.PhyloTools - Gargantext.Viz.Phylo.PhyloTools
- Gargantext.Viz.Phylo.PhyloExport - Gargantext.Viz.Phylo.PhyloExport
- Gargantext.Viz.Phylo.SynchronicClustering
- Gargantext.Viz.Phylo.Example - Gargantext.Viz.Phylo.Example
- Gargantext.Viz.Phylo.LevelMaker - Gargantext.Viz.Phylo.LevelMaker
- Gargantext.Viz.Phylo.View.Export - Gargantext.Viz.Phylo.View.Export
......
...@@ -71,7 +71,8 @@ data Synchrony = ...@@ -71,7 +71,8 @@ data Synchrony =
ByProximityThreshold ByProximityThreshold
{ _bpt_threshold :: Double { _bpt_threshold :: Double
, _bpt_sensibility :: Double} , _bpt_sensibility :: Double}
| ByProximityDistribution | ByProximityDistribution
{ _bpd_sensibility :: Double}
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
...@@ -116,7 +117,7 @@ defaultConfig = ...@@ -116,7 +117,7 @@ defaultConfig =
, phyloName = pack "Default Phylo" , phyloName = pack "Default Phylo"
, phyloLevel = 1 , phyloLevel = 1
, phyloProximity = WeightedLogJaccard 10 0 0.1 , phyloProximity = WeightedLogJaccard 10 0 0.1
, phyloSynchrony = ByProximityThreshold 0.4 0 , phyloSynchrony = ByProximityDistribution 0
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, contextualUnit = Fis 2 4 , contextualUnit = Fis 2 4
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2] , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
......
...@@ -162,7 +162,11 @@ exportToDot phylo export = ...@@ -162,7 +162,11 @@ exportToDot phylo export =
graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))] graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
<> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
, Ratio FillRatio , Ratio FillRatio
, Style [SItem Filled []],Color [toWColor White]]) , Style [SItem Filled []],Color [toWColor White]
, (toAttr (fromStrict "nbDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))])
-- toAttr (fromStrict k) $ (pack . unwords) $ map show v
-- | 2) create a layer for the branches labels -- | 2) create a layer for the branches labels
subgraph (Str "Branches peaks") $ do subgraph (Str "Branches peaks") $ do
......
...@@ -17,7 +17,7 @@ Portability : POSIX ...@@ -17,7 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloTools where module Gargantext.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex) import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition) import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy)
import Data.Set (Set, size, disjoint) import Data.Set (Set, size, disjoint)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty) import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty)
import Data.String (String) import Data.String (String)
...@@ -215,6 +215,9 @@ listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst) ...@@ -215,6 +215,9 @@ listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
listToMatrix :: [Int] -> Map (Int,Int) Double listToMatrix :: [Int] -> Map (Int,Int) Double
listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
listToSeq :: Eq a => [a] -> [(a,a)]
listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y <- rest ]
sumCooc :: Cooc -> Cooc -> Cooc sumCooc :: Cooc -> Cooc -> Cooc
sumCooc cooc cooc' = unionWith (+) cooc cooc' sumCooc cooc cooc' = unionWith (+) cooc cooc'
...@@ -252,14 +255,6 @@ filterProximity proximity thr local = ...@@ -252,14 +255,6 @@ filterProximity proximity thr local =
WeightedLogJaccard _ _ _ -> local >= thr WeightedLogJaccard _ _ _ -> local >= thr
Hamming -> undefined Hamming -> undefined
filterPointers :: Filiation -> PointerType -> Proximity -> Double -> PhyloGroup -> PhyloGroup
filterPointers fil pty proximity thr group =
case pty of
TemporalPointer -> case fil of
ToChilds -> group & phylo_groupPeriodChilds %~ (filter (\(_,w) -> filterProximity proximity thr w))
ToParents -> group & phylo_groupPeriodParents %~ (filter (\(_,w) -> filterProximity proximity thr w))
LevelPointer -> undefined
--------------- ---------------
-- | Phylo | -- -- | Phylo | --
......
...@@ -20,12 +20,14 @@ import Gargantext.Viz.AdaptativePhylo ...@@ -20,12 +20,14 @@ import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard) import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard)
import Data.List ((++), null, intersect, nub, concat, sort) import Data.List ((++), null, intersect, nub, concat, sort, sortOn)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member) import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import qualified Data.Map as Map
------------------------- -------------------------
-- | New Level Maker | -- -- | New Level Maker | --
...@@ -92,13 +94,42 @@ toPairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)] ...@@ -92,13 +94,42 @@ toPairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
toPairs groups = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) toPairs groups = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))
$ listToCombi' groups $ listToCombi' groups
groupsToEdges :: Proximity -> Double -> Double -> Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox thr sens docs groups = toDiamonds :: [PhyloGroup] -> [[PhyloGroup]]
case prox of toDiamonds groups = foldl' (\acc groups' ->
WeightedLogJaccard _ _ _ -> filter (\(_,w) -> w >= thr) acc ++ ( elems
$ map (\(g,g') -> ((g,g'), weightedLogJaccard sens docs (g ^. phylo_groupCooc) (g' ^. phylo_groupCooc) (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) $ Map.filter (\v -> length v > 1)
$ toPairs groups $ fromListWith (++)
_ -> undefined $ foldl' (\acc' g ->
acc' ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodChilds)) [] groups')) []
$ elems
$ Map.filter (\v -> length v > 1)
$ fromListWith (++)
$ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups
groupsToEdges :: Proximity -> Synchrony -> Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox sync docs groups =
case sync of
ByProximityThreshold t s -> filter (\(_,w) -> w >= t)
$ toEdges s
$ toPairs groups
ByProximityDistribution s ->
let diamonds = sortOn snd
$ toEdges s $ concat
$ map toPairs $ toDiamonds groups
in take (div (length diamonds) 2) diamonds
where
toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)]
toEdges sens edges =
case prox of
WeightedLogJaccard _ _ _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard sens docs
(g ^. phylo_groupCooc) (g' ^. phylo_groupCooc)
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
_ -> undefined
toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]] toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
...@@ -111,32 +142,54 @@ toParentId :: PhyloGroup -> PhyloGroupId ...@@ -111,32 +142,54 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex) toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex)
reduceBranch :: Proximity -> Double -> Double -> Map Date Double -> [PhyloGroup] -> [PhyloGroup] reduceBranch :: Proximity -> Synchrony -> Map Date Double -> [PhyloGroup] -> [PhyloGroup]
reduceBranch prox thr sens docs branch = reduceBranch prox sync docs branch =
-- | 1) reduce a branch as a set of periods & groups -- | 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++) let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems) in (concat . concat . elems)
$ mapWithKey (\prd groups -> $ mapWithKey (\prd groups ->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold -- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let edges = groupsToEdges prox thr sens ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) groups let edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) groups
in map (\comp -> in map (\comp ->
-- | 4) add to each groups their futur level parent group -- | 4) add to each groups their futur level parent group
let parentId = toParentId (head' "parentId" comp) let parentId = toParentId (head' "parentId" comp)
in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp ) in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp )
-- |3) reduce the graph a a set of related components -- |3) reduce the graph a a set of related components
$ toRelatedComponents groups edges) periods $ toRelatedComponents groups edges) periods
synchronicClustering :: Phylo -> Phylo synchronicClustering :: Phylo -> Phylo
synchronicClustering phylo = synchronicClustering phylo =
case (phyloSynchrony $ getConfig phylo) of let prox = phyloProximity $ getConfig phylo
ByProximityThreshold t s -> sync = phyloSynchrony $ getConfig phylo
let prox = phyloProximity $ getConfig phylo docs = phylo ^. phylo_timeDocs
docs = phylo ^. phylo_timeDocs branches = map (\branch -> reduceBranch prox sync docs branch)
branches = map (\branch -> reduceBranch prox t s docs branch) $ phyloToLastBranches
$ phyloToLastBranches $ traceSynchronyStart phylo
$ traceSynchronyStart phylo branches' = branches `using` parList rdeepseq
branches' = branches `using` parList rdeepseq in toNextLevel phylo $ concat branches'
in toNextLevel phylo $ concat branches'
ByProximityDistribution -> undefined
\ No newline at end of file ----------------
-- | probes | --
----------------
-- synchronicDistance :: Phylo -> Level -> String
-- synchronicDistance phylo lvl =
-- foldl' (\acc branch ->
-- acc <> (foldl' (\acc' period ->
-- acc' <> let prox = phyloProximity $ getConfig phylo
-- sync = phyloSynchrony $ getConfig phylo
-- docs = _phylo_timeDocs phylo
-- prd = _phylo_groupPeriod $ head' "distance" period
-- edges = groupsToEdges prox 0.1 (_bpt_sensibility sync)
-- ((sum . elems) $ restrictKeys docs $ periodsToYears [_phylo_groupPeriod $ head' "distance" period]) period
-- in foldl' (\mem (_,w) ->
-- mem <> show (prd)
-- <> "\t"
-- <> show (w)
-- <> "\n"
-- ) "" edges
-- ) "" $ elems $ groupByField _phylo_groupPeriod branch)
-- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
\ No newline at end of file
...@@ -15,7 +15,7 @@ Portability : POSIX ...@@ -15,7 +15,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, elemIndex, (!!), dropWhile) import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, elemIndex, (!!), dropWhile, partition)
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, intersectionWith, findWithDefault, filterWithKey) import Data.Map (Map, fromList, elems, restrictKeys, unionWith, intersectionWith, findWithDefault, filterWithKey)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -25,6 +25,7 @@ import Gargantext.Viz.Phylo.PhyloTools ...@@ -25,6 +25,7 @@ import Gargantext.Viz.Phylo.PhyloTools
import Prelude (logBase) import Prelude (logBase)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
-- import Debug.Trace (trace)
import qualified Data.Set as Set import qualified Data.Set as Set
...@@ -99,42 +100,78 @@ toProximity docs proximity ego target target' = ...@@ -99,42 +100,78 @@ toProximity docs proximity ego target target' =
-- | Find pairs of valuable candidates to be matched -- | Find pairs of valuable candidates to be matched
makePairs :: [PhyloGroup] -> [PhyloPeriodId] -> [(PhyloGroup,PhyloGroup)] makePairs :: [PhyloGroup] -> [PhyloPeriodId] -> [PhyloPeriodId] -> [(PhyloGroup,PhyloGroup)]
makePairs candidates periods = case null periods of makePairs candidates periods periods' = case null periods of
True -> [] True -> []
-- | at least on of the pair candidates should be from the last added period -- | at least on of the pair candidates should be from the last added period
False -> filter (\(cdt,cdt') -> (inLastPeriod cdt periods) False -> filter (\(cdt,cdt') ->
|| (inLastPeriod cdt' periods)) ((inLastPeriod cdt periods) || (inLastPeriod cdt' periods))
&& (not $ inOldPeriods cdt periods')
&& (not $ inOldPeriods cdt' periods'))
$ listToKeys candidates $ listToKeys candidates
where where
inLastPeriod :: PhyloGroup -> [PhyloPeriodId] -> Bool inLastPeriod :: PhyloGroup -> [PhyloPeriodId] -> Bool
inLastPeriod g prds = (g ^. phylo_groupPeriod) == (last' "makePairs" prds) inLastPeriod g prds = (g ^. phylo_groupPeriod) == (last' "makePairs" prds)
--------------------------------------
inOldPeriods :: PhyloGroup -> [PhyloPeriodId] -> Bool
inOldPeriods g prds = elem (g ^. phylo_groupPeriod) prds
keepOldOnes :: Filiation -> Proximity -> Double -> PhyloGroup -> Bool
keepOldOnes fil proxi thr ego = any (\(_,w) -> filterProximity proxi thr w)
$ getPeriodPointers fil ego
filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
findLastPeriod :: Filiation -> [Pointer] -> PhyloPeriodId
findLastPeriod fil pts = case fil of
ToParents -> head' "findLastPeriod" $ sortOn fst $ map (fst . fst . fst) pts
ToChilds -> head' "findLastPeriod" $ reverse $ sortOn fst $ map (fst . fst . fst) pts
phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Proximity -> Map Date Double -> Double-> PhyloGroup -> PhyloGroup
phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Proximity -> Map Date Double -> Double -> PhyloGroup -> PhyloGroup
phyloGroupMatching candidates fil proxi docs thr ego = phyloGroupMatching candidates fil proxi docs thr ego =
case null (getPeriodPointers fil ego) of if keepOldOnes fil proxi thr ego
False -> filterPointers fil TemporalPointer proxi thr ego -- | keep some of the old pointers
True -> case null pointers of then addPointers ego fil TemporalPointer
True -> addPointers ego fil TemporalPointer [] $ filterPointers proxi thr
False -> addPointers ego fil TemporalPointer $ getPeriodPointers fil ego
$ head' "phyloGroupMatching" else case null pointers of
-- | Keep only the best set of pointers grouped by proximity -- | let's find new pointers
$ groupBy (\pt pt' -> snd pt == snd pt') True -> addPointers ego fil TemporalPointer []
$ reverse $ sortOn snd $ head' "pointers" pointers False -> addPointers ego fil TemporalPointer
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold $ head' "phyloGroupMatching"
where -- | Keep only the best set of pointers grouped by proximity
$ groupBy (\pt pt' -> snd pt == snd pt')
$ reverse $ sortOn snd $ head' "pointers" pointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
--------------------------------------
oldPeriods :: [PhyloPeriodId] -> [PhyloPeriodId]
oldPeriods periods =
if (null $ getPeriodPointers fil ego)
then []
else
let period = findLastPeriod fil $ getPeriodPointers fil ego
in fst $ partition (\prd -> case fil of
ToChilds -> prd <= period
ToParents -> prd >= period ) periods
--------------------------------------
pointers :: [[Pointer]] pointers :: [[Pointer]]
pointers = take 1 pointers = take 1
$ dropWhile (null) $ dropWhile (null)
-- | for each time frame, process the proximity on relevant pairs of targeted groups -- | for each time frame, process the proximity on relevant pairs of targeted groups
$ scanl (\acc groups -> $ scanl (\acc groups ->
let periods = nub let periods = nub
$ concat $ map (\gs -> if null gs $ concat $ map (\gs -> if null gs
then [] then []
else [_phylo_groupPeriod $ head' "pointers" gs]) groups else [_phylo_groupPeriod $ head' "pointers" gs]) groups
pairs = makePairs (concat groups) periods periods' = oldPeriods periods
in acc ++ ( filter (\(_,proximity) -> filterProximity proxi thr proximity) pairs = makePairs (concat groups) periods periods'
in acc ++ ( filterPointers proxi thr
$ concat $ concat
$ map (\(c,c') -> $ map (\(c,c') ->
-- | process the proximity between the current group and a pair of candidates -- | process the proximity between the current group and a pair of candidates
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment