Commit 2da4252e authored by qlobbe's avatar qlobbe

add the ancestors

parent 04aa2d5c
...@@ -143,7 +143,7 @@ defaultConfig = ...@@ -143,7 +143,7 @@ defaultConfig =
, phyloName = pack "Default Phylo" , phyloName = pack "Default Phylo"
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 , phyloProximity = WeightedLogJaccard 10
, seaElevation = Constante 0.6 1 , seaElevation = Constante 0.1 0.1
, phyloSynchrony = ByProximityThreshold 0.5 10 SiblingBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold 0.5 10 SiblingBranches MergeAllGroups
, phyloQuality = Quality 100 1 , phyloQuality = Quality 100 1
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
...@@ -326,6 +326,7 @@ data PhyloGroup = ...@@ -326,6 +326,7 @@ data PhyloGroup =
, _phylo_groupLevelChilds :: [Pointer] , _phylo_groupLevelChilds :: [Pointer]
, _phylo_groupPeriodParents :: [Pointer] , _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer] , _phylo_groupPeriodChilds :: [Pointer]
, _phylo_groupAncestors :: [Pointer]
} }
deriving (Generic, Show, Eq, NFData) deriving (Generic, Show, Eq, NFData)
...@@ -352,24 +353,13 @@ data PhyloClique = PhyloClique ...@@ -352,24 +353,13 @@ data PhyloClique = PhyloClique
, _phyloClique_period :: (Date,Date) , _phyloClique_period :: (Date,Date)
} deriving (Generic,NFData,Show,Eq) } deriving (Generic,NFData,Show,Eq)
------------------------
-- | Phylo Ancestor | --
------------------------
data PhyloAncestor = PhyloAncestor
{ _phyloAncestor_id :: Int
, _phyloAncestor_ngrams :: [Int]
, _phyloAncestor_groups :: [PhyloGroupId]
} deriving (Generic,NFData,Show,Eq)
---------------- ----------------
-- | Export | -- -- | Export | --
---------------- ----------------
type DotId = TextLazy.Text type DotId = TextLazy.Text
data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | PeriodToPeriod deriving (Show,Generic,Eq) data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq) data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
...@@ -405,7 +395,6 @@ data PhyloExport = ...@@ -405,7 +395,6 @@ data PhyloExport =
PhyloExport PhyloExport
{ _export_groups :: [PhyloGroup] { _export_groups :: [PhyloGroup]
, _export_branches :: [PhyloBranch] , _export_branches :: [PhyloBranch]
, _export_ancestors :: [PhyloAncestor]
} deriving (Generic, Show) } deriving (Generic, Show)
---------------- ----------------
......
...@@ -137,9 +137,9 @@ actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV", "Ptolem ...@@ -137,9 +137,9 @@ actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV", "Ptolem
corpus :: [(Date, Text)] corpus :: [(Date, Text)]
corpus = sortOn fst [ corpus = sortOn fst [
(-101,"La tutelle de sa mère lui étant difficile à endurer, en septembre 101 av. J.-C., Ptolemee-X la fait assassiner et peut enfin régner presque seul puisqu'il partage le pouvoir avec son épouse Berenice-III Cléopâtre Philopator."), (-101,"La tutelle Cesar Caesar-III de sa mère lui étant difficile à endurer, en septembre 101 av. J.-C., Ptolemee-X la fait assassiner et peut enfin régner presque seul puisqu'il partage le pouvoir avec son épouse Berenice-III Cléopâtre Philopator."),
(-99,"Caesar-III est questeur en 99 av. J.-C. ou 98 av. J.-C., et préteur en 92 av. J.-C.."), (-99,"Caesar-III est questeur en 99 av. J.-C. ou 98 av. J.-C., et préteur en 92 av. J.-C.."),
(-100,"Caius Julius Caesar-IV — dit Jules Cesar — naît vers 100 av. J.-C., il est le fils de Caius Julius Caesar-III et de Aurelia-Cotta"), (-100,"Caius Julius Caesar-IV — dit Jules Cesar Ptolemee-X — naît vers 100 av. J.-C., il est le fils de Caius Julius Caesar-III et de Aurelia-Cotta"),
(-85,"Caesar-III meurt à Pisae de cause naturelle en 85 av. J.-C. : selon Pline l'Ancien, il décède brusquement en mettant ses chaussures"), (-85,"Caesar-III meurt à Pisae de cause naturelle en 85 av. J.-C. : selon Pline l'Ancien, il décède brusquement en mettant ses chaussures"),
(-53,"Aurelia-Cotta décède peu de temps avant le meurtre de Clodius Pulcher, vers 53 av. J.-C."), (-53,"Aurelia-Cotta décède peu de temps avant le meurtre de Clodius Pulcher, vers 53 av. J.-C."),
(-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."), (-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."),
......
...@@ -18,15 +18,17 @@ Portability : POSIX ...@@ -18,15 +18,17 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloExport where module Gargantext.Viz.Phylo.PhyloExport where
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList) import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy, inits, elemIndex) import Data.List ((++), sort, nub, null, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
import Data.Vector (Vector) import Data.Vector (Vector)
import Prelude (writeFile) import Prelude (writeFile)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
import Control.Lens import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.GraphViz hiding (DotGraph, Order) import Data.GraphViz hiding (DotGraph, Order)
import Data.GraphViz.Types.Generalised (DotGraph) import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order) import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
...@@ -35,7 +37,6 @@ import Data.Text.Lazy (fromStrict, pack, unpack) ...@@ -35,7 +37,6 @@ import Data.Text.Lazy (fromStrict, pack, unpack)
import System.FilePath import System.FilePath
import Debug.Trace (trace) import Debug.Trace (trace)
import qualified Data.Map as Map
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import qualified Data.Text.Lazy as Lazy import qualified Data.Text.Lazy as Lazy
...@@ -154,6 +155,8 @@ toDotEdge source target lbl edgeType = edge source target ...@@ -154,6 +155,8 @@ toDotEdge source target lbl edgeType = edge source target
, Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "branchLink" ] , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "branchLink" ]
BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)]) BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
, Label (StrLabel $ fromStrict lbl)] , Label (StrLabel $ fromStrict lbl)]
GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)])
, Label (StrLabel $ fromStrict lbl), PenWidth 4] <> [toAttr "edgeType" "ancestorLink" ]
PeriodToPeriod -> [ Width 5, Color [toWColor Black]]) PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
...@@ -163,6 +166,11 @@ mergePointers groups = ...@@ -163,6 +166,11 @@ mergePointers groups =
toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
in unionWith (\w w' -> max w w') toChilds toParents in unionWith (\w w' -> max w w') toChilds toParents
mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
mergeAncestors groups = concat
$ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
$ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
toBid :: PhyloGroup -> [PhyloBranch] -> Int toBid :: PhyloGroup -> [PhyloBranch] -> Int
toBid g bs = toBid g bs =
...@@ -235,7 +243,11 @@ exportToDot phylo export = ...@@ -235,7 +243,11 @@ exportToDot phylo export =
toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
) $ (toList . mergePointers) $ export ^. export_groups ) $ (toList . mergePointers) $ export ^. export_groups
-- | 7) create the edges between the periods _ <- mapM (\((k,k'),_) ->
toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToAncestor
) $ mergeAncestors $ export ^. export_groups
-- | 10) create the edges between the periods
_ <- mapM (\(prd,prd') -> _ <- mapM (\(prd,prd') ->
toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
...@@ -261,9 +273,9 @@ exportToDot phylo export = ...@@ -261,9 +273,9 @@ exportToDot phylo export =
filterByBranchSize :: Double -> PhyloExport -> PhyloExport filterByBranchSize :: Double -> PhyloExport -> PhyloExport
filterByBranchSize thr export = filterByBranchSize thr export =
let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
in export & export_branches .~ (fst branches') in export & export_branches .~ (fst splited)
& export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches'))) & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
...@@ -481,22 +493,77 @@ processDynamics groups = ...@@ -481,22 +493,77 @@ processDynamics groups =
-- | horizon | -- -- | horizon | --
----------------- -----------------
horizonToAncestors :: Double -> Phylo -> [PhyloAncestor] getGroupThr :: Double -> PhyloGroup -> Double
horizonToAncestors delta phylo = getGroupThr step g =
let horizon = Map.toList $ Map.filter (\v -> v > delta) $ phylo ^. phylo_horizon let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
ct0 = fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevelPeriods 1 (take 1 (getPeriodIds phylo)) phylo breaks = (g ^. phylo_groupMeta) ! "breaks"
aDelta = toRelatedComponents in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
(elems ct0)
(map (\((g,g'),v) -> ((ct0 ! g,ct0 ! g'),v)) horizon) toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
in map (\(id,groups) -> toAncestor id groups) $ zip [1..] aDelta toAncestor nbDocs diago proximity step candidates ego =
let curr = ego ^. phylo_groupAncestors
in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
$ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
$ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
$ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
headsToAncestors nbDocs diago proximity step heads acc =
if (null heads)
then acc
else
let ego = head' "headsToAncestors" heads
heads' = tail' "headsToAncestors" heads
in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
toHorizon :: Phylo -> Phylo
toHorizon phylo =
let phyloAncestor = updatePhyloGroups
level
(fromList $ map (\g -> (getGroupId g, g))
$ concat
$ tracePhyloAncestors newGroups) phylo
reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
in updatePhyloGroups level reBranched phylo
where where
-- | note : possible bug if we sync clus more than once -- | 1) for each periods
-- | horizon is calculated at level 1, ancestors have to be related to the last level periods :: [PhyloPeriodId]
toAncestor :: Int -> [PhyloGroup] -> PhyloAncestor periods = getPeriodIds phylo
toAncestor id groups = PhyloAncestor id -- --
(foldl' (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups) level :: Level
(concat $ map (\g -> map fst (g ^. phylo_groupLevelParents)) groups) level = getLastLevel phylo
-- --
frame :: Int
frame = getTimeFrame $ timeUnit $ getConfig phylo
-- | 2) find ancestors between groups without parents
mapGroups :: [[PhyloGroup]]
mapGroups = map (\prd ->
let groups = getGroupsFromLevelPeriods level [prd] phylo
childs = getPreviousChildIds level frame prd periods phylo
heads = filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
noHeads = groups \\ heads
nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
proximity = (phyloProximity $ getConfig phylo)
step = case getSeaElevation phylo of
Constante _ s -> s
Adaptative _ -> undefined
-- in headsToAncestors nbDocs diago proximity heads groups []
in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
$ headsToAncestors nbDocs diago proximity step heads []
) periods
-- | 3) process this task concurrently
newGroups :: [[PhyloGroup]]
newGroups = mapGroups `using` parList rdeepseq
--------------------------------------
getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
getPreviousChildIds lvl frame curr prds phylo =
concat $ map ((map fst) . _phylo_groupPeriodChilds)
$ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
--------------------- ---------------------
-- | phyloExport | -- -- | phyloExport | --
...@@ -510,7 +577,7 @@ toPhyloExport phylo = exportToDot phylo ...@@ -510,7 +577,7 @@ toPhyloExport phylo = exportToDot phylo
$ processMetrics export $ processMetrics export
where where
export :: PhyloExport export :: PhyloExport
export = PhyloExport groups branches (horizonToAncestors 0 phylo) export = PhyloExport groups branches
-------------------------------------- --------------------------------------
branches :: [PhyloBranch] branches :: [PhyloBranch]
branches = map (\g -> branches = map (\g ->
...@@ -533,13 +600,19 @@ toPhyloExport phylo = exportToDot phylo ...@@ -533,13 +600,19 @@ toPhyloExport phylo = exportToDot phylo
groups = traceExportGroups groups = traceExportGroups
$ processDynamics $ processDynamics
$ getGroupsFromLevel (phyloLevel $ getConfig phylo) $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
$ tracePhyloInfo phylo $ tracePhyloInfo
$ toHorizon phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch] traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
traceExportBranches branches = trace ("\n" traceExportBranches branches = trace ("\n"
<> "-- | Export " <> show(length branches) <> " branches") branches <> "-- | Export " <> show(length branches) <> " branches") branches
tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePhyloAncestors groups = trace ("\n"
<> "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors"
) groups
tracePhyloInfo :: Phylo -> Phylo tracePhyloInfo :: Phylo -> Phylo
tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = " tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
<> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to " <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
......
...@@ -36,7 +36,6 @@ import Control.Lens hiding (Level) ...@@ -36,7 +36,6 @@ import Control.Lens hiding (Level)
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import qualified Data.Set as Set import qualified Data.Set as Set
------------------ ------------------
-- | To Phylo | -- -- | To Phylo | --
------------------ ------------------
...@@ -119,7 +118,7 @@ cliqueToGroup fis pId lvl idx coocs = PhyloGroup pId lvl idx "" ...@@ -119,7 +118,7 @@ cliqueToGroup fis pId lvl idx coocs = PhyloGroup pId lvl idx ""
(ngramsToCooc (fis ^. phyloClique_nodes) coocs) (ngramsToCooc (fis ^. phyloClique_nodes) coocs)
(1,[0]) -- | branchid (lvl,[path in the branching tree]) (1,[0]) -- | branchid (lvl,[path in the branching tree])
(fromList [("breaks",[0]),("seaLevels",[0])]) (fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] [] [] []
toPhylo1 :: [Document] -> Phylo -> Phylo toPhylo1 :: [Document] -> Phylo -> Phylo
......
...@@ -17,9 +17,9 @@ Portability : POSIX ...@@ -17,9 +17,9 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloTools where module Gargantext.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex) import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy) import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, maximum, group)
import Data.Set (Set, disjoint) import Data.Set (Set, disjoint)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty) import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.String (String) import Data.String (String)
import Data.Text (Text, unwords) import Data.Text (Text, unwords)
...@@ -257,22 +257,19 @@ ngramsToCooc ngrams coocs = ...@@ -257,22 +257,19 @@ ngramsToCooc ngrams coocs =
-------------------- --------------------
getGroupId :: PhyloGroup -> PhyloGroupId getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex) getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupLevel), g ^. phylo_groupIndex)
idToPrd :: PhyloGroupId -> PhyloPeriodId idToPrd :: PhyloGroupId -> PhyloPeriodId
idToPrd id = (fst . fst) id idToPrd id = (fst . fst) id
getGroupThr :: PhyloGroup -> Double
getGroupThr group = last' "getGroupThr" ((group ^. phylo_groupMeta) ! "breaks")
groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup] groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer] getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer]
getPeriodPointers fil group = getPeriodPointers fil g =
case fil of case fil of
ToChilds -> group ^. phylo_groupPeriodChilds ToChilds -> g ^. phylo_groupPeriodChilds
ToParents -> group ^. phylo_groupPeriodParents ToParents -> g ^. phylo_groupPeriodParents
filterProximity :: Proximity -> Double -> Double -> Bool filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity proximity thr local = filterProximity proximity thr local =
...@@ -291,14 +288,14 @@ getProximityName proximity = ...@@ -291,14 +288,14 @@ getProximityName proximity =
--------------- ---------------
addPointers :: Filiation -> PointerType -> [Pointer] -> PhyloGroup -> PhyloGroup addPointers :: Filiation -> PointerType -> [Pointer] -> PhyloGroup -> PhyloGroup
addPointers fil pty pointers group = addPointers fil pty pointers g =
case pty of case pty of
TemporalPointer -> case fil of TemporalPointer -> case fil of
ToChilds -> group & phylo_groupPeriodChilds .~ pointers ToChilds -> g & phylo_groupPeriodChilds .~ pointers
ToParents -> group & phylo_groupPeriodParents .~ pointers ToParents -> g & phylo_groupPeriodParents .~ pointers
LevelPointer -> case fil of LevelPointer -> case fil of
ToChilds -> group & phylo_groupLevelChilds .~ pointers ToChilds -> g & phylo_groupLevelChilds .~ pointers
ToParents -> group & phylo_groupLevelParents .~ pointers ToParents -> g & phylo_groupLevelParents .~ pointers
getPeriodIds :: Phylo -> [(Date,Date)] getPeriodIds :: Phylo -> [(Date,Date)]
...@@ -375,12 +372,12 @@ updatePhyloGroups lvl m phylo = ...@@ -375,12 +372,12 @@ updatePhyloGroups lvl m phylo =
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl) . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups . phylo_levelGroups
. traverse . traverse
) (\group -> ) (\g ->
let id = getGroupId group let id = getGroupId g
in in
if member id m if member id m
then m ! id then m ! id
else group ) phylo else g ) phylo
traceToPhylo :: Level -> Phylo -> Phylo traceToPhylo :: Level -> Phylo -> Phylo
...@@ -393,6 +390,43 @@ traceToPhylo lvl phylo = ...@@ -393,6 +390,43 @@ traceToPhylo lvl phylo =
-- | Clustering | -- -- | Clustering | --
-------------------- --------------------
mergeBranchIds :: [[Int]] -> [Int]
mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
where
-- | 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids
mostFreq' :: [[Int]] -> [[Int]]
mostFreq' ids' =
let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
sup = (fst . maximum) groupIds
in map snd $ filter (\gIds -> fst gIds == sup) groupIds
mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
mergeMeta bId groups =
let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups
in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches groups =
-- | run the related component algorithm
let egos = map (\g -> [getGroupId g]
++ (map fst $ g ^. phylo_groupPeriodParents)
++ (map fst $ g ^. phylo_groupPeriodChilds)
++ (map fst $ g ^. phylo_groupAncestors)) $ elems groups
graph = relatedComponents egos
-- | update each group's branch id
in map (\ids ->
let groups' = elems $ restrictKeys groups (Set.fromList ids)
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph
relatedComponents :: Ord a => [[a]] -> [[a]] relatedComponents :: Ord a => [[a]] -> [[a]]
relatedComponents graph = foldl' (\acc groups -> relatedComponents graph = foldl' (\acc groups ->
if (null acc) if (null acc)
......
...@@ -21,58 +21,20 @@ import Gargantext.Viz.Phylo.PhyloTools ...@@ -21,58 +21,20 @@ import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos) import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Viz.Phylo.PhyloExport (processDynamics) import Gargantext.Viz.Phylo.PhyloExport (processDynamics)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, all, groupBy, group, maximum) import Data.List ((++), null, intersect, nub, concat, sort, sortOn, all, groupBy)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member) import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Data.Text (Text)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set
------------------------- -------------------------
-- | New Level Maker | -- -- | New Level Maker | --
------------------------- -------------------------
mergeBranchIds :: [[Int]] -> [Int]
mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
where
-- | 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids
mostFreq' :: [[Int]] -> [[Int]]
mostFreq' ids' =
let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
sup = (fst . maximum) groupIds
in map snd $ filter (\gIds -> fst gIds == sup) groupIds
mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
mergeMeta bId groups =
let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups
in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches' groups =
-- | run the related component algorithm
let egos = map (\g -> [getGroupId g]
++ (map fst $ g ^. phylo_groupPeriodParents)
++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups
graph = relatedComponents egos
-- | update each group's branch id
in map (\ids ->
let groups' = elems $ restrictKeys groups (Set.fromList ids)
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs = mergeGroups coocs id mapIds childs =
...@@ -84,6 +46,7 @@ mergeGroups coocs id mapIds childs = ...@@ -84,6 +46,7 @@ mergeGroups coocs id mapIds childs =
(mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs) (mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs) (updatePointers $ concat $ map _phylo_groupPeriodParents childs)
(updatePointers $ concat $ map _phylo_groupPeriodChilds childs) (updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
[]
where where
-------------------- --------------------
bId :: [Int] bId :: [Int]
...@@ -104,7 +67,7 @@ toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo ...@@ -104,7 +67,7 @@ toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo
toNextLevel' phylo groups = toNextLevel' phylo groups =
let curLvl = getLastLevel phylo let curLvl = getLastLevel phylo
oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups
newGroups = concat $ groupsToBranches' newGroups = concat $ groupsToBranches
$ fromList $ map (\g -> (getGroupId g, g)) $ fromList $ map (\g -> (getGroupId g, g))
$ foldlWithKey (\acc id groups' -> $ foldlWithKey (\acc id groups' ->
-- | 4) create the parent group -- | 4) create the parent group
......
...@@ -345,8 +345,8 @@ toPhyloQuality beta freq branches = ...@@ -345,8 +345,8 @@ toPhyloQuality beta freq branches =
------------------------------------ ------------------------------------
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches groups = groupsToBranches' groups =
-- | run the related component algorithm -- | run the related component algorithm
let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs')) let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
$ sortOn (\gs -> fst $ fst $ head' "egos" gs) $ sortOn (\gs -> fst $ fst $ head' "egos" gs)
...@@ -413,7 +413,7 @@ breakBranches proximity beta frequency minBranch thr depth elevation frame docs ...@@ -413,7 +413,7 @@ breakBranches proximity beta frequency minBranch thr depth elevation frame docs
-------------------------------------- --------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]]) ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' = ego' =
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) let branches = groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego) $ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
branches' = branches `using` parList rdeepseq branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch) in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
...@@ -470,7 +470,7 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1 ...@@ -470,7 +470,7 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1
-- | here we suppose that all the groups of level 1 are part of the same big branch -- | here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],Bool)] groups :: [([PhyloGroup],Bool)]
groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo))) groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo) $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (phyloProximity $ getConfig phylo) (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
start start
...@@ -569,7 +569,7 @@ adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency min ...@@ -569,7 +569,7 @@ adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency min
-------------------------------------- --------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]]) ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' = ego' =
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) let branches = groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego) $ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
branches' = branches `using` parList rdeepseq branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch) in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
...@@ -627,7 +627,7 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1 ...@@ -627,7 +627,7 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
-- | here we suppose that all the groups of level 1 are part of the same big branch -- | here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],(Bool,[Double]))] groups :: [([PhyloGroup],(Bool,[Double]))]
groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr]))) groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo) $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (phyloProximity $ getConfig phylo) (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
thr thr
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment