Commit 8faf7d6d authored by qlobbe's avatar qlobbe

refactoring temporal matching

parent ebcee352
Pipeline #3229 failed with stage
in 72 minutes and 50 seconds
...@@ -31,6 +31,7 @@ import Control.Lens (makeLenses) ...@@ -31,6 +31,7 @@ import Control.Lens (makeLenses)
import Data.Aeson import Data.Aeson
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Map (Map) import Data.Map (Map)
import Data.Set (Set)
import Data.Swagger import Data.Swagger
import Data.Text (Text, pack) import Data.Text (Text, pack)
import Data.Vector (Vector) import Data.Vector (Vector)
...@@ -63,9 +64,9 @@ instance ToSchema ListParser ...@@ -63,9 +64,9 @@ instance ToSchema ListParser
data SeaElevation = data SeaElevation =
Constante Constante
{ _cons_start :: Double { _cons_start :: Double
, _cons_step :: Double } , _cons_gap :: Double }
| Adaptative | Adaptative
{ _adap_granularity :: Double } { _adap_steps :: Double }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema SeaElevation instance ToSchema SeaElevation
...@@ -306,8 +307,8 @@ instance ToSchema Software where ...@@ -306,8 +307,8 @@ instance ToSchema Software where
defaultSoftware :: Software defaultSoftware :: Software
defaultSoftware = defaultSoftware =
Software { _software_name = pack "Gargantext" Software { _software_name = pack "GarganText"
, _software_version = pack "v4" } , _software_version = pack "v5" }
-- | Global parameters of a Phylo -- | Global parameters of a Phylo
...@@ -324,7 +325,7 @@ instance ToSchema PhyloParam where ...@@ -324,7 +325,7 @@ instance ToSchema PhyloParam where
defaultPhyloParam :: PhyloParam defaultPhyloParam :: PhyloParam
defaultPhyloParam = defaultPhyloParam =
PhyloParam { _phyloParam_version = pack "v2.adaptative" PhyloParam { _phyloParam_version = pack "v3"
, _phyloParam_software = defaultSoftware , _phyloParam_software = defaultSoftware
, _phyloParam_config = defaultConfig } , _phyloParam_config = defaultConfig }
...@@ -409,8 +410,7 @@ data Phylo = ...@@ -409,8 +410,7 @@ data Phylo =
, _phylo_timeDocs :: !(Map Date Double) , _phylo_timeDocs :: !(Map Date Double)
, _phylo_termFreq :: !(Map Int Double) , _phylo_termFreq :: !(Map Int Double)
, _phylo_lastTermFreq :: !(Map Int Double) , _phylo_lastTermFreq :: !(Map Int Double)
, _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double) , _phylo_diaSimScan :: Set Double
, _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
, _phylo_param :: PhyloParam , _phylo_param :: PhyloParam
, _phylo_periods :: Map Period PhyloPeriod , _phylo_periods :: Map Period PhyloPeriod
, _phylo_quality :: Double , _phylo_quality :: Double
......
...@@ -27,9 +27,10 @@ import Gargantext.Core.Viz.Phylo.PhyloExport ...@@ -27,9 +27,10 @@ import Gargantext.Core.Viz.Phylo.PhyloExport
import Gargantext.Core.Viz.Phylo.PhyloMaker import Gargantext.Core.Viz.Phylo.PhyloMaker
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching) import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import qualified Data.Set as Set
--------------------------------- ---------------------------------
-- | STEP 5 | -- Export the phylo -- | STEP 5 | -- Export the phylo
...@@ -54,13 +55,14 @@ phyloCleopatre = synchronicClustering $ toHorizon flatPhylo ...@@ -54,13 +55,14 @@ phyloCleopatre = synchronicClustering $ toHorizon flatPhylo
flatPhylo :: Phylo flatPhylo :: Phylo
flatPhylo = case (getSeaElevation emptyPhylo) of flatPhylo = case (getSeaElevation emptyPhylo) of
Constante s g -> constanteTemporalMatching s g Constante s g -> temporalMatching (constDiachronicLadder s g Set.empty)
$ toGroupsProxi 1 $ scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
Adaptative s -> adaptativeTemporalMatching s
$ toGroupsProxi 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo $ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
Adaptative s -> temporalMatching (adaptDiachronicLadder s (emptyPhylo' ^. phylo_diaSimScan) Set.empty) emptyPhylo'
emptyPhylo' :: Phylo
emptyPhylo' = scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
--------------------------------------------- ---------------------------------------------
-- | STEP 2 | -- Build the cliques -- | STEP 2 | -- Build the cliques
...@@ -102,6 +104,7 @@ config :: PhyloConfig ...@@ -102,6 +104,7 @@ config :: PhyloConfig
config = config =
defaultConfig { phyloName = "Cesar et Cleopatre" defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloScale = 2 , phyloScale = 2
, seaElevation = Adaptative 4
, exportFilter = [ByBranchSize 0] , exportFilter = [ByBranchSize 0]
, clique = MaxClique 0 15 ByNeighbours } , clique = MaxClique 0 15 ByNeighbours }
......
...@@ -546,9 +546,10 @@ processLabels labels foundations freq export = ...@@ -546,9 +546,10 @@ processLabels labels foundations freq export =
-- | Dynamics | -- -- | Dynamics | --
------------------ ------------------
-- utiliser & creer une Map FdtId [PhyloGroup]
toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double -- n = index of the current term
toDynamics n parents g m = toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
toDynamics n elders g m =
let prd = g ^. phylo_groupPeriod let prd = g ^. phylo_groupPeriod
end = last' "dynamics" (sort $ map snd $ elems m) end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end)) in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
...@@ -564,18 +565,18 @@ toDynamics n parents g m = ...@@ -564,18 +565,18 @@ toDynamics n parents g m =
where where
-------------------------------------- --------------------------------------
isNew :: Bool isNew :: Bool
isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
type FdtId = Int
processDynamics :: [PhyloGroup] -> [PhyloGroup] processDynamics :: [PhyloGroup] -> [PhyloGroup]
processDynamics groups = processDynamics groups =
map (\g -> map (\g ->
let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId) let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
&& ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
where where
-------------------------------------- --------------------------------------
mapNgrams :: Map Int (Date,Date) mapNgrams :: Map FdtId (Date,Date)
mapNgrams = map (\dates -> mapNgrams = map (\dates ->
let dates' = sort dates let dates' = sort dates
in (head' "dynamics" dates', last' "dynamics" dates')) in (head' "dynamics" dates', last' "dynamics" dates'))
...@@ -621,7 +622,7 @@ toHorizon phylo = ...@@ -621,7 +622,7 @@ toHorizon phylo =
$ concat $ concat
$ tracePhyloAncestors newGroups) phylo $ tracePhyloAncestors newGroups) phylo
reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
$ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel scale phyloAncestor $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
in updatePhyloGroups scale reBranched phylo in updatePhyloGroups scale reBranched phylo
where where
-- | 1) for each periods -- | 1) for each periods
...@@ -636,7 +637,7 @@ toHorizon phylo = ...@@ -636,7 +637,7 @@ toHorizon phylo =
-- | 2) find ancestors between groups without parents -- | 2) find ancestors between groups without parents
mapGroups :: [[PhyloGroup]] mapGroups :: [[PhyloGroup]]
mapGroups = map (\prd -> mapGroups = map (\prd ->
let groups = getGroupsFromLevelPeriods scale [prd] phylo let groups = getGroupsFromScalePeriods scale [prd] phylo
childs = getPreviousChildIds scale frame prd periods phylo childs = getPreviousChildIds scale frame prd periods phylo
-- maybe add a better filter for non isolated ancestors -- maybe add a better filter for non isolated ancestors
heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds)) heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
...@@ -660,7 +661,7 @@ toHorizon phylo = ...@@ -660,7 +661,7 @@ toHorizon phylo =
getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId] getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
getPreviousChildIds lvl frame curr prds phylo = getPreviousChildIds lvl frame curr prds phylo =
concat $ map ((map fst) . _phylo_groupPeriodChilds) concat $ map ((map fst) . _phylo_groupPeriodChilds)
$ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo $ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
--------------------- ---------------------
-- | phyloExport | -- -- | phyloExport | --
...@@ -695,10 +696,10 @@ toPhyloExport phylo = exportToDot phylo ...@@ -695,10 +696,10 @@ toPhyloExport phylo = exportToDot phylo
-------------------------------------- --------------------------------------
groups :: [PhyloGroup] groups :: [PhyloGroup]
groups = traceExportGroups groups = traceExportGroups
-- necessaire ?
$ processDynamics $ processDynamics
$ getGroupsFromLevel (phyloScale $ getConfig phylo) $ getGroupsFromScale (phyloScale $ getConfig phylo)
$ tracePhyloInfo phylo $ tracePhyloInfo phylo
-- \$ toHorizon phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch] traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
......
...@@ -17,9 +17,11 @@ import Control.Lens hiding (Level) ...@@ -17,9 +17,11 @@ import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail) import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert) import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import Data.Vector (Vector) import Data.Vector (Vector)
import Debug.Trace (trace) import Debug.Trace (trace)
import Prelude (floor)
import Gargantext.Core.Methods.Distances (Distance(Conditional)) import Gargantext.Core.Methods.Distances (Distance(Conditional))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques) import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
...@@ -29,7 +31,7 @@ import Gargantext.Core.Viz.Phylo ...@@ -29,7 +31,7 @@ import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon) import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity) import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Set as Set import qualified Data.Set as Set
...@@ -42,7 +44,7 @@ import qualified Data.Vector as Vector ...@@ -42,7 +44,7 @@ import qualified Data.Vector as Vector
{- {-
-- TODO AD -- TODO AD
data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo} data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
| PhyloN { _phylo'_flatPhylo :: Phylo} | PhyloN { _phylo'_flatPhylo :: Phylo}
toPhylo' :: Phylo' -> [Document] -> TermList -> PhyloConfig -> Phylo toPhylo' :: Phylo' -> [Document] -> TermList -> PhyloConfig -> Phylo
...@@ -50,12 +52,14 @@ toPhylo' (PhyloN phylo) = toPhylo' ...@@ -50,12 +52,14 @@ toPhylo' (PhyloN phylo) = toPhylo'
toPhylo' (PhyloBase phylo) = toPhylo toPhylo' (PhyloBase phylo) = toPhylo
-} -}
-- TODO an adaptative synchronic clustering with a slider
toPhylo :: Phylo -> Phylo toPhylo :: Phylo -> Phylo
toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGroupsFromLevel 1 flatPhylo)) toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGroupsFromScale 1 flatPhylo))
$ traceToPhylo (phyloScale $ getConfig phylowithoutLink) $ $ traceToPhylo (phyloScale $ getConfig phylowithoutLink) $
if (phyloScale $ getConfig phylowithoutLink) > 1 if (phyloScale $ getConfig phylowithoutLink) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloScale $ getConfig phylowithoutLink)] then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloScale $ getConfig phylowithoutLink)]
else flatPhylo else phyloAncestors
where where
-------------------------------------- --------------------------------------
phyloAncestors :: Phylo phyloAncestors :: Phylo
...@@ -69,14 +73,42 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou ...@@ -69,14 +73,42 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou
-------------------------------------- --------------------------------------
-------------------- -----------------------------
-- | To Phylo 1 | -- -- | Create a flat Phylo | --
-------------------- -----------------------------
{-
-- create an adaptative diachronic 'sea elevation' ladder
-}
adaptDiachronicLadder :: Double -> Set Double -> Set Double -> [Double]
adaptDiachronicLadder curr similarities ladder =
if curr <= 0 || Set.null similarities
then Set.toList ladder
else
let idx = ((Set.size similarities) `div` (floor curr)) - 1
thr = Set.elemAt idx similarities
-- we use a sliding methods 1/10, then 1/9, then ... 1/2
in adaptDiachronicLadder (curr -1) (Set.filter (> thr) similarities) (Set.insert thr ladder)
{-
-- create a constante diachronic 'sea elevation' ladder
-}
constDiachronicLadder :: Double -> Double -> Set Double -> [Double]
constDiachronicLadder curr step ladder =
if curr > 1
then Set.toList ladder
else constDiachronicLadder (curr + step) step (Set.insert curr ladder)
toGroupsProxi :: Scale -> Phylo -> Phylo
toGroupsProxi lvl phylo = {-
-- process an initial scanning of the kinship links
-}
scanSimilarity :: Scale -> Phylo -> Phylo
scanSimilarity lvl phylo =
let proximity = phyloProximity $ getConfig phylo let proximity = phyloProximity $ getConfig phylo
groupsProxi = foldlWithKey (\acc pId pds -> scanning = foldlWithKey (\acc pId pds ->
-- 1) process period by period -- 1) process period by period
let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams))
$ elems $ elems
...@@ -84,7 +116,7 @@ toGroupsProxi lvl phylo = ...@@ -84,7 +116,7 @@ toGroupsProxi lvl phylo =
. traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl) . traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
. phylo_scaleGroups ) pds . phylo_scaleGroups ) pds
next = getNextPeriods ToParents (getTimeFrame $ timeUnit $ getConfig phylo) pId (keys $ phylo ^. phylo_periods) next = getNextPeriods ToParents (getTimeFrame $ timeUnit $ getConfig phylo) pId (keys $ phylo ^. phylo_periods)
targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromLevelPeriods lvl next phylo targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromScalePeriods lvl next phylo
docs = filterDocs (phylo ^. phylo_timeDocs) ([pId] ++ next) docs = filterDocs (phylo ^. phylo_timeDocs) ([pId] ++ next)
diagos = filterDiago (phylo ^. phylo_timeCooc) ([pId] ++ next) diagos = filterDiago (phylo ^. phylo_timeCooc) ([pId] ++ next)
-- 2) compute the pairs in parallel -- 2) compute the pairs in parallel
...@@ -98,7 +130,8 @@ toGroupsProxi lvl phylo = ...@@ -98,7 +130,8 @@ toGroupsProxi lvl phylo =
pairs' = pairs `using` parList rdeepseq pairs' = pairs `using` parList rdeepseq
in acc ++ (concat pairs') in acc ++ (concat pairs')
) [] $ phylo ^. phylo_periods ) [] $ phylo ^. phylo_periods
in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi) in phylo & phylo_diaSimScan .~ Set.fromList (traceGroupsProxi $ map snd scanning)
appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
...@@ -134,11 +167,16 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx "" ...@@ -134,11 +167,16 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
(fromList [("breaks",[0]),("seaLevels",[0])]) (fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] [] [] [] [] [] [] [] []
{-
-- enhance the phylo with temporal links
-}
addTemporalLinksToPhylo :: Phylo -> Phylo addTemporalLinksToPhylo :: Phylo -> Phylo
addTemporalLinksToPhylo phylowithoutLink = case (getSeaElevation phylowithoutLink) of addTemporalLinksToPhylo phylowithoutLink = case strategy of
Constante start gap -> constanteTemporalMatching start gap phylowithoutLink Constante start gap -> temporalMatching (constDiachronicLadder start gap Set.empty) phylowithoutLink
Adaptative steps -> adaptativeTemporalMatching steps phylowithoutLink Adaptative steps -> temporalMatching (adaptDiachronicLadder steps (phylowithoutLink ^. phylo_diaSimScan) Set.empty) phylowithoutLink
where
strategy :: SeaElevation
strategy = getSeaElevation phylowithoutLink
----------------------- -----------------------
-- | To Phylo Step | -- -- | To Phylo Step | --
...@@ -163,7 +201,7 @@ indexDates' m = map (\docs -> ...@@ -163,7 +201,7 @@ indexDates' m = map (\docs ->
toPhyloWithoutLink :: [Document] -> TermList -> PhyloConfig -> Phylo toPhyloWithoutLink :: [Document] -> TermList -> PhyloConfig -> Phylo
toPhyloWithoutLink docs lst conf = case (getSeaElevation phyloBase) of toPhyloWithoutLink docs lst conf = case (getSeaElevation phyloBase) of
Constante _ _ -> appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase) Constante _ _ -> appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
Adaptative _ -> toGroupsProxi 1 Adaptative _ -> scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase) $ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
where where
-------------------------------------- --------------------------------------
...@@ -376,8 +414,7 @@ initPhylo docs lst conf = ...@@ -376,8 +414,7 @@ initPhylo docs lst conf =
(docsToTimeScaleNb docs) (docsToTimeScaleNb docs)
(docsToTermFreq docs (foundations ^. foundations_roots)) (docsToTermFreq docs (foundations ^. foundations_roots))
(docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots)) (docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots))
empty Set.empty
empty
params params
(fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods) (fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods)
0 0
...@@ -25,7 +25,6 @@ import Gargantext.Prelude ...@@ -25,7 +25,6 @@ import Gargantext.Prelude
import Prelude (floor,read) import Prelude (floor,read)
import Text.Printf import Text.Printf
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
...@@ -387,10 +386,10 @@ getLevelParentId :: PhyloGroup -> PhyloGroupId ...@@ -387,10 +386,10 @@ getLevelParentId :: PhyloGroup -> PhyloGroupId
getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents
getLastLevel :: Phylo -> Scale getLastLevel :: Phylo -> Scale
getLastLevel phylo = last' "lastLevel" $ getLevels phylo getLastLevel phylo = last' "lastLevel" $ getScales phylo
getLevels :: Phylo -> [Scale] getScales :: Phylo -> [Scale]
getLevels phylo = nub getScales phylo = nub
$ map snd $ map snd
$ keys $ view ( phylo_periods $ keys $ view ( phylo_periods
. traverse . traverse
...@@ -431,14 +430,16 @@ getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots ...@@ -431,14 +430,16 @@ getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
getSources :: Phylo -> Vector Text getSources :: Phylo -> Vector Text
getSources phylo = _sources (phylo ^. phylo_sources) getSources phylo = _sources (phylo ^. phylo_sources)
phyloToLastBranches :: Phylo -> [[PhyloGroup]]
phyloToLastBranches phylo = elems -- get the groups distributed by branches at the last scale
phyloLastScale :: Phylo -> [[PhyloGroup]]
phyloLastScale phylo = elems
$ fromListWith (++) $ fromListWith (++)
$ map (\g -> (g ^. phylo_groupBranchId, [g])) $ map (\g -> (g ^. phylo_groupBranchId, [g]))
$ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo $ getGroupsFromScale (last' "byBranches" $ getScales phylo) phylo
getGroupsFromLevel :: Scale -> Phylo -> [PhyloGroup] getGroupsFromScale :: Scale -> Phylo -> [PhyloGroup]
getGroupsFromLevel lvl phylo = getGroupsFromScale lvl phylo =
elems $ view ( phylo_periods elems $ view ( phylo_periods
. traverse . traverse
. phylo_periodScales . phylo_periodScales
...@@ -447,8 +448,8 @@ getGroupsFromLevel lvl phylo = ...@@ -447,8 +448,8 @@ getGroupsFromLevel lvl phylo =
. phylo_scaleGroups ) phylo . phylo_scaleGroups ) phylo
getGroupsFromLevelPeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup] getGroupsFromScalePeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup]
getGroupsFromLevelPeriods lvl periods phylo = getGroupsFromScalePeriods lvl periods phylo =
elems $ view ( phylo_periods elems $ view ( phylo_periods
. traverse . traverse
. filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods) . filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
...@@ -500,8 +501,8 @@ updateQuality quality phylo = phylo { _phylo_quality = quality } ...@@ -500,8 +501,8 @@ updateQuality quality phylo = phylo { _phylo_quality = quality }
traceToPhylo :: Scale -> Phylo -> Phylo traceToPhylo :: Scale -> Phylo -> Phylo
traceToPhylo lvl phylo = traceToPhylo lvl phylo =
trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with " trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
<> show (length $ getGroupsFromLevel lvl phylo) <> " groups and " <> show (length $ getGroupsFromScale lvl phylo) <> " groups and "
<> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel lvl phylo) <> " branches" <> "\n") phylo <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo) <> " branches" <> "\n") phylo
-------------------- --------------------
-- | Clustering | -- -- | Clustering | --
...@@ -564,15 +565,15 @@ toRelatedComponents nodes edges = ...@@ -564,15 +565,15 @@ toRelatedComponents nodes edges =
traceSynchronyEnd :: Phylo -> Phylo traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo = traceSynchronyEnd phylo =
trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo) trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups" <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches" <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo <> "\n" ) phylo
traceSynchronyStart :: Phylo -> Phylo traceSynchronyStart :: Phylo -> Phylo
traceSynchronyStart phylo = traceSynchronyStart phylo =
trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo) trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups" <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches" <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo <> "\n" ) phylo
...@@ -659,6 +660,6 @@ traceTemporalMatching groups = ...@@ -659,6 +660,6 @@ traceTemporalMatching groups =
trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double traceGroupsProxi :: [Double] -> [Double]
traceGroupsProxi m = traceGroupsProxi l =
trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups proximity" <> "\n") l
...@@ -159,6 +159,7 @@ reduceGroups prox sync docs diagos branch = ...@@ -159,6 +159,7 @@ reduceGroups prox sync docs diagos branch =
let periods = fromListWith (++) let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems) in (concat . concat . elems)
-- TODO : ajouter un parallelisme
$ mapWithKey (\prd groups -> $ mapWithKey (\prd groups ->
-- 2) for each period, transform the groups as a proximity graph filtered by a threshold -- 2) for each period, transform the groups as a proximity graph filtered by a threshold
let diago = reduceDiagos $ filterDiago diagos [prd] let diago = reduceDiagos $ filterDiago diagos [prd]
...@@ -171,12 +172,12 @@ reduceGroups prox sync docs diagos branch = ...@@ -171,12 +172,12 @@ reduceGroups prox sync docs diagos branch =
$ toRelatedComponents groups edges) periods $ toRelatedComponents groups edges) periods
adjustClustering :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]] chooseClusteringStrategy :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
adjustClustering sync branches = case sync of chooseClusteringStrategy sync branches = case sync of
ByProximityThreshold _ _ scope _ -> case scope of ByProximityThreshold _ _ scope _ -> case scope of
SingleBranch -> branches SingleBranch -> branches
SiblingBranches -> groupBy (\g g' -> (last' "adjustClustering" $ (g ^. phylo_groupMeta) ! "breaks") SiblingBranches -> groupBy (\g g' -> (last' "chooseClusteringStrategy" $ (g ^. phylo_groupMeta) ! "breaks")
== (last' "adjustClustering" $ (g' ^. phylo_groupMeta) ! "breaks")) == (last' "chooseClusteringStrategy" $ (g' ^. phylo_groupMeta) ! "breaks"))
$ sortOn _phylo_groupBranchId $ concat branches $ sortOn _phylo_groupBranchId $ concat branches
AllBranches -> [concat branches] AllBranches -> [concat branches]
ByProximityDistribution _ _ -> branches ByProximityDistribution _ _ -> branches
...@@ -202,8 +203,8 @@ synchronicClustering phylo = ...@@ -202,8 +203,8 @@ synchronicClustering phylo =
diagos = map coocToDiago $ phylo ^. phylo_timeCooc diagos = map coocToDiago $ phylo ^. phylo_timeCooc
newBranches = map (\branch -> reduceGroups prox sync docs diagos branch) newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics $ map processDynamics
$ adjustClustering sync $ chooseClusteringStrategy sync
$ phyloToLastBranches $ phyloLastScale
$ traceSynchronyStart phylo $ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq newBranches' = newBranches `using` parList rdeepseq
in toNextScale phylo $ levelUpAncestors $ concat newBranches' in toNextScale phylo $ levelUpAncestors $ concat newBranches'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment