Commit 8faf7d6d authored by qlobbe's avatar qlobbe

refactoring temporal matching

parent ebcee352
Pipeline #3229 failed with stage
in 72 minutes and 50 seconds
......@@ -31,6 +31,7 @@ import Control.Lens (makeLenses)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Map (Map)
import Data.Set (Set)
import Data.Swagger
import Data.Text (Text, pack)
import Data.Vector (Vector)
......@@ -63,9 +64,9 @@ instance ToSchema ListParser
data SeaElevation =
Constante
{ _cons_start :: Double
, _cons_step :: Double }
, _cons_gap :: Double }
| Adaptative
{ _adap_granularity :: Double }
{ _adap_steps :: Double }
deriving (Show,Generic,Eq)
instance ToSchema SeaElevation
......@@ -306,8 +307,8 @@ instance ToSchema Software where
defaultSoftware :: Software
defaultSoftware =
Software { _software_name = pack "Gargantext"
, _software_version = pack "v4" }
Software { _software_name = pack "GarganText"
, _software_version = pack "v5" }
-- | Global parameters of a Phylo
......@@ -324,7 +325,7 @@ instance ToSchema PhyloParam where
defaultPhyloParam :: PhyloParam
defaultPhyloParam =
PhyloParam { _phyloParam_version = pack "v2.adaptative"
PhyloParam { _phyloParam_version = pack "v3"
, _phyloParam_software = defaultSoftware
, _phyloParam_config = defaultConfig }
......@@ -409,8 +410,7 @@ data Phylo =
, _phylo_timeDocs :: !(Map Date Double)
, _phylo_termFreq :: !(Map Int Double)
, _phylo_lastTermFreq :: !(Map Int Double)
, _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
, _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
, _phylo_diaSimScan :: Set Double
, _phylo_param :: PhyloParam
, _phylo_periods :: Map Period PhyloPeriod
, _phylo_quality :: Double
......
......@@ -27,9 +27,10 @@ import Gargantext.Core.Viz.Phylo.PhyloExport
import Gargantext.Core.Viz.Phylo.PhyloMaker
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching)
import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching)
import Gargantext.Prelude
import qualified Data.Vector as Vector
import qualified Data.Set as Set
---------------------------------
-- | STEP 5 | -- Export the phylo
......@@ -54,13 +55,14 @@ phyloCleopatre = synchronicClustering $ toHorizon flatPhylo
flatPhylo :: Phylo
flatPhylo = case (getSeaElevation emptyPhylo) of
Constante s g -> constanteTemporalMatching s g
$ toGroupsProxi 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
Adaptative s -> adaptativeTemporalMatching s
$ toGroupsProxi 1
Constante s g -> temporalMatching (constDiachronicLadder s g Set.empty)
$ scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
Adaptative s -> temporalMatching (adaptDiachronicLadder s (emptyPhylo' ^. phylo_diaSimScan) Set.empty) emptyPhylo'
emptyPhylo' :: Phylo
emptyPhylo' = scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
---------------------------------------------
-- | STEP 2 | -- Build the cliques
......@@ -102,6 +104,7 @@ config :: PhyloConfig
config =
defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloScale = 2
, seaElevation = Adaptative 4
, exportFilter = [ByBranchSize 0]
, clique = MaxClique 0 15 ByNeighbours }
......
......@@ -546,9 +546,10 @@ processLabels labels foundations freq export =
-- | Dynamics | --
------------------
toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
toDynamics n parents g m =
-- utiliser & creer une Map FdtId [PhyloGroup]
-- n = index of the current term
toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
toDynamics n elders g m =
let prd = g ^. phylo_groupPeriod
end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
......@@ -564,18 +565,18 @@ toDynamics n parents g m =
where
--------------------------------------
isNew :: Bool
isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
type FdtId = Int
processDynamics :: [PhyloGroup] -> [PhyloGroup]
processDynamics groups =
map (\g ->
let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
&& ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
where
--------------------------------------
mapNgrams :: Map Int (Date,Date)
mapNgrams :: Map FdtId (Date,Date)
mapNgrams = map (\dates ->
let dates' = sort dates
in (head' "dynamics" dates', last' "dynamics" dates'))
......@@ -621,7 +622,7 @@ toHorizon phylo =
$ concat
$ tracePhyloAncestors newGroups) phylo
reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
$ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel scale phyloAncestor
$ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
in updatePhyloGroups scale reBranched phylo
where
-- | 1) for each periods
......@@ -636,7 +637,7 @@ toHorizon phylo =
-- | 2) find ancestors between groups without parents
mapGroups :: [[PhyloGroup]]
mapGroups = map (\prd ->
let groups = getGroupsFromLevelPeriods scale [prd] phylo
let groups = getGroupsFromScalePeriods scale [prd] phylo
childs = getPreviousChildIds scale frame prd periods phylo
-- maybe add a better filter for non isolated ancestors
heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
......@@ -660,7 +661,7 @@ toHorizon phylo =
getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
getPreviousChildIds lvl frame curr prds phylo =
concat $ map ((map fst) . _phylo_groupPeriodChilds)
$ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
$ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
---------------------
-- | phyloExport | --
......@@ -695,10 +696,10 @@ toPhyloExport phylo = exportToDot phylo
--------------------------------------
groups :: [PhyloGroup]
groups = traceExportGroups
-- necessaire ?
$ processDynamics
$ getGroupsFromLevel (phyloScale $ getConfig phylo)
$ getGroupsFromScale (phyloScale $ getConfig phylo)
$ tracePhyloInfo phylo
-- \$ toHorizon phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
......
......@@ -17,9 +17,11 @@ import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Set (Set)
import Data.Text (Text)
import Data.Vector (Vector)
import Debug.Trace (trace)
import Prelude (floor)
import Gargantext.Core.Methods.Distances (Distance(Conditional))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
......@@ -29,7 +31,7 @@ import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Prelude
import qualified Data.Set as Set
......@@ -42,7 +44,7 @@ import qualified Data.Vector as Vector
{-
-- TODO AD
data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
| PhyloN { _phylo'_flatPhylo :: Phylo}
| PhyloN { _phylo'_flatPhylo :: Phylo}
toPhylo' :: Phylo' -> [Document] -> TermList -> PhyloConfig -> Phylo
......@@ -50,12 +52,14 @@ toPhylo' (PhyloN phylo) = toPhylo'
toPhylo' (PhyloBase phylo) = toPhylo
-}
-- TODO an adaptative synchronic clustering with a slider
toPhylo :: Phylo -> Phylo
toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGroupsFromLevel 1 flatPhylo))
toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGroupsFromScale 1 flatPhylo))
$ traceToPhylo (phyloScale $ getConfig phylowithoutLink) $
if (phyloScale $ getConfig phylowithoutLink) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloScale $ getConfig phylowithoutLink)]
else flatPhylo
else phyloAncestors
where
--------------------------------------
phyloAncestors :: Phylo
......@@ -69,14 +73,42 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou
--------------------------------------
--------------------
-- | To Phylo 1 | --
--------------------
-----------------------------
-- | Create a flat Phylo | --
-----------------------------
{-
-- create an adaptative diachronic 'sea elevation' ladder
-}
adaptDiachronicLadder :: Double -> Set Double -> Set Double -> [Double]
adaptDiachronicLadder curr similarities ladder =
if curr <= 0 || Set.null similarities
then Set.toList ladder
else
let idx = ((Set.size similarities) `div` (floor curr)) - 1
thr = Set.elemAt idx similarities
-- we use a sliding methods 1/10, then 1/9, then ... 1/2
in adaptDiachronicLadder (curr -1) (Set.filter (> thr) similarities) (Set.insert thr ladder)
{-
-- create a constante diachronic 'sea elevation' ladder
-}
constDiachronicLadder :: Double -> Double -> Set Double -> [Double]
constDiachronicLadder curr step ladder =
if curr > 1
then Set.toList ladder
else constDiachronicLadder (curr + step) step (Set.insert curr ladder)
toGroupsProxi :: Scale -> Phylo -> Phylo
toGroupsProxi lvl phylo =
{-
-- process an initial scanning of the kinship links
-}
scanSimilarity :: Scale -> Phylo -> Phylo
scanSimilarity lvl phylo =
let proximity = phyloProximity $ getConfig phylo
groupsProxi = foldlWithKey (\acc pId pds ->
scanning = foldlWithKey (\acc pId pds ->
-- 1) process period by period
let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams))
$ elems
......@@ -84,7 +116,7 @@ toGroupsProxi lvl phylo =
. traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
. phylo_scaleGroups ) pds
next = getNextPeriods ToParents (getTimeFrame $ timeUnit $ getConfig phylo) pId (keys $ phylo ^. phylo_periods)
targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromLevelPeriods lvl next phylo
targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromScalePeriods lvl next phylo
docs = filterDocs (phylo ^. phylo_timeDocs) ([pId] ++ next)
diagos = filterDiago (phylo ^. phylo_timeCooc) ([pId] ++ next)
-- 2) compute the pairs in parallel
......@@ -98,7 +130,8 @@ toGroupsProxi lvl phylo =
pairs' = pairs `using` parList rdeepseq
in acc ++ (concat pairs')
) [] $ phylo ^. phylo_periods
in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi)
in phylo & phylo_diaSimScan .~ Set.fromList (traceGroupsProxi $ map snd scanning)
appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
......@@ -134,11 +167,16 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
(fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] []
{-
-- enhance the phylo with temporal links
-}
addTemporalLinksToPhylo :: Phylo -> Phylo
addTemporalLinksToPhylo phylowithoutLink = case (getSeaElevation phylowithoutLink) of
Constante start gap -> constanteTemporalMatching start gap phylowithoutLink
Adaptative steps -> adaptativeTemporalMatching steps phylowithoutLink
addTemporalLinksToPhylo phylowithoutLink = case strategy of
Constante start gap -> temporalMatching (constDiachronicLadder start gap Set.empty) phylowithoutLink
Adaptative steps -> temporalMatching (adaptDiachronicLadder steps (phylowithoutLink ^. phylo_diaSimScan) Set.empty) phylowithoutLink
where
strategy :: SeaElevation
strategy = getSeaElevation phylowithoutLink
-----------------------
-- | To Phylo Step | --
......@@ -163,7 +201,7 @@ indexDates' m = map (\docs ->
toPhyloWithoutLink :: [Document] -> TermList -> PhyloConfig -> Phylo
toPhyloWithoutLink docs lst conf = case (getSeaElevation phyloBase) of
Constante _ _ -> appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
Adaptative _ -> toGroupsProxi 1
Adaptative _ -> scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
where
--------------------------------------
......@@ -376,8 +414,7 @@ initPhylo docs lst conf =
(docsToTimeScaleNb docs)
(docsToTermFreq docs (foundations ^. foundations_roots))
(docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots))
empty
empty
Set.empty
params
(fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods)
0
......@@ -25,7 +25,6 @@ import Gargantext.Prelude
import Prelude (floor,read)
import Text.Printf
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Data.Vector as Vector
......@@ -387,10 +386,10 @@ getLevelParentId :: PhyloGroup -> PhyloGroupId
getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents
getLastLevel :: Phylo -> Scale
getLastLevel phylo = last' "lastLevel" $ getLevels phylo
getLastLevel phylo = last' "lastLevel" $ getScales phylo
getLevels :: Phylo -> [Scale]
getLevels phylo = nub
getScales :: Phylo -> [Scale]
getScales phylo = nub
$ map snd
$ keys $ view ( phylo_periods
. traverse
......@@ -431,14 +430,16 @@ getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
getSources :: Phylo -> Vector Text
getSources phylo = _sources (phylo ^. phylo_sources)
phyloToLastBranches :: Phylo -> [[PhyloGroup]]
phyloToLastBranches phylo = elems
-- get the groups distributed by branches at the last scale
phyloLastScale :: Phylo -> [[PhyloGroup]]
phyloLastScale phylo = elems
$ fromListWith (++)
$ map (\g -> (g ^. phylo_groupBranchId, [g]))
$ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo
$ getGroupsFromScale (last' "byBranches" $ getScales phylo) phylo
getGroupsFromLevel :: Scale -> Phylo -> [PhyloGroup]
getGroupsFromLevel lvl phylo =
getGroupsFromScale :: Scale -> Phylo -> [PhyloGroup]
getGroupsFromScale lvl phylo =
elems $ view ( phylo_periods
. traverse
. phylo_periodScales
......@@ -447,8 +448,8 @@ getGroupsFromLevel lvl phylo =
. phylo_scaleGroups ) phylo
getGroupsFromLevelPeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup]
getGroupsFromLevelPeriods lvl periods phylo =
getGroupsFromScalePeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup]
getGroupsFromScalePeriods lvl periods phylo =
elems $ view ( phylo_periods
. traverse
. filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
......@@ -500,8 +501,8 @@ updateQuality quality phylo = phylo { _phylo_quality = quality }
traceToPhylo :: Scale -> Phylo -> Phylo
traceToPhylo lvl phylo =
trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
<> show (length $ getGroupsFromLevel lvl phylo) <> " groups and "
<> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel lvl phylo) <> " branches" <> "\n") phylo
<> show (length $ getGroupsFromScale lvl phylo) <> " groups and "
<> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo) <> " branches" <> "\n") phylo
--------------------
-- | Clustering | --
......@@ -564,15 +565,15 @@ toRelatedComponents nodes edges =
traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo =
trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
<> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo
traceSynchronyStart :: Phylo -> Phylo
traceSynchronyStart phylo =
trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
<> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo
......@@ -659,6 +660,6 @@ traceTemporalMatching groups =
trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double
traceGroupsProxi m =
trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m
traceGroupsProxi :: [Double] -> [Double]
traceGroupsProxi l =
trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups proximity" <> "\n") l
......@@ -159,6 +159,7 @@ reduceGroups prox sync docs diagos branch =
let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems)
-- TODO : ajouter un parallelisme
$ mapWithKey (\prd groups ->
-- 2) for each period, transform the groups as a proximity graph filtered by a threshold
let diago = reduceDiagos $ filterDiago diagos [prd]
......@@ -171,12 +172,12 @@ reduceGroups prox sync docs diagos branch =
$ toRelatedComponents groups edges) periods
adjustClustering :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
adjustClustering sync branches = case sync of
chooseClusteringStrategy :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
chooseClusteringStrategy sync branches = case sync of
ByProximityThreshold _ _ scope _ -> case scope of
SingleBranch -> branches
SiblingBranches -> groupBy (\g g' -> (last' "adjustClustering" $ (g ^. phylo_groupMeta) ! "breaks")
== (last' "adjustClustering" $ (g' ^. phylo_groupMeta) ! "breaks"))
SiblingBranches -> groupBy (\g g' -> (last' "chooseClusteringStrategy" $ (g ^. phylo_groupMeta) ! "breaks")
== (last' "chooseClusteringStrategy" $ (g' ^. phylo_groupMeta) ! "breaks"))
$ sortOn _phylo_groupBranchId $ concat branches
AllBranches -> [concat branches]
ByProximityDistribution _ _ -> branches
......@@ -202,8 +203,8 @@ synchronicClustering phylo =
diagos = map coocToDiago $ phylo ^. phylo_timeCooc
newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics
$ adjustClustering sync
$ phyloToLastBranches
$ chooseClusteringStrategy sync
$ phyloLastScale
$ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq
in toNextScale phylo $ levelUpAncestors $ concat newBranches'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment