Commit 66f96f84 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[MERGE]

parent 54d94963
......@@ -30,8 +30,7 @@ import Control.DeepSeq (NFData)
import Control.Lens (makeLenses)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Map.Strict (Map)
import Data.Set (Set)
import Data.Map (Map)
import Data.Swagger
import Data.Text (Text, pack)
import Data.Vector (Vector)
......@@ -64,9 +63,9 @@ instance ToSchema ListParser
data SeaElevation =
Constante
{ _cons_start :: Double
, _cons_gap :: Double }
, _cons_step :: Double }
| Adaptative
{ _adap_steps :: Double }
{ _adap_granularity :: Double }
deriving (Show,Generic,Eq)
instance ToSchema SeaElevation
......@@ -307,8 +306,8 @@ instance ToSchema Software where
defaultSoftware :: Software
defaultSoftware =
Software { _software_name = pack "GarganText"
, _software_version = pack "v5" }
Software { _software_name = pack "Gargantext"
, _software_version = pack "v4" }
-- | Global parameters of a Phylo
......@@ -325,7 +324,7 @@ instance ToSchema PhyloParam where
defaultPhyloParam :: PhyloParam
defaultPhyloParam =
PhyloParam { _phyloParam_version = pack "v3"
PhyloParam { _phyloParam_version = pack "v2.adaptative"
, _phyloParam_software = defaultSoftware
, _phyloParam_config = defaultConfig }
......@@ -410,7 +409,8 @@ data Phylo =
, _phylo_timeDocs :: !(Map Date Double)
, _phylo_termFreq :: !(Map Int Double)
, _phylo_lastTermFreq :: !(Map Int Double)
, _phylo_diaSimScan :: Set Double
, _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
, _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
, _phylo_param :: PhyloParam
, _phylo_periods :: Map Period PhyloPeriod
, _phylo_quality :: Double
......
......@@ -18,7 +18,7 @@ module Gargantext.Core.Viz.Phylo.Example where
import Control.Lens
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.List (sortOn, nub, sort)
import Data.Map.Strict (Map)
import Data.Map (Map)
import Data.Text (Text, toLower)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.Mono (monoTexts)
......@@ -27,10 +27,9 @@ import Gargantext.Core.Viz.Phylo.PhyloExport
import Gargantext.Core.Viz.Phylo.PhyloMaker
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching)
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching)
import Gargantext.Prelude
import qualified Data.Vector as Vector
import qualified Data.Set as Set
---------------------------------
-- | STEP 5 | -- Export the phylo
......@@ -55,15 +54,14 @@ phyloCleopatre = synchronicClustering $ toHorizon flatPhylo
flatPhylo :: Phylo
flatPhylo = case (getSeaElevation emptyPhylo) of
Constante s g -> temporalMatching (constDiachronicLadder s g Set.empty)
$ scanSimilarity 1
Constante s g -> constanteTemporalMatching s g
$ toGroupsProxi 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
Adaptative s -> temporalMatching (adaptDiachronicLadder s (emptyPhylo' ^. phylo_diaSimScan) Set.empty) emptyPhylo'
emptyPhylo' :: Phylo
emptyPhylo' = scanSimilarity 1
Adaptative s -> adaptativeTemporalMatching s
$ toGroupsProxi 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
---------------------------------------------
-- | STEP 2 | -- Build the cliques
---------------------------------------------
......@@ -104,7 +102,6 @@ config :: PhyloConfig
config =
defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloScale = 2
, seaElevation = Adaptative 4
, exportFilter = [ByBranchSize 0]
, clique = MaxClique 0 15 ByNeighbours }
......
......@@ -19,7 +19,7 @@ import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Types.Monadic
import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
import Data.Map.Strict (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
import Data.Text.Lazy (fromStrict, pack, unpack)
import Data.Vector (Vector)
import Debug.Trace (trace)
......@@ -546,10 +546,9 @@ processLabels labels foundations freq export =
-- | Dynamics | --
------------------
-- utiliser & creer une Map FdtId [PhyloGroup]
-- n = index of the current term
toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
toDynamics n elders g m =
toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
toDynamics n parents g m =
let prd = g ^. phylo_groupPeriod
end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
......@@ -565,18 +564,18 @@ toDynamics n elders g m =
where
--------------------------------------
isNew :: Bool
isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
type FdtId = Int
processDynamics :: [PhyloGroup] -> [PhyloGroup]
processDynamics groups =
map (\g ->
let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
&& ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
where
--------------------------------------
mapNgrams :: Map FdtId (Date,Date)
mapNgrams :: Map Int (Date,Date)
mapNgrams = map (\dates ->
let dates' = sort dates
in (head' "dynamics" dates', last' "dynamics" dates'))
......@@ -622,7 +621,7 @@ toHorizon phylo =
$ concat
$ tracePhyloAncestors newGroups) phylo
reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
$ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
$ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel scale phyloAncestor
in updatePhyloGroups scale reBranched phylo
where
-- | 1) for each periods
......@@ -637,7 +636,7 @@ toHorizon phylo =
-- | 2) find ancestors between groups without parents
mapGroups :: [[PhyloGroup]]
mapGroups = map (\prd ->
let groups = getGroupsFromScalePeriods scale [prd] phylo
let groups = getGroupsFromLevelPeriods scale [prd] phylo
childs = getPreviousChildIds scale frame prd periods phylo
-- maybe add a better filter for non isolated ancestors
heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
......@@ -661,7 +660,7 @@ toHorizon phylo =
getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
getPreviousChildIds lvl frame curr prds phylo =
concat $ map ((map fst) . _phylo_groupPeriodChilds)
$ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
$ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
---------------------
-- | phyloExport | --
......@@ -696,10 +695,10 @@ toPhyloExport phylo = exportToDot phylo
--------------------------------------
groups :: [PhyloGroup]
groups = traceExportGroups
-- necessaire ?
$ processDynamics
$ getGroupsFromScale (phyloScale $ getConfig phylo)
$ getGroupsFromLevel (phyloScale $ getConfig phylo)
$ tracePhyloInfo phylo
-- \$ toHorizon phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
......@@ -722,3 +721,4 @@ traceExportGroups groups = trace ("\n" <> "-- | Export "
<> show(length groups) <> " groups and "
<> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
) groups
......@@ -8,18 +8,18 @@ Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.PhyloMaker where
import Control.DeepSeq (NFData)
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map.Strict (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Set (Set)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Text (Text)
import Data.Vector (Vector)
import Debug.Trace (trace)
import Prelude (floor)
import Gargantext.Core.Methods.Similarities (Similarity(Conditional))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
......@@ -29,7 +29,7 @@ import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Prelude
import qualified Data.Set as Set
......@@ -50,14 +50,12 @@ toPhylo' (PhyloN phylo) = toPhylo'
toPhylo' (PhyloBase phylo) = toPhylo
-}
-- TODO an adaptative synchronic clustering with a slider
toPhylo :: Phylo -> Phylo
toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGroupsFromScale 1 flatPhylo))
toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGroupsFromLevel 1 flatPhylo))
$ traceToPhylo (phyloScale $ getConfig phylowithoutLink) $
if (phyloScale $ getConfig phylowithoutLink) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloScale $ getConfig phylowithoutLink)]
else phyloAncestors
else flatPhylo
where
--------------------------------------
phyloAncestors :: Phylo
......@@ -71,41 +69,14 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou
--------------------------------------
-----------------------------
-- | Create a flat Phylo | --
-----------------------------
{-
-- create an adaptative diachronic 'sea elevation' ladder
-}
adaptDiachronicLadder :: Double -> Set Double -> Set Double -> [Double]
adaptDiachronicLadder curr similarities ladder =
if curr <= 0 || Set.null similarities
then Set.toList ladder
else
let idx = ((Set.size similarities) `div` (floor curr)) - 1
thr = Set.elemAt idx similarities
-- we use a sliding methods 1/10, then 1/9, then ... 1/2
in adaptDiachronicLadder (curr -1) (Set.filter (> thr) similarities) (Set.insert thr ladder)
{-
-- create a constante diachronic 'sea elevation' ladder
-}
constDiachronicLadder :: Double -> Double -> Set Double -> [Double]
constDiachronicLadder curr step ladder =
if curr > 1
then Set.toList ladder
else constDiachronicLadder (curr + step) step (Set.insert curr ladder)
--------------------
-- | To Phylo 1 | --
--------------------
{-
-- process an initial scanning of the kinship links
-}
scanSimilarity :: Scale -> Phylo -> Phylo
scanSimilarity lvl phylo =
toGroupsProxi :: Scale -> Phylo -> Phylo
toGroupsProxi lvl phylo =
let proximity = phyloProximity $ getConfig phylo
scanning = foldlWithKey (\acc pId pds ->
groupsProxi = foldlWithKey (\acc pId pds ->
-- 1) process period by period
let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams))
$ elems
......@@ -113,7 +84,7 @@ scanSimilarity lvl phylo =
. traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
. phylo_scaleGroups ) pds
next = getNextPeriods ToParents (getTimeFrame $ timeUnit $ getConfig phylo) pId (keys $ phylo ^. phylo_periods)
targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromScalePeriods lvl next phylo
targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromLevelPeriods lvl next phylo
docs = filterDocs (phylo ^. phylo_timeDocs) ([pId] ++ next)
diagos = filterDiago (phylo ^. phylo_timeCooc) ([pId] ++ next)
-- 2) compute the pairs in parallel
......@@ -127,8 +98,7 @@ scanSimilarity lvl phylo =
pairs' = pairs `using` parList rdeepseq
in acc ++ (concat pairs')
) [] $ phylo ^. phylo_periods
in phylo & phylo_diaSimScan .~ Set.fromList (traceGroupsProxi $ map snd scanning)
in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi)
appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
......@@ -164,16 +134,11 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
(fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] []
{-
-- enhance the phylo with temporal links
-}
addTemporalLinksToPhylo :: Phylo -> Phylo
addTemporalLinksToPhylo phylowithoutLink = case strategy of
Constante start gap -> temporalMatching (constDiachronicLadder start gap Set.empty) phylowithoutLink
Adaptative steps -> temporalMatching (adaptDiachronicLadder steps (phylowithoutLink ^. phylo_diaSimScan) Set.empty) phylowithoutLink
where
strategy :: SeaElevation
strategy = getSeaElevation phylowithoutLink
addTemporalLinksToPhylo phylowithoutLink = case (getSeaElevation phylowithoutLink) of
Constante start gap -> constanteTemporalMatching start gap phylowithoutLink
Adaptative steps -> adaptativeTemporalMatching steps phylowithoutLink
-----------------------
-- | To Phylo Step | --
......@@ -198,7 +163,7 @@ indexDates' m = map (\docs ->
toPhyloWithoutLink :: [Document] -> TermList -> PhyloConfig -> Phylo
toPhyloWithoutLink docs lst conf = case (getSeaElevation phyloBase) of
Constante _ _ -> appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
Adaptative _ -> scanSimilarity 1
Adaptative _ -> toGroupsProxi 1
$ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
where
--------------------------------------
......@@ -411,7 +376,8 @@ initPhylo docs lst conf =
(docsToTimeScaleNb docs)
(docsToTermFreq docs (foundations ^. foundations_roots))
(docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots))
Set.empty
empty
empty
params
(fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods)
0
......@@ -14,7 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloTools where
import Control.Lens hiding (Level)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group, notElem)
import Data.Map.Strict (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.Set (Set, disjoint)
import Data.String (String)
import Data.Text (Text,unpack)
......@@ -25,6 +25,7 @@ import Gargantext.Prelude
import Prelude (floor,read)
import Text.Printf
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Data.Vector as Vector
......@@ -139,7 +140,6 @@ periodsToYears periods = (Set.fromList . sort . concat)
findBounds :: [Date] -> (Date,Date)
findBounds [] = panic "[G.C.V.P.PhyloTools] empty dates for find bounds"
findBounds dates =
let dates' = sort dates
in (head' "findBounds" dates', last' "findBounds" dates')
......@@ -387,10 +387,10 @@ getLevelParentId :: PhyloGroup -> PhyloGroupId
getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents
getLastLevel :: Phylo -> Scale
getLastLevel phylo = last' "lastLevel" $ getScales phylo
getLastLevel phylo = last' "lastLevel" $ getLevels phylo
getScales :: Phylo -> [Scale]
getScales phylo = nub
getLevels :: Phylo -> [Scale]
getLevels phylo = nub
$ map snd
$ keys $ view ( phylo_periods
. traverse
......@@ -431,16 +431,14 @@ getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
getSources :: Phylo -> Vector Text
getSources phylo = _sources (phylo ^. phylo_sources)
-- get the groups distributed by branches at the last scale
phyloLastScale :: Phylo -> [[PhyloGroup]]
phyloLastScale phylo = elems
phyloToLastBranches :: Phylo -> [[PhyloGroup]]
phyloToLastBranches phylo = elems
$ fromListWith (++)
$ map (\g -> (g ^. phylo_groupBranchId, [g]))
$ getGroupsFromScale (last' "byBranches" $ getScales phylo) phylo
$ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo
getGroupsFromScale :: Scale -> Phylo -> [PhyloGroup]
getGroupsFromScale lvl phylo =
getGroupsFromLevel :: Scale -> Phylo -> [PhyloGroup]
getGroupsFromLevel lvl phylo =
elems $ view ( phylo_periods
. traverse
. phylo_periodScales
......@@ -449,8 +447,8 @@ getGroupsFromScale lvl phylo =
. phylo_scaleGroups ) phylo
getGroupsFromScalePeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup]
getGroupsFromScalePeriods lvl periods phylo =
getGroupsFromLevelPeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup]
getGroupsFromLevelPeriods lvl periods phylo =
elems $ view ( phylo_periods
. traverse
. filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
......@@ -502,8 +500,8 @@ updateQuality quality phylo = phylo { _phylo_quality = quality }
traceToPhylo :: Scale -> Phylo -> Phylo
traceToPhylo lvl phylo =
trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
<> show (length $ getGroupsFromScale lvl phylo) <> " groups and "
<> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo) <> " branches" <> "\n") phylo
<> show (length $ getGroupsFromLevel lvl phylo) <> " groups and "
<> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel lvl phylo) <> " branches" <> "\n") phylo
--------------------
-- | Clustering | --
......@@ -566,15 +564,15 @@ toRelatedComponents nodes edges =
traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo =
trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo
traceSynchronyStart :: Phylo -> Phylo
traceSynchronyStart phylo =
trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo
......@@ -661,6 +659,6 @@ traceTemporalMatching groups =
trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
traceGroupsProxi :: [Double] -> [Double]
traceGroupsProxi l =
trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups proximity" <> "\n") l
traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double
traceGroupsProxi m =
trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m
......@@ -16,13 +16,13 @@ import Control.Lens hiding (Level)
import Control.Monad (sequence)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, groupBy)
import Data.Map.Strict (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Prelude
import qualified Data.Map.Strict as Map
import qualified Data.Map as Map
-------------------------
......@@ -159,7 +159,6 @@ reduceGroups prox sync docs diagos branch =
let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems)
-- TODO : ajouter un parallelisme
$ mapWithKey (\prd groups ->
-- 2) for each period, transform the groups as a proximity graph filtered by a threshold
let diago = reduceDiagos $ filterDiago diagos [prd]
......@@ -172,12 +171,12 @@ reduceGroups prox sync docs diagos branch =
$ toRelatedComponents groups edges) periods
chooseClusteringStrategy :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
chooseClusteringStrategy sync branches = case sync of
adjustClustering :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
adjustClustering sync branches = case sync of
ByProximityThreshold _ _ scope _ -> case scope of
SingleBranch -> branches
SiblingBranches -> groupBy (\g g' -> (last' "chooseClusteringStrategy" $ (g ^. phylo_groupMeta) ! "breaks")
== (last' "chooseClusteringStrategy" $ (g' ^. phylo_groupMeta) ! "breaks"))
SiblingBranches -> groupBy (\g g' -> (last' "adjustClustering" $ (g ^. phylo_groupMeta) ! "breaks")
== (last' "adjustClustering" $ (g' ^. phylo_groupMeta) ! "breaks"))
$ sortOn _phylo_groupBranchId $ concat branches
AllBranches -> [concat branches]
ByProximityDistribution _ _ -> branches
......@@ -203,15 +202,15 @@ synchronicClustering phylo =
diagos = map coocToDiago $ phylo ^. phylo_timeCooc
newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics
$ chooseClusteringStrategy sync
$ phyloLastScale
$ adjustClustering sync
$ phyloToLastBranches
$ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq
in toNextScale phylo $ levelUpAncestors $ concat newBranches'
-- synchronicSimilarity :: Phylo -> Level -> String
-- synchronicSimilarity phylo lvl =
-- synchronicDistance :: Phylo -> Level -> String
-- synchronicDistance phylo lvl =
-- foldl' (\acc branch ->
-- acc <> (foldl' (\acc' period ->
-- acc' <> let prox = phyloProximity $ getConfig phylo
......
......@@ -6,62 +6,47 @@ License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Reference : Chavalarias, D., Lobbé, Q. & Delanoë, A. Draw me Science. Scientometrics 127, 545–575 (2022). https://doi.org/10.1007/s11192-021-04186-5
-}
module Gargantext.Core.Viz.Phylo.TemporalMatching where
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.Ord
import Data.List (concat, splitAt, tail, sortOn, sortBy, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or)
import Data.Map.Strict (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), empty, mapKeys, adjust)
import Data.List (concat, splitAt, tail, sortOn, sortBy, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or, sort, (!!))
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
import Debug.Trace (trace)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Prelude
import Prelude (tan,pi)
import Prelude (floor,tan,pi)
import Text.Printf
import qualified Data.Map.Strict as Map
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector as Vector
type Branch = [PhyloGroup]
type FinalQuality = Double
type LocalQuality = Double
type ShouldTry = Bool
----------------------------
-- | Similarity Measure | --
----------------------------
-------------------
-- | Proximity | --
-------------------
{-
-- compute a jaccard similarity between two lists
-}
-- | To compute a jaccard similarity between two lists
jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
{-
-- process the inverse sumLog
-}
-- | Process the inverse sumLog
sumInvLog' :: Double -> Double -> [Double] -> Double
sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2))))) 0 diago
{-
-- process the sumLog
-}
-- | Process the sumLog
sumLog' :: Double -> Double -> [Double] -> Double
sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2)))) 0 diago
{-
-- compute the weightedLogJaccard
-}
weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
weightedLogJaccard' sens nbDocs diago ngrams ngrams'
| null ngramsInter = 0
......@@ -84,12 +69,8 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams'
diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
--------------------------------------
{-
-- compute the weightedLogSim
-- Adapted from Wang, X., Cheng, Q., Lu, W., 2014. Analyzing evolution of research topics with NEViewer: a new method based on dynamic co-word networks. Scientometrics 101, 1253–1271. https://doi.org/10.1007/s11192-014-1347-y (log added in the formula + pair comparison)
-- | Process the weighted similarity between clusters. Adapted from Wang, X., Cheng, Q., Lu, W., 2014. Analyzing evolution of research topics with NEViewer: a new method based on dynamic co-word networks. Scientometrics 101, 1253–1271. https://doi.org/10.1007/s11192-014-1347-y (log added in the formula + pair comparison)
-- tests not conclusive
-}
weightedLogSim' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams
| null ngramsInter = 0
......@@ -115,11 +96,8 @@ weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams
diagoTarget = elems $ restrictKeys diago (Set.fromList target_ngrams)
--------------------------------------
{-
-- perform a seamilarity measure between a given group and a pair of targeted groups
-}
toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
-- | To process the proximity between a current group and a pair of targets group using the adapted Wang et al. Similarity
toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
case proximity of
WeightedLogJaccard sens _ ->
......@@ -134,11 +112,9 @@ toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
in weightedLogSim' sens nbDocs diago egoNgrams pairNgrams
Hamming _ _ -> undefined
-----------------------------
-- | Pointers & Matrices | --
-----------------------------
------------------------
-- | Local Matching | --
------------------------
findLastPeriod :: Filiation -> [Period] -> Period
findLastPeriod fil periods = case fil of
......@@ -147,6 +123,8 @@ findLastPeriod fil periods = case fil of
ToChildsMemory -> undefined
ToParentsMemory -> undefined
-- | To filter pairs of candidates related to old pointers periods
removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> Period
-> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
-> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
......@@ -166,71 +144,8 @@ removeOldPointers oldPointers fil thr prox prd pairs
|| (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
| otherwise = []
filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
filterPointers' :: Proximity -> Double -> [(Pointer,[Int])] -> [(Pointer,[Int])]
filterPointers' proxi thr pts = filter (\((_,w),_) -> filterProximity proxi thr w) pts
reduceDiagos :: Map Date Cooc -> Map Int Double
reduceDiagos diagos = mapKeys (\(k,_) -> k)
$ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
filterPointersByPeriod :: Filiation -> [(Pointer,[Int])] -> [Pointer]
filterPointersByPeriod fil pts =
let pts' = sortOn (fst . fst . fst . fst) pts
inf = (fst . fst . fst . fst) $ head' "filterPointersByPeriod" pts'
sup = (fst . fst . fst . fst) $ last' "filterPointersByPeriod" pts'
in map fst
$ nubBy (\pt pt' -> snd pt == snd pt')
$ filter (\pt -> ((fst . fst . fst . fst) pt == inf) || ((fst . fst . fst . fst) pt == sup))
$ case fil of
ToParents -> reverse pts'
ToChilds -> pts'
ToChildsMemory -> undefined
ToParentsMemory -> undefined
filterDocs :: Map Date Double -> [Period] -> Map Date Double
filterDocs d pds = restrictKeys d $ periodsToYears pds
filterDiago :: Map Date Cooc -> [Period] -> Map Date Cooc
filterDiago diago pds = restrictKeys diago $ periodsToYears pds
---------------------------------
-- | Inter-temporal matching | --
---------------------------------
{-
-- perform the related component algorithm, construct the resulting branch id and update the corresponding group's branch id
-}
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [Branch]
groupsToBranches groups =
{- run the related component algorithm -}
let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
$ sortOn (\gs -> fst $ fst $ head' "egos" gs)
$ map (\group -> [getGroupId group]
++ (map fst $ group ^. phylo_groupPeriodParents)
++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
-- first find the related components by inside each ego's period
-- a supprimer
graph' = map relatedComponents egos
-- then run it for the all the periods
branches = zip [1..]
$ relatedComponents $ concat (graph' `using` parList rdeepseq)
-- update each group's branch id
in map (\(bId,branch) ->
let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
$ elems $ restrictKeys groups (Set.fromList branch)
in groups' `using` parList rdeepseq
) branches `using` parList rdeepseq
{-
-- find the best pair/singleton of parents/childs for a given group
-}
makePairs :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [Period] -> [Pointer] -> Filiation -> Double -> Proximity
-> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
......@@ -255,12 +170,36 @@ makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs di
lastPrd = findLastPeriod fil periods
--------------------------------------
{-
-- find the best temporal links between a given group and its parents/childs
-}
phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
filterPointers' :: Proximity -> Double -> [(Pointer,[Int])] -> [(Pointer,[Int])]
filterPointers' proxi thr pts = filter (\((_,w),_) -> filterProximity proxi thr w) pts
reduceDiagos :: Map Date Cooc -> Map Int Double
reduceDiagos diagos = mapKeys (\(k,_) -> k)
$ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
filterPointersByPeriod :: Filiation -> [(Pointer,[Int])] -> [Pointer]
filterPointersByPeriod fil pts =
let pts' = sortOn (fst . fst . fst . fst) pts
inf = (fst . fst . fst . fst) $ head' "filterPointersByPeriod" pts'
sup = (fst . fst . fst . fst) $ last' "filterPointersByPeriod" pts'
in map fst
$ nubBy (\pt pt' -> snd pt == snd pt')
$ filter (\pt -> ((fst . fst . fst . fst) pt == inf) || ((fst . fst . fst . fst) pt == sup))
$ case fil of
ToParents -> reverse pts'
ToChilds -> pts'
ToChildsMemory -> undefined
ToParentsMemory -> undefined
phyloGroupMatching' :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
-> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ngrams) =
phyloGroupMatching' candidates filiation proxi docs diagos thr oldPointers (id,ngrams) =
if (null $ filterPointers proxi thr oldPointers)
-- if no previous pointers satisfy the current threshold then let's find new pointers
then if null nextPointers
......@@ -302,9 +241,55 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng
else [((fst c,proximity),snd c),((fst c',proximity),snd c')] ) targets
{-
-- get the upstream/downstream timescale of a given period
-}
phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
-> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ngrams) =
if (null $ filterPointers proxi thr oldPointers)
{- let's find new pointers -}
then if null nextPointers
then []
else filterPointersByPeriod filiation
$ head' "phyloGroupMatching"
-- Keep only the best set of pointers grouped by proximity
$ groupBy (\pt pt' -> (snd . fst) pt == (snd . fst) pt')
-- verifier que l on garde bien les plus importants
$ sortBy (comparing (Down . snd . fst)) $ head' "pointers" nextPointers
-- Find the first time frame where at leats one pointer satisfies the proximity threshold
else oldPointers
where
nextPointers :: [[(Pointer,[Int])]]
nextPointers = take 1
$ dropWhile (null)
{- for each time frame, process the proximity on relevant pairs of targeted groups -}
$ scanl (\acc groups ->
let periods = nub $ map (fst . fst . fst) $ concat groups
nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
diago = reduceDiagos
$ filterDiago diagos ([(fst . fst) id] ++ periods)
pairs = makePairs (id,ngrams) (concat groups) periods oldPointers filiation thr proxi docs diagos
in acc ++ ( filterPointers' proxi thr
$ concat
$ map (\(c,c') ->
{- process the proximity between the current group and a pair of candidates -}
let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
in if ((c == c') || (snd c == snd c'))
then [((fst c,proximity),snd c)]
else [((fst c,proximity),snd c),((fst c',proximity),snd c')] ) pairs )) []
$ inits candidates -- groups from [[1900],[1900,1901],[1900,1901,1902],...]
filterDocs :: Map Date Double -> [Period] -> Map Date Double
filterDocs d pds = restrictKeys d $ periodsToYears pds
filterDiago :: Map Date Cooc -> [Period] -> Map Date Cooc
filterDiago diago pds = restrictKeys diago $ periodsToYears pds
-----------------------------
-- | Matching Processing | --
-----------------------------
getNextPeriods :: Filiation -> Int -> Period -> [Period] -> [Period]
getNextPeriods fil max' pId pIds =
case fil of
......@@ -314,9 +299,6 @@ getNextPeriods fil max' pId pIds =
ToParentsMemory -> undefined
{-
-- find all the candidates parents/childs of ego
-}
getCandidates :: Int -> PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
getCandidates minNgrams ego targets =
if (length (ego ^. phylo_groupNgrams)) > 1
......@@ -326,11 +308,8 @@ getCandidates minNgrams ego targets =
map (\groups' -> filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')) groups') targets
{-
-- set up and start performing the upstream/downstream inter‐temporal matching period by period
-}
reconstructTemporalLinks :: Int -> [Period] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reconstructTemporalLinks frame periods proximity thr docs coocs groups =
matchGroupsToGroups :: Int -> [Period] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
matchGroupsToGroups frame periods proximity thr docs coocs groups =
let groups' = groupByField _phylo_groupPeriod groups
in foldl' (\acc prd ->
let -- 1) find the parents/childs matching periods
......@@ -339,7 +318,7 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups =
-- 2) find the parents/childs matching candidates
candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
-- 3) find the parents/childs number of docs by years
-- 3) find the parents/child number of docs by years
docsPar = filterDocs docs ([prd] ++ periodsPar)
docsChi = filterDocs docs ([prd] ++ periodsChi)
-- 4) find the parents/child diago by years
......@@ -347,9 +326,9 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups =
diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
-- 5) match in parallel all the groups (egos) to their possible candidates
egos = map (\ego ->
let pointersPar = phyloGroupMatching (getCandidates (getMinSharedNgrams proximity) ego candidatesPar) ToParents proximity docsPar diagoPar
let pointersPar = phyloGroupMatching' (getCandidates (getMinSharedNgrams proximity) ego candidatesPar) ToParents proximity docsPar diagoPar
thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
pointersChi = phyloGroupMatching (getCandidates (getMinSharedNgrams proximity) ego candidatesChi) ToChilds proximity docsChi diagoChi
pointersChi = phyloGroupMatching' (getCandidates (getMinSharedNgrams proximity) ego candidatesChi) ToChilds proximity docsChi diagoChi
thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
in addPointers ToChilds TemporalPointer pointersChi
$ addPointers ToParents TemporalPointer pointersPar
......@@ -361,51 +340,27 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups =
) [] periods
{-
-- reconstruct a phylomemetic network from a list of groups and from a given threshold
-}
toPhylomemeticNetwork :: Int -> [Period] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [Branch]
toPhylomemeticNetwork timescale periods similarity thr docs coocs groups =
groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ reconstructTemporalLinks timescale periods similarity thr docs coocs groups
-----------------------
-- | Phylo Quality | --
-----------------------
----------------------------
-- | Quality Assessment | --
----------------------------
relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
relevantBranches term branches =
filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
{-
-- filter the branches containing x
-}
relevantBranches :: Int -> [Branch] -> [Branch]
relevantBranches x branches =
filter (\groups -> (any (\group -> elem x $ group ^. phylo_groupNgrams) groups)) branches
{-
-- compute the accuracy ξ
-- the accuracy of a branch relatively to a root x is computed only over the periods where clusters mentionning x in the phylo do exist
-}
accuracy :: Int -> [(Date,Date)] -> Branch -> Double
accuracy x periods bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk') / (fromIntegral $ length bk'))
accuracy :: Int -> [(Date,Date)] -> [PhyloGroup] -> Double
-- The accuracy of a branch relatively to a term x is computed only over the periods there exist some cluster mentionning x in the phylomemy
accuracy x periods bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk')
/ (fromIntegral $ length bk'))
where
---
bk' :: [PhyloGroup]
bk' = filter (\g -> elem (g ^. phylo_groupPeriod) periods) bk
{-
-- compute the recall ρ
-}
recall :: Int -> Branch -> [Branch] -> Double
recall :: Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
recall x bk bx = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
/ (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx))
{-
-- compute the F-score function
-}
fScore :: Double -> Int -> [(Date,Date)] -> [PhyloGroup] -> [[PhyloGroup]] -> Double
fScore lambda x periods bk bx =
let rec = recall x bk bx
......@@ -414,18 +369,11 @@ fScore lambda x periods bk bx =
/ (((lambda ** 2) * acc + rec))
{-
-- compute the number of groups
-}
wk :: [PhyloGroup] -> Double
wk bk = fromIntegral $ length bk
{-
-- compute the recall ρ for all the branches
-}
globalRecall :: Map Int Double -> [Branch] -> Double
globalRecall freq branches =
toRecall :: Map Int Double -> [[PhyloGroup]] -> Double
toRecall freq branches =
if (null branches)
then 0
else sum
......@@ -440,11 +388,8 @@ globalRecall freq branches =
pys = sum (elems freq)
{-
-- compute the accuracy ξ for all the branches
-}
globalAccuracy :: Map Int Double -> [Branch] -> Double
globalAccuracy freq branches =
toAccuracy :: Map Int Double -> [[PhyloGroup]] -> Double
toAccuracy freq branches =
if (null branches)
then 0
else sum
......@@ -461,9 +406,7 @@ globalAccuracy freq branches =
pys = sum (elems freq)
{-
-- compute the quality score F(λ)
-}
-- | here we do the average of all the local f_scores
toPhyloQuality :: Double -> Double -> Map Int Double -> [[PhyloGroup]] -> Double
toPhyloQuality fdt lambda freq branches =
if (null branches)
......@@ -483,166 +426,307 @@ toPhyloQuality fdt lambda freq branches =
-- pys :: Double
-- pys = sum (elems freq)
-- 1 / nb de foundation
-------------------------
-- | Sea-level Rise | --
-------------------------
{-
-- attach a rise value to branches & groups metadata
-}
riseToMeta :: Double -> [Branch] -> [Branch]
riseToMeta rise branches =
let break = length branches > 1
in map (\b ->
map (\g ->
if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [rise]) "breaks"(g ^. phylo_groupMeta))
else g) b) branches
------------------------------------
-- | Constant Temporal Matching | --
------------------------------------
-- add a branch id within each group
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches groups =
{- run the related component algorithm -}
let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
$ sortOn (\gs -> fst $ fst $ head' "egos" gs)
$ map (\group -> [getGroupId group]
++ (map fst $ group ^. phylo_groupPeriodParents)
++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
-- first find the related components by inside each ego's period
-- a supprimer
graph' = map relatedComponents egos
-- then run it for the all the periods
branches = zip [1..]
$ relatedComponents $ concat (graph' `using` parList rdeepseq)
-- update each group's branch id
in map (\(bId,branch) ->
let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
$ elems $ restrictKeys groups (Set.fromList branch)
in groups' `using` parList rdeepseq
) branches `using` parList rdeepseq
{-
-- attach a thr value to branches & groups metadata
-}
thrToMeta :: Double -> [Branch] -> [Branch]
thrToMeta thr branches =
map (\b ->
map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
reduceFrequency frequency branches =
restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
{-
-- TODO
-- 1) try the zipper structure https://wiki.haskell.org/Zipper to performe the sea-level rise algorithme
-- 2) investigate how the branches order influences the 'separateBranches' function
-}
updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
updateThr thr branches = map (\b -> map (\g ->
g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
{-
-- sequentially separate each branch for a given threshold and check if it locally increases the quality score
-- sequence = [done] | currentBranch | [rest]
-- done = all the already separated branches
-- rest = all the branches we still have to separate
-}
separateBranches :: Double -> Proximity -> Double -> Map Int Double -> Int -> Double -> Double
-> Int -> Map Date Double -> Map Date Cooc -> [Period]
-> [(Branch,ShouldTry)] -> (Branch,ShouldTry) -> [(Branch,ShouldTry)]
-> [(Branch,ShouldTry)]
separateBranches fdt similarity lambda frequency minBranch thr rise timescale docs coocs periods done currentBranch rest =
let done' = done ++ (if snd currentBranch
-- Sequentially break each branch of a phylo where
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
breakBranches :: Double -> Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
-> Int -> Map Date Double -> Map Date Cooc -> [Period] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
-- 1) keep or not the new division of ego
let done' = done ++ (if snd ego
then
(if ((null (fst branches')) || (quality > quality'))
---- 5) if the quality is not increased by the new branches or if the new branches are all small
---- then undo the separation and localy stop the sea rise
---- else validate the separation and authorise next sea rise in the long new branches
(if ((null (fst ego')) || (quality > quality'))
then
-- trace (" ✗ F(λ) = " <> show(quality) <> " (vs) " <> show(quality')
-- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
-- <> " | " <> show(length $ fst ego) <> " groups : "
-- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
-- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
[(fst currentBranch,False)]
[(fst ego,False)]
else
-- trace (" ✓ F(λ) = " <> show(quality) <> " (vs) " <> show(quality')
-- trace (" ✓ level = " <> printf "%.1f" thr <> "")
-- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
-- <> " | " <> show(length $ fst ego) <> " groups : "
-- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
-- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
((map (\e -> (e,True)) (fst branches')) ++ (map (\e -> (e,False)) (snd branches'))))
else [currentBranch])
((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
else [ego])
in
-- 6) if there is no more branch to separate tne return [done'] else continue with [rest]
-- 2) if there is no more branches in rest then return else continue
if null rest
then done'
else separateBranches fdt similarity lambda frequency minBranch thr rise timescale docs coocs periods
done' (List.head rest) (List.tail rest)
else breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where
------- 1) compute the quality before splitting any branch
quality :: LocalQuality
quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst currentBranch] ++ (map fst rest))
------------------- 2) split the current branch and create a new phylomemetic network
phylomemeticNetwork :: [Branch]
phylomemeticNetwork = toPhylomemeticNetwork timescale periods similarity thr docs coocs (fst currentBranch)
--------- 3) change the new phylomemetic network into a tuple of new branches
--------- on the left : the long branches, on the right : the small ones
branches' :: ([Branch],[Branch])
branches' = partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
--------------------------------------
quality :: Double
quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
--------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' =
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
$ thrToMeta thr
$ riseToMeta rise phylomemeticNetwork
-------- 4) compute again the quality by considering the new branches
quality' :: LocalQuality
$ depthToMeta (elevation - depth) branches'
--------------------------------------
quality' :: Double
quality' = toPhyloQuality fdt lambda frequency
((map fst done) ++ (fst branches') ++ (snd branches') ++ (map fst rest))
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
{-
-- perform the sea-level rise algorithm, browse the similarity ladder and check that we can try out the next step
-}
seaLevelRise :: Double -> Proximity -> Double -> Int -> Map Int Double
-> [Double] -> Double
-> Int -> [Period]
-> Map Date Double -> Map Date Cooc
-> [(Branch,ShouldTry)]
-> ([(Branch,ShouldTry)],FinalQuality)
seaLevelRise fdt proximity lambda minBranch frequency ladder rise frame periods docs coocs branches =
-- if the ladder is empty or thr > 1 or there is no branch to break then stop
if (null ladder) || ((List.head ladder) > 1) || (stopRise branches)
seaLevelMatching :: Double -> Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
-> Int -> [Period] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> ([([PhyloGroup],Bool)],Double)
seaLevelMatching fdt proximity lambda minBranch frequency thr step depth elevation frame periods docs coocs branches =
-- if there is no branch to break or if seaLvl level > 1 then end
if (thr >= 1) || ((not . or) $ map snd branches)
then (branches, toPhyloQuality fdt lambda frequency (map fst branches))
else
-- start breaking up all the possible branches for the current similarity threshold
let thr = List.head ladder
branches' = trace ("threshold = " <> printf "%.3f" thr
<> " F(λ) = " <> printf "%.5f" (toPhyloQuality fdt lambda frequency (map fst branches))
<> " ξ = " <> printf "%.5f" (globalAccuracy frequency (map fst branches))
<> " ρ = " <> printf "%.5f" (globalRecall frequency (map fst branches))
<> " branches = " <> show(length branches))
$ separateBranches fdt proximity lambda frequency minBranch thr rise frame docs coocs periods
[] (List.head branches) (List.tail branches)
in seaLevelRise fdt proximity lambda minBranch frequency (List.tail ladder) (rise + 1) frame periods docs coocs branches'
-- break all the possible branches at the current seaLvl level
let quality = toPhyloQuality fdt lambda frequency (map fst branches)
acc = toAccuracy frequency (map fst branches)
rec = toRecall frequency (map fst branches)
branches' = trace ("↑ level = " <> printf "%.3f" thr <> " F(λ) = " <> printf "%.5f" quality
<> " ξ = " <> printf "%.5f" acc
<> " ρ = " <> printf "%.5f" rec
<> " branches = " <> show(length branches) <> " ↴")
$ breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches')
in seaLevelMatching fdt proximity lambda minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
constanteTemporalMatching start step phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat (map fst $ (fst branches)))
(toPhyloHorizon (updateQuality (snd branches) phylo))
where
--------
stopRise :: [(Branch,ShouldTry)] -> Bool
stopRise bs = ((not . or) $ map snd bs)
-- 2) process the temporal matching by elevating seaLvl level
-- branches :: ([([groups in the same branch],should westill break the branch?)],final quality)
branches :: ([([PhyloGroup],Bool)],Double)
branches = seaLevelMatching (fromIntegral $ Vector.length $ getRoots phylo)
(phyloProximity $ getConfig phylo)
(_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq)
start step
((((1 - start) / step) - 1))
(((1 - start) / step))
(getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo)
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
(reverse $ sortOn (length . fst) initBranches)
-- 1) for each group process an initial temporal Matching
-- here we suppose that all the groups of level 1 are part of the same big branch
-- the Bool param determines weither you should apply the sealevel within the branch
-- creer un type [PhyloGroup] <=> Branch
initBranches :: [([PhyloGroup],Bool)]
initBranches = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
$ groupsToBranches $ Map.fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (phyloProximity $ getConfig phylo)
start
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo)
-----------------
-- | Horizon | --
-----------------
{-
-- start the temporal matching process up, recover the resulting branches and update the groups (at scale 1) consequently
-}
temporalMatching :: [Double] -> Phylo -> Phylo
temporalMatching ladder phylo = updatePhyloGroups 1
(Map.fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
(updateQuality quality phylo)
toPhyloHorizon :: Phylo -> Phylo
toPhyloHorizon phylo =
let t0 = take 1 (getPeriodIds phylo)
groups = getGroupsFromLevelPeriods 1 t0 phylo
sens = getSensibility (phyloProximity $ getConfig phylo)
nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) t0
diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) t0
in phylo & phylo_horizon .~ (fromList $ map (\(g,g') ->
((getGroupId g,getGroupId g'),weightedLogJaccard' sens nbDocs diago (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) $ listToCombi' groups)
--------------------------------------
-- | Adaptative Temporal Matching | --
--------------------------------------
thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
thrToMeta thr branches =
map (\b ->
map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
depthToMeta depth branches =
let break = length branches > 1
in map (\b ->
map (\g ->
if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
else g) b) branches
reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double
getInTupleMap m k k'
| isJust (m !? ( k ,k')) = m ! ( k ,k')
| isJust (m !? ( k',k )) = m ! ( k',k )
| otherwise = 0
toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
toThreshold nbSteps proxiGroups =
let idx = ((Map.size proxiGroups) `div` (floor nbSteps)) - 1
in if idx >= 0
then (sort $ elems proxiGroups) !! idx
else 1
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
adaptativeBreakBranches :: Double -> Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
-> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
-> [Period] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
-> [([PhyloGroup],(Bool,[Double]))]
adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods done ego rest =
-- 1) keep or not the new division of ego
let done' = done ++ (if (fst . snd) ego
then (if ((null (fst ego')) || (quality > quality'))
then
[(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
else
( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
in
-- uncomment let .. in for debugging
-- let part1 = partition (snd) done'
-- part2 = partition (snd) rest
-- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
-- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
-- ) $
-- 2) if there is no more branches in rest then return else continue
if null rest
then done'
else adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where
-------
quality :: FinalQuality
quality = snd sea
--------------------------------------
thr :: Double
thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
--------------------------------------
quality :: Double
quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
--------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' =
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
$ thrToMeta thr
$ depthToMeta (elevation - depth) branches'
--------------------------------------
quality' :: Double
quality' = toPhyloQuality fdt lambda frequency
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
--------
branches :: [Branch]
branches = map fst $ fst sea
--- 2) process the temporal matching by elevating the similarity ladder
sea :: ([(Branch,ShouldTry)],FinalQuality)
sea = seaLevelRise (fromIntegral $ Vector.length $ getRoots phylo)
adaptativeSeaLevelMatching :: Double -> Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
-> Double -> Int -> Map Int Double
-> Int -> [Period] -> Map Date Double -> Map Date Cooc
-> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
adaptativeSeaLevelMatching fdt proxiConf depth elevation groupsProxi lambda minBranch frequency frame periods docs coocs branches =
-- if there is no branch to break or if seaLvl level >= depth then end
if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
then branches
else
-- break all the possible branches at the current seaLvl level
let branches' = adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches')
groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
thr = toThreshold depth groupsProxi
in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
<> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
<> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
<> " thr = " <> show(thr))
$ adaptativeSeaLevelMatching fdt proxiConf (depth - 1) elevation groupsProxi' lambda minBranch frequency' frame periods docs coocs branches'
adaptativeTemporalMatching :: Double -> Phylo -> Phylo
adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
(toPhyloHorizon phylo)
where
-- 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]]
branches = map fst
$ adaptativeSeaLevelMatching (fromIntegral $ Vector.length $ getRoots phylo)
(phyloProximity $ getConfig phylo)
(elevation - 1)
elevation
(phylo ^. phylo_groupsProxi)
(_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq)
ladder 1
(getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo)
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
(reverse $ sortOn (length . fst) seabed)
------ 1) for each group, process an initial temporal Matching and create a 'seabed'
------ ShouldTry determines if you should apply the seaLevelRise function again within each branch
seabed :: [(Branch,ShouldTry)]
seabed = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
$ toPhylomemeticNetwork (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo)
(phyloProximity $ getConfig phylo)
(List.head ladder)
groups
-- 1) for each group process an initial temporal Matching
-- here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],(Bool,[Double]))]
groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (phyloProximity $ getConfig phylo)
thr
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
(traceTemporalMatching $ getGroupsFromScale 1 phylo)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo)
--------------------------------------
thr :: Double
thr = toThreshold elevation (phylo ^. phylo_groupsProxi)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment