Commit 66f96f84 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[MERGE]

parent 54d94963
...@@ -5,7 +5,7 @@ cabal-version: 1.12 ...@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
name: gargantext name: gargantext
version: 0.0.6.9.5 version: 0.0.6.9.5
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
......
...@@ -30,8 +30,7 @@ import Control.DeepSeq (NFData) ...@@ -30,8 +30,7 @@ import Control.DeepSeq (NFData)
import Control.Lens (makeLenses) import Control.Lens (makeLenses)
import Data.Aeson import Data.Aeson
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Map.Strict (Map) import Data.Map (Map)
import Data.Set (Set)
import Data.Swagger import Data.Swagger
import Data.Text (Text, pack) import Data.Text (Text, pack)
import Data.Vector (Vector) import Data.Vector (Vector)
...@@ -64,9 +63,9 @@ instance ToSchema ListParser ...@@ -64,9 +63,9 @@ instance ToSchema ListParser
data SeaElevation = data SeaElevation =
Constante Constante
{ _cons_start :: Double { _cons_start :: Double
, _cons_gap :: Double } , _cons_step :: Double }
| Adaptative | Adaptative
{ _adap_steps :: Double } { _adap_granularity :: Double }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema SeaElevation instance ToSchema SeaElevation
...@@ -78,8 +77,8 @@ data Proximity = ...@@ -78,8 +77,8 @@ data Proximity =
| WeightedLogSim | WeightedLogSim
{ _wls_sensibility :: Double { _wls_sensibility :: Double
, _wls_minSharedNgrams :: Int } , _wls_minSharedNgrams :: Int }
| Hamming | Hamming
{ _hmg_sensibility :: Double { _hmg_sensibility :: Double
, _hmg_minSharedNgrams :: Int} , _hmg_minSharedNgrams :: Int}
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
...@@ -205,7 +204,7 @@ data PhyloSubConfig = ...@@ -205,7 +204,7 @@ data PhyloSubConfig =
subConfig2config :: PhyloSubConfig -> PhyloConfig subConfig2config :: PhyloSubConfig -> PhyloConfig
subConfig2config subConfig = defaultConfig { phyloProximity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1 subConfig2config subConfig = defaultConfig { phyloProximity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1
, phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
, phyloQuality = Quality (_sc_phyloQuality subConfig) 1 , phyloQuality = Quality (_sc_phyloQuality subConfig) 1
, timeUnit = _sc_timeUnit subConfig , timeUnit = _sc_timeUnit subConfig
...@@ -307,8 +306,8 @@ instance ToSchema Software where ...@@ -307,8 +306,8 @@ instance ToSchema Software where
defaultSoftware :: Software defaultSoftware :: Software
defaultSoftware = defaultSoftware =
Software { _software_name = pack "GarganText" Software { _software_name = pack "Gargantext"
, _software_version = pack "v5" } , _software_version = pack "v4" }
-- | Global parameters of a Phylo -- | Global parameters of a Phylo
...@@ -325,7 +324,7 @@ instance ToSchema PhyloParam where ...@@ -325,7 +324,7 @@ instance ToSchema PhyloParam where
defaultPhyloParam :: PhyloParam defaultPhyloParam :: PhyloParam
defaultPhyloParam = defaultPhyloParam =
PhyloParam { _phyloParam_version = pack "v3" PhyloParam { _phyloParam_version = pack "v2.adaptative"
, _phyloParam_software = defaultSoftware , _phyloParam_software = defaultSoftware
, _phyloParam_config = defaultConfig } , _phyloParam_config = defaultConfig }
...@@ -410,7 +409,8 @@ data Phylo = ...@@ -410,7 +409,8 @@ data Phylo =
, _phylo_timeDocs :: !(Map Date Double) , _phylo_timeDocs :: !(Map Date Double)
, _phylo_termFreq :: !(Map Int Double) , _phylo_termFreq :: !(Map Int Double)
, _phylo_lastTermFreq :: !(Map Int Double) , _phylo_lastTermFreq :: !(Map Int Double)
, _phylo_diaSimScan :: Set Double , _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
, _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
, _phylo_param :: PhyloParam , _phylo_param :: PhyloParam
, _phylo_periods :: Map Period PhyloPeriod , _phylo_periods :: Map Period PhyloPeriod
, _phylo_quality :: Double , _phylo_quality :: Double
......
...@@ -18,7 +18,7 @@ module Gargantext.Core.Viz.Phylo.Example where ...@@ -18,7 +18,7 @@ module Gargantext.Core.Viz.Phylo.Example where
import Control.Lens import Control.Lens
import Data.GraphViz.Types.Generalised (DotGraph) import Data.GraphViz.Types.Generalised (DotGraph)
import Data.List (sortOn, nub, sort) import Data.List (sortOn, nub, sort)
import Data.Map.Strict (Map) import Data.Map (Map)
import Data.Text (Text, toLower) import Data.Text (Text, toLower)
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.Mono (monoTexts) import Gargantext.Core.Text.Terms.Mono (monoTexts)
...@@ -27,17 +27,16 @@ import Gargantext.Core.Viz.Phylo.PhyloExport ...@@ -27,17 +27,16 @@ import Gargantext.Core.Viz.Phylo.PhyloExport
import Gargantext.Core.Viz.Phylo.PhyloMaker import Gargantext.Core.Viz.Phylo.PhyloMaker
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching) import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import qualified Data.Set as Set
--------------------------------- ---------------------------------
-- | STEP 5 | -- Export the phylo -- | STEP 5 | -- Export the phylo
--------------------------------- ---------------------------------
phyloExport :: IO () phyloExport :: IO ()
phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot" phyloDot phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot" phyloDot
phyloDot :: DotGraph DotId phyloDot :: DotGraph DotId
phyloDot = toPhyloExport phyloCleopatre phyloDot = toPhyloExport phyloCleopatre
...@@ -54,15 +53,14 @@ phyloCleopatre = synchronicClustering $ toHorizon flatPhylo ...@@ -54,15 +53,14 @@ phyloCleopatre = synchronicClustering $ toHorizon flatPhylo
----------------------------------------------- -----------------------------------------------
flatPhylo :: Phylo flatPhylo :: Phylo
flatPhylo = case (getSeaElevation emptyPhylo) of flatPhylo = case (getSeaElevation emptyPhylo) of
Constante s g -> temporalMatching (constDiachronicLadder s g Set.empty) Constante s g -> constanteTemporalMatching s g
$ scanSimilarity 1 $ toGroupsProxi 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
Adaptative s -> adaptativeTemporalMatching s
$ toGroupsProxi 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo $ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
Adaptative s -> temporalMatching (adaptDiachronicLadder s (emptyPhylo' ^. phylo_diaSimScan) Set.empty) emptyPhylo'
emptyPhylo' :: Phylo
emptyPhylo' = scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
--------------------------------------------- ---------------------------------------------
-- | STEP 2 | -- Build the cliques -- | STEP 2 | -- Build the cliques
...@@ -101,10 +99,9 @@ nbDocsByYear = docsToTimeScaleNb docs ...@@ -101,10 +99,9 @@ nbDocsByYear = docsToTimeScaleNb docs
config :: PhyloConfig config :: PhyloConfig
config = config =
defaultConfig { phyloName = "Cesar et Cleopatre" defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloScale = 2 , phyloScale = 2
, seaElevation = Adaptative 4
, exportFilter = [ByBranchSize 0] , exportFilter = [ByBranchSize 0]
, clique = MaxClique 0 15 ByNeighbours } , clique = MaxClique 0 15 ByNeighbours }
...@@ -113,14 +110,14 @@ docs :: [Document] ...@@ -113,14 +110,14 @@ docs :: [Document]
docs = map (\(d,t) docs = map (\(d,t)
-> Document (d+102) -> Document (d+102)
"" ""
(filter (\n -> isRoots n (foundations ^. foundations_roots)) $ monoTexts t) (filter (\n -> isRoots n (foundations ^. foundations_roots)) $ monoTexts t)
Nothing Nothing
[] []
) corpus ) corpus
foundations :: PhyloFoundations foundations :: PhyloFoundations
foundations = PhyloFoundations (Vector.fromList $ map toLower actants) mapList foundations = PhyloFoundations (Vector.fromList $ map toLower actants) mapList
-------------------------------------------- --------------------------------------------
...@@ -138,26 +135,26 @@ actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV", "Ptolem ...@@ -138,26 +135,26 @@ actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV", "Ptolem
, "Alexandrie" , "Auguste" , "Pompee" , "Cassius" , "Brutus", "Caesar-III", "Aurelia-Cotta", "Pisae", "Pline"] , "Alexandrie" , "Auguste" , "Pompee" , "Cassius" , "Brutus", "Caesar-III", "Aurelia-Cotta", "Pisae", "Pline"]
corpus :: [(Date, Text)] corpus :: [(Date, Text)]
corpus = sortOn fst [ corpus = sortOn fst [
(-101,"La tutelle Cesar Caesar-III de sa mère lui étant difficile à endurer, en septembre 101 av. J.-C., Ptolemee-X la fait assassiner et peut enfin régner presque seul puisqu'il partage le pouvoir avec son épouse Berenice-III Cléopâtre Philopator."), (-101,"La tutelle Cesar Caesar-III de sa mère lui étant difficile à endurer, en septembre 101 av. J.-C., Ptolemee-X la fait assassiner et peut enfin régner presque seul puisqu'il partage le pouvoir avec son épouse Berenice-III Cléopâtre Philopator."),
(-99,"Caesar-III est questeur en 99 av. J.-C. ou 98 av. J.-C., et préteur en 92 av. J.-C.."), (-99,"Caesar-III est questeur en 99 av. J.-C. ou 98 av. J.-C., et préteur en 92 av. J.-C.."),
(-100,"Caius Julius Caesar-IV — dit Jules Cesar Ptolemee-X — naît vers 100 av. J.-C., il est le fils de Caius Julius Caesar-III et de Aurelia-Cotta"), (-100,"Caius Julius Caesar-IV — dit Jules Cesar Ptolemee-X — naît vers 100 av. J.-C., il est le fils de Caius Julius Caesar-III et de Aurelia-Cotta"),
(-85,"Caesar-III meurt à Pisae de cause naturelle en 85 av. J.-C. : selon Pline l'Ancien, il décède brusquement en mettant ses chaussures"), (-85,"Caesar-III meurt à Pisae de cause naturelle en 85 av. J.-C. : selon Pline l'Ancien, il décède brusquement en mettant ses chaussures"),
(-53,"Aurelia-Cotta décède peu de temps avant le meurtre de Clodius Pulcher, vers 53 av. J.-C."), (-53,"Aurelia-Cotta décède peu de temps avant le meurtre de Clodius Pulcher, vers 53 av. J.-C."),
(-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."), (-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."),
(-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."), (-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."),
(-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"), (-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"),
(-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."), (-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."),
(-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."), (-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."),
(-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."), (-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."),
(-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."), (-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."),
(-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."), (-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."),
(-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"), (-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"),
(-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."), (-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."),
(-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"), (-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"),
(-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"), (-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"),
(-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."), (-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."),
(-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."), (-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."),
(-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."), (-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."),
(-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."), (-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."),
(-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")] (-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")]
...@@ -19,7 +19,7 @@ import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order) ...@@ -19,7 +19,7 @@ import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
import Data.GraphViz.Types.Generalised (DotGraph) import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Types.Monadic import Data.GraphViz.Types.Monadic
import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex) import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
import Data.Map.Strict (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member) import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
import Data.Text.Lazy (fromStrict, pack, unpack) import Data.Text.Lazy (fromStrict, pack, unpack)
import Data.Vector (Vector) import Data.Vector (Vector)
import Debug.Trace (trace) import Debug.Trace (trace)
...@@ -375,7 +375,7 @@ processSort sort' elev export = case sort' of ...@@ -375,7 +375,7 @@ processSort sort' elev export = case sort' of
ByBirthDate o -> sortByBirthDate o export ByBirthDate o -> sortByBirthDate o export
ByHierarchy _ -> case elev of ByHierarchy _ -> case elev of
Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches)) Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches)) Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
----------------- -----------------
-- | Metrics | -- -- | Metrics | --
...@@ -546,10 +546,9 @@ processLabels labels foundations freq export = ...@@ -546,10 +546,9 @@ processLabels labels foundations freq export =
-- | Dynamics | -- -- | Dynamics | --
------------------ ------------------
-- utiliser & creer une Map FdtId [PhyloGroup]
-- n = index of the current term toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double toDynamics n parents g m =
toDynamics n elders g m =
let prd = g ^. phylo_groupPeriod let prd = g ^. phylo_groupPeriod
end = last' "dynamics" (sort $ map snd $ elems m) end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end)) in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
...@@ -565,18 +564,18 @@ toDynamics n elders g m = ...@@ -565,18 +564,18 @@ toDynamics n elders g m =
where where
-------------------------------------- --------------------------------------
isNew :: Bool isNew :: Bool
isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
type FdtId = Int
processDynamics :: [PhyloGroup] -> [PhyloGroup] processDynamics :: [PhyloGroup] -> [PhyloGroup]
processDynamics groups = processDynamics groups =
map (\g -> map (\g ->
let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId) let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
&& ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
where where
-------------------------------------- --------------------------------------
mapNgrams :: Map FdtId (Date,Date) mapNgrams :: Map Int (Date,Date)
mapNgrams = map (\dates -> mapNgrams = map (\dates ->
let dates' = sort dates let dates' = sort dates
in (head' "dynamics" dates', last' "dynamics" dates')) in (head' "dynamics" dates', last' "dynamics" dates'))
...@@ -622,7 +621,7 @@ toHorizon phylo = ...@@ -622,7 +621,7 @@ toHorizon phylo =
$ concat $ concat
$ tracePhyloAncestors newGroups) phylo $ tracePhyloAncestors newGroups) phylo
reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
$ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel scale phyloAncestor
in updatePhyloGroups scale reBranched phylo in updatePhyloGroups scale reBranched phylo
where where
-- | 1) for each periods -- | 1) for each periods
...@@ -637,7 +636,7 @@ toHorizon phylo = ...@@ -637,7 +636,7 @@ toHorizon phylo =
-- | 2) find ancestors between groups without parents -- | 2) find ancestors between groups without parents
mapGroups :: [[PhyloGroup]] mapGroups :: [[PhyloGroup]]
mapGroups = map (\prd -> mapGroups = map (\prd ->
let groups = getGroupsFromScalePeriods scale [prd] phylo let groups = getGroupsFromLevelPeriods scale [prd] phylo
childs = getPreviousChildIds scale frame prd periods phylo childs = getPreviousChildIds scale frame prd periods phylo
-- maybe add a better filter for non isolated ancestors -- maybe add a better filter for non isolated ancestors
heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds)) heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
...@@ -661,7 +660,7 @@ toHorizon phylo = ...@@ -661,7 +660,7 @@ toHorizon phylo =
getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId] getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
getPreviousChildIds lvl frame curr prds phylo = getPreviousChildIds lvl frame curr prds phylo =
concat $ map ((map fst) . _phylo_groupPeriodChilds) concat $ map ((map fst) . _phylo_groupPeriodChilds)
$ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
--------------------- ---------------------
-- | phyloExport | -- -- | phyloExport | --
...@@ -696,10 +695,10 @@ toPhyloExport phylo = exportToDot phylo ...@@ -696,10 +695,10 @@ toPhyloExport phylo = exportToDot phylo
-------------------------------------- --------------------------------------
groups :: [PhyloGroup] groups :: [PhyloGroup]
groups = traceExportGroups groups = traceExportGroups
-- necessaire ?
$ processDynamics $ processDynamics
$ getGroupsFromScale (phyloScale $ getConfig phylo) $ getGroupsFromLevel (phyloScale $ getConfig phylo)
$ tracePhyloInfo phylo $ tracePhyloInfo phylo
-- \$ toHorizon phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch] traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
...@@ -722,3 +721,4 @@ traceExportGroups groups = trace ("\n" <> "-- | Export " ...@@ -722,3 +721,4 @@ traceExportGroups groups = trace ("\n" <> "-- | Export "
<> show(length groups) <> " groups and " <> show(length groups) <> " groups and "
<> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms" <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
) groups ) groups
...@@ -8,18 +8,18 @@ Stability : experimental ...@@ -8,18 +8,18 @@ Stability : experimental
Portability : POSIX Portability : POSIX
-} -}
module Gargantext.Core.Viz.Phylo.PhyloMaker where module Gargantext.Core.Viz.Phylo.PhyloMaker where
import Control.DeepSeq (NFData) import Control.DeepSeq (NFData)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail) import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map.Strict (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert) import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import Data.Vector (Vector) import Data.Vector (Vector)
import Debug.Trace (trace) import Debug.Trace (trace)
import Prelude (floor)
import Gargantext.Core.Methods.Similarities (Similarity(Conditional)) import Gargantext.Core.Methods.Similarities (Similarity(Conditional))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques) import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
...@@ -29,7 +29,7 @@ import Gargantext.Core.Viz.Phylo ...@@ -29,7 +29,7 @@ import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon) import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity) import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Set as Set import qualified Data.Set as Set
...@@ -42,26 +42,24 @@ import qualified Data.Vector as Vector ...@@ -42,26 +42,24 @@ import qualified Data.Vector as Vector
{- {-
-- TODO AD -- TODO AD
data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo} data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
| PhyloN { _phylo'_flatPhylo :: Phylo} | PhyloN { _phylo'_flatPhylo :: Phylo}
toPhylo' :: Phylo' -> [Document] -> TermList -> PhyloConfig -> Phylo toPhylo' :: Phylo' -> [Document] -> TermList -> PhyloConfig -> Phylo
toPhylo' (PhyloN phylo) = toPhylo' toPhylo' (PhyloN phylo) = toPhylo'
toPhylo' (PhyloBase phylo) = toPhylo toPhylo' (PhyloBase phylo) = toPhylo
-} -}
-- TODO an adaptative synchronic clustering with a slider
toPhylo :: Phylo -> Phylo toPhylo :: Phylo -> Phylo
toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGroupsFromScale 1 flatPhylo)) toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGroupsFromLevel 1 flatPhylo))
$ traceToPhylo (phyloScale $ getConfig phylowithoutLink) $ $ traceToPhylo (phyloScale $ getConfig phylowithoutLink) $
if (phyloScale $ getConfig phylowithoutLink) > 1 if (phyloScale $ getConfig phylowithoutLink) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloScale $ getConfig phylowithoutLink)] then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloScale $ getConfig phylowithoutLink)]
else phyloAncestors else flatPhylo
where where
-------------------------------------- --------------------------------------
phyloAncestors :: Phylo phyloAncestors :: Phylo
phyloAncestors = phyloAncestors =
if (findAncestors $ getConfig phylowithoutLink) if (findAncestors $ getConfig phylowithoutLink)
then toHorizon flatPhylo then toHorizon flatPhylo
else flatPhylo else flatPhylo
...@@ -71,64 +69,36 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou ...@@ -71,64 +69,36 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou
-------------------------------------- --------------------------------------
----------------------------- --------------------
-- | Create a flat Phylo | -- -- | To Phylo 1 | --
----------------------------- --------------------
{-
-- create an adaptative diachronic 'sea elevation' ladder
-}
adaptDiachronicLadder :: Double -> Set Double -> Set Double -> [Double]
adaptDiachronicLadder curr similarities ladder =
if curr <= 0 || Set.null similarities
then Set.toList ladder
else
let idx = ((Set.size similarities) `div` (floor curr)) - 1
thr = Set.elemAt idx similarities
-- we use a sliding methods 1/10, then 1/9, then ... 1/2
in adaptDiachronicLadder (curr -1) (Set.filter (> thr) similarities) (Set.insert thr ladder)
{-
-- create a constante diachronic 'sea elevation' ladder
-}
constDiachronicLadder :: Double -> Double -> Set Double -> [Double]
constDiachronicLadder curr step ladder =
if curr > 1
then Set.toList ladder
else constDiachronicLadder (curr + step) step (Set.insert curr ladder)
{- toGroupsProxi :: Scale -> Phylo -> Phylo
-- process an initial scanning of the kinship links toGroupsProxi lvl phylo =
-}
scanSimilarity :: Scale -> Phylo -> Phylo
scanSimilarity lvl phylo =
let proximity = phyloProximity $ getConfig phylo let proximity = phyloProximity $ getConfig phylo
scanning = foldlWithKey (\acc pId pds -> groupsProxi = foldlWithKey (\acc pId pds ->
-- 1) process period by period -- 1) process period by period
let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams))
$ elems $ elems
$ view ( phylo_periodScales $ view ( phylo_periodScales
. traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl) . traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
. phylo_scaleGroups ) pds . phylo_scaleGroups ) pds
next = getNextPeriods ToParents (getTimeFrame $ timeUnit $ getConfig phylo) pId (keys $ phylo ^. phylo_periods) next = getNextPeriods ToParents (getTimeFrame $ timeUnit $ getConfig phylo) pId (keys $ phylo ^. phylo_periods)
targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromScalePeriods lvl next phylo targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromLevelPeriods lvl next phylo
docs = filterDocs (phylo ^. phylo_timeDocs) ([pId] ++ next) docs = filterDocs (phylo ^. phylo_timeDocs) ([pId] ++ next)
diagos = filterDiago (phylo ^. phylo_timeCooc) ([pId] ++ next) diagos = filterDiago (phylo ^. phylo_timeCooc) ([pId] ++ next)
-- 2) compute the pairs in parallel -- 2) compute the pairs in parallel
pairs = map (\(id,ngrams) -> pairs = map (\(id,ngrams) ->
map (\(id',ngrams') -> map (\(id',ngrams') ->
let nbDocs = (sum . elems) $ filterDocs docs ([idToPrd id, idToPrd id']) let nbDocs = (sum . elems) $ filterDocs docs ([idToPrd id, idToPrd id'])
diago = reduceDiagos $ filterDiago diagos ([idToPrd id, idToPrd id']) diago = reduceDiagos $ filterDiago diagos ([idToPrd id, idToPrd id'])
in ((id,id'),toProximity nbDocs diago proximity ngrams ngrams' ngrams') in ((id,id'),toProximity nbDocs diago proximity ngrams ngrams' ngrams')
) $ filter (\(_,ngrams') -> (not . null) $ intersect ngrams ngrams') targets ) $ filter (\(_,ngrams') -> (not . null) $ intersect ngrams ngrams') targets
) egos ) egos
pairs' = pairs `using` parList rdeepseq pairs' = pairs `using` parList rdeepseq
in acc ++ (concat pairs') in acc ++ (concat pairs')
) [] $ phylo ^. phylo_periods ) [] $ phylo ^. phylo_periods
in phylo & phylo_diaSimScan .~ Set.fromList (traceGroupsProxi $ map snd scanning) in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi)
appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
...@@ -142,15 +112,15 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -142,15 +112,15 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
let pId = phyloLvl ^. phylo_scalePeriod let pId = phyloLvl ^. phylo_scalePeriod
pId' = phyloLvl ^. phylo_scalePeriodStr pId' = phyloLvl ^. phylo_scalePeriodStr
phyloCUnit = m ! pId phyloCUnit = m ! pId
in phyloLvl in phyloLvl
& phylo_scaleGroups .~ (fromList $ foldl (\groups obj -> & phylo_scaleGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups) groups ++ [ (((pId,lvl),length groups)
, f obj pId pId' lvl (length groups) , f obj pId pId' lvl (length groups)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId])) (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloCUnit) ] ) [] phyloCUnit)
else else
phyloLvl ) phyloLvl )
phylo phylo
clusterToGroup :: Clustering -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup clusterToGroup :: Clustering -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup
...@@ -164,29 +134,24 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx "" ...@@ -164,29 +134,24 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
(fromList [("breaks",[0]),("seaLevels",[0])]) (fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] [] [] [] [] [] [] [] []
{-
-- enhance the phylo with temporal links
-}
addTemporalLinksToPhylo :: Phylo -> Phylo addTemporalLinksToPhylo :: Phylo -> Phylo
addTemporalLinksToPhylo phylowithoutLink = case strategy of addTemporalLinksToPhylo phylowithoutLink = case (getSeaElevation phylowithoutLink) of
Constante start gap -> temporalMatching (constDiachronicLadder start gap Set.empty) phylowithoutLink Constante start gap -> constanteTemporalMatching start gap phylowithoutLink
Adaptative steps -> temporalMatching (adaptDiachronicLadder steps (phylowithoutLink ^. phylo_diaSimScan) Set.empty) phylowithoutLink Adaptative steps -> adaptativeTemporalMatching steps phylowithoutLink
where
strategy :: SeaElevation
strategy = getSeaElevation phylowithoutLink
----------------------- -----------------------
-- | To Phylo Step | -- -- | To Phylo Step | --
----------------------- -----------------------
indexDates' :: Map (Date,Date) [Document] -> Map (Date,Date) (Text,Text) indexDates' :: Map (Date,Date) [Document] -> Map (Date,Date) (Text,Text)
indexDates' m = map (\docs -> indexDates' m = map (\docs ->
let ds = map (\d -> date' d) docs let ds = map (\d -> date' d) docs
f = if (null ds) f = if (null ds)
then "" then ""
else toFstDate ds else toFstDate ds
l = if (null ds) l = if (null ds)
then "" then ""
else toLstDate ds else toLstDate ds
in (f,l)) m in (f,l)) m
...@@ -196,9 +161,9 @@ indexDates' m = map (\docs -> ...@@ -196,9 +161,9 @@ indexDates' m = map (\docs ->
-- QL: backend entre phyloBase et Clustering -- QL: backend entre phyloBase et Clustering
-- tophylowithoutLink -- tophylowithoutLink
toPhyloWithoutLink :: [Document] -> TermList -> PhyloConfig -> Phylo toPhyloWithoutLink :: [Document] -> TermList -> PhyloConfig -> Phylo
toPhyloWithoutLink docs lst conf = case (getSeaElevation phyloBase) of toPhyloWithoutLink docs lst conf = case (getSeaElevation phyloBase) of
Constante _ _ -> appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase) Constante _ _ -> appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
Adaptative _ -> scanSimilarity 1 Adaptative _ -> toGroupsProxi 1
$ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase) $ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
where where
-------------------------------------- --------------------------------------
...@@ -237,23 +202,23 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >= ...@@ -237,23 +202,23 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >=
-- To filter nested Fis -- To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering] filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
filterCliqueByNested m = filterCliqueByNested m =
let clq = map (\l -> let clq = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem) foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem)
then mem then mem
else else
let fMax = filter (\f' -> not $ isNested (f ^. clustering_roots) (f' ^. clustering_roots)) mem let fMax = filter (\f' -> not $ isNested (f ^. clustering_roots) (f' ^. clustering_roots)) mem
in fMax ++ [f] ) [] l) in fMax ++ [f] ) [] l)
$ elems m $ elems m
clq' = clq `using` parList rdeepseq clq' = clq `using` parList rdeepseq
in fromList $ zip (keys m) clq' in fromList $ zip (keys m) clq'
-- | To transform a time map of docs into a time map of Fis with some filters -- | To transform a time map of docs into a time map of Fis with some filters
toSeriesOfClustering :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [Clustering] toSeriesOfClustering :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [Clustering]
toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
Fis s s' -> -- traceFis "Filtered Fis" Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested filterCliqueByNested
{- \$ traceFis "Filtered by clique size" -} {- \$ traceFis "Filtered by clique size" -}
$ filterClique True s' (filterCliqueBySize) $ filterClique True s' (filterCliqueBySize)
{- \$ traceFis "Filtered by support" -} {- \$ traceFis "Filtered by support" -}
...@@ -263,33 +228,33 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -263,33 +228,33 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
MaxClique s _ _ -> filterClique True s (filterCliqueBySize) MaxClique s _ _ -> filterClique True s (filterCliqueBySize)
seriesOfClustering seriesOfClustering
where where
-------------------------------------- --------------------------------------
seriesOfClustering :: Map (Date,Date) [Clustering] seriesOfClustering :: Map (Date,Date) [Clustering]
seriesOfClustering = case (clique $ getConfig phylo) of seriesOfClustering = case (clique $ getConfig phylo) of
Fis _ _ -> Fis _ _ ->
let fis = map (\(prd,docs) -> let fis = map (\(prd,docs) ->
case (corpusParser $ getConfig phylo) of case (corpusParser $ getConfig phylo) of
Csv' _ -> let lst = toList Csv' _ -> let lst = toList
$ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs) $ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
in (prd, map (\f -> Clustering (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst) in (prd, map (\f -> Clustering (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst)
_ -> let lst = toList _ -> let lst = toList
$ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs) $ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs)
in (prd, map (\f -> Clustering (Set.toList $ fst f) (snd f) prd (Just $ fromIntegral $ snd f) []) lst) in (prd, map (\f -> Clustering (Set.toList $ fst f) (snd f) prd (Just $ fromIntegral $ snd f) []) lst)
) )
$ toList phyloDocs $ toList phyloDocs
fis' = fis `using` parList rdeepseq fis' = fis `using` parList rdeepseq
in fromList fis' in fromList fis'
MaxClique _ thr filterType -> MaxClique _ thr filterType ->
let mcl = map (\(prd,docs) -> let mcl = map (\(prd,docs) ->
let cooc = map round let cooc = map round
$ foldl sumCooc empty $ foldl sumCooc empty
$ map listToMatrix $ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs $ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> Clustering cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc)) in (prd, map (\cl -> Clustering cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc))
$ toList phyloDocs $ toList phyloDocs
mcl' = mcl `using` parList rdeepseq mcl' = mcl `using` parList rdeepseq
in fromList mcl' in fromList mcl'
-------------------------------------- --------------------------------------
-- dev viz graph maxClique getMaxClique -- dev viz graph maxClique getMaxClique
...@@ -299,9 +264,9 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -299,9 +264,9 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
-------------------- --------------------
-- To transform the docs into a time map of coocurency matrix -- To transform the docs into a time map of coocurency matrix
docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc
docsToTimeScaleCooc docs fdt = docsToTimeScaleCooc docs fdt =
let mCooc = fromListWith sumCooc let mCooc = fromListWith sumCooc
$ map (\(_d,l) -> (_d, listToMatrix l)) $ map (\(_d,l) -> (_d, listToMatrix l))
$ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs $ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs
...@@ -319,8 +284,8 @@ docsToTimeScaleCooc docs fdt = ...@@ -319,8 +284,8 @@ docsToTimeScaleCooc docs fdt =
groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc] groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc]
groupDocsByPeriodRec f prds docs acc = groupDocsByPeriodRec f prds docs acc =
if ((null prds) || (null docs)) if ((null prds) || (null docs))
then acc then acc
else else
let prd = head' "groupBy" prds let prd = head' "groupBy" prds
docs' = partition (\d -> (f d >= fst prd) && (f d <= snd prd)) docs docs' = partition (\d -> (f d >= fst prd) && (f d <= snd prd)) docs
in groupDocsByPeriodRec f (tail prds) (snd docs') (insert prd (fst docs') acc) in groupDocsByPeriodRec f (tail prds) (snd docs') (insert prd (fst docs') acc)
...@@ -332,7 +297,7 @@ groupDocsByPeriod' f pds docs = ...@@ -332,7 +297,7 @@ groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = map (inPeriode f docs') pds periods = map (inPeriode f docs') pds
periods' = periods `using` parList rdeepseq periods' = periods `using` parList rdeepseq
in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length pds) <> " periods" <> "\n") in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length pds) <> " periods" <> "\n")
$ fromList $ zip pds periods' $ fromList $ zip pds periods'
where where
-------------------------------------- --------------------------------------
...@@ -349,14 +314,14 @@ groupDocsByPeriod f pds es = ...@@ -349,14 +314,14 @@ groupDocsByPeriod f pds es =
let periods = map (inPeriode f es) pds let periods = map (inPeriode f es) pds
periods' = periods `using` parList rdeepseq periods' = periods `using` parList rdeepseq
in trace ("\n" <> "-- | Group " <> show(length es) <> " docs by " <> show(length pds) <> " periods" <> "\n") in trace ("\n" <> "-- | Group " <> show(length es) <> " docs by " <> show(length pds) <> " periods" <> "\n")
$ fromList $ zip pds periods' $ fromList $ zip pds periods'
where where
-------------------------------------- --------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t] inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) = inPeriode f' h (start,end) =
fst $ partition (\d -> f' d >= start && f' d <= end) h fst $ partition (\d -> f' d >= start && f' d <= end) h
-------------------------------------- --------------------------------------
docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double
...@@ -364,34 +329,34 @@ docsToTermFreq docs fdt = ...@@ -364,34 +329,34 @@ docsToTermFreq docs fdt =
let nbDocs = fromIntegral $ length docs let nbDocs = fromIntegral $ length docs
freqs = map (/(nbDocs)) freqs = map (/(nbDocs))
$ fromList $ fromList
$ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst)) $ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) docs $ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) docs
sumFreqs = sum $ elems freqs sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs in map (/sumFreqs) freqs
docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double
docsToLastTermFreq n docs fdt = docsToLastTermFreq n docs fdt =
let last = take n $ reverse $ sort $ map date docs let last = take n $ reverse $ sort $ map date docs
nbDocs = fromIntegral $ length $ filter (\d -> elem (date d) last) docs nbDocs = fromIntegral $ length $ filter (\d -> elem (date d) last) docs
freqs = map (/(nbDocs)) freqs = map (/(nbDocs))
$ fromList $ fromList
$ map (\lst -> (head' "docsToLastTermFreq" lst, fromIntegral $ length lst)) $ map (\lst -> (head' "docsToLastTermFreq" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) $ filter (\d -> elem (date d) last) docs $ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) $ filter (\d -> elem (date d) last) docs
sumFreqs = sum $ elems freqs sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs in map (/sumFreqs) freqs
-- To count the number of docs by unit of time -- To count the number of docs by unit of time
docsToTimeScaleNb :: [Document] -> Map Date Double docsToTimeScaleNb :: [Document] -> Map Date Double
docsToTimeScaleNb docs = docsToTimeScaleNb docs =
let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1 time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1
in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length time) <> " unit of time" <> "\n") in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length time) <> " unit of time" <> "\n")
$ unionWith (+) time docs' $ unionWith (+) time docs'
initPhyloScales :: Int -> Period -> Map PhyloScaleId PhyloScale initPhyloScales :: Int -> Period -> Map PhyloScaleId PhyloScale
initPhyloScales lvlMax pId = initPhyloScales lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloScale pId ("","") lvl empty)) [1..lvlMax] fromList $ map (\lvl -> ((pId,lvl),PhyloScale pId ("","") lvl empty)) [1..lvlMax]
...@@ -399,19 +364,20 @@ initPhyloScales lvlMax pId = ...@@ -399,19 +364,20 @@ initPhyloScales lvlMax pId =
-- Init the basic elements of a Phylo -- Init the basic elements of a Phylo
-- --
initPhylo :: [Document] -> TermList -> PhyloConfig -> Phylo initPhylo :: [Document] -> TermList -> PhyloConfig -> Phylo
initPhylo docs lst conf = initPhylo docs lst conf =
let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs) docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs)
params = defaultPhyloParam { _phyloParam_config = conf } params = defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf) periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf)
in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n") in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n")
$ Phylo foundations $ Phylo foundations
docsSources docsSources
(docsToTimeScaleCooc docs (foundations ^. foundations_roots)) (docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs) (docsToTimeScaleNb docs)
(docsToTermFreq docs (foundations ^. foundations_roots)) (docsToTermFreq docs (foundations ^. foundations_roots))
(docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots)) (docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots))
Set.empty empty
empty
params params
(fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods) (fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods)
0 0
...@@ -14,7 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloTools where ...@@ -14,7 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloTools where
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group, notElem) import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group, notElem)
import Data.Map.Strict (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys) import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.Set (Set, disjoint) import Data.Set (Set, disjoint)
import Data.String (String) import Data.String (String)
import Data.Text (Text,unpack) import Data.Text (Text,unpack)
...@@ -25,6 +25,7 @@ import Gargantext.Prelude ...@@ -25,6 +25,7 @@ import Gargantext.Prelude
import Prelude (floor,read) import Prelude (floor,read)
import Text.Printf import Text.Printf
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
...@@ -139,7 +140,6 @@ periodsToYears periods = (Set.fromList . sort . concat) ...@@ -139,7 +140,6 @@ periodsToYears periods = (Set.fromList . sort . concat)
findBounds :: [Date] -> (Date,Date) findBounds :: [Date] -> (Date,Date)
findBounds [] = panic "[G.C.V.P.PhyloTools] empty dates for find bounds"
findBounds dates = findBounds dates =
let dates' = sort dates let dates' = sort dates
in (head' "findBounds" dates', last' "findBounds" dates') in (head' "findBounds" dates', last' "findBounds" dates')
...@@ -387,10 +387,10 @@ getLevelParentId :: PhyloGroup -> PhyloGroupId ...@@ -387,10 +387,10 @@ getLevelParentId :: PhyloGroup -> PhyloGroupId
getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents
getLastLevel :: Phylo -> Scale getLastLevel :: Phylo -> Scale
getLastLevel phylo = last' "lastLevel" $ getScales phylo getLastLevel phylo = last' "lastLevel" $ getLevels phylo
getScales :: Phylo -> [Scale] getLevels :: Phylo -> [Scale]
getScales phylo = nub getLevels phylo = nub
$ map snd $ map snd
$ keys $ view ( phylo_periods $ keys $ view ( phylo_periods
. traverse . traverse
...@@ -408,7 +408,7 @@ getPhyloSeaRiseStart phylo = case (getSeaElevation phylo) of ...@@ -408,7 +408,7 @@ getPhyloSeaRiseStart phylo = case (getSeaElevation phylo) of
getPhyloSeaRiseSteps :: Phylo -> Double getPhyloSeaRiseSteps :: Phylo -> Double
getPhyloSeaRiseSteps phylo = case (getSeaElevation phylo) of getPhyloSeaRiseSteps phylo = case (getSeaElevation phylo) of
Constante _ s -> s Constante _ s -> s
Adaptative s -> s Adaptative s -> s
getConfig :: Phylo -> PhyloConfig getConfig :: Phylo -> PhyloConfig
...@@ -431,16 +431,14 @@ getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots ...@@ -431,16 +431,14 @@ getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
getSources :: Phylo -> Vector Text getSources :: Phylo -> Vector Text
getSources phylo = _sources (phylo ^. phylo_sources) getSources phylo = _sources (phylo ^. phylo_sources)
phyloToLastBranches :: Phylo -> [[PhyloGroup]]
-- get the groups distributed by branches at the last scale phyloToLastBranches phylo = elems
phyloLastScale :: Phylo -> [[PhyloGroup]]
phyloLastScale phylo = elems
$ fromListWith (++) $ fromListWith (++)
$ map (\g -> (g ^. phylo_groupBranchId, [g])) $ map (\g -> (g ^. phylo_groupBranchId, [g]))
$ getGroupsFromScale (last' "byBranches" $ getScales phylo) phylo $ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo
getGroupsFromScale :: Scale -> Phylo -> [PhyloGroup] getGroupsFromLevel :: Scale -> Phylo -> [PhyloGroup]
getGroupsFromScale lvl phylo = getGroupsFromLevel lvl phylo =
elems $ view ( phylo_periods elems $ view ( phylo_periods
. traverse . traverse
. phylo_periodScales . phylo_periodScales
...@@ -449,8 +447,8 @@ getGroupsFromScale lvl phylo = ...@@ -449,8 +447,8 @@ getGroupsFromScale lvl phylo =
. phylo_scaleGroups ) phylo . phylo_scaleGroups ) phylo
getGroupsFromScalePeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup] getGroupsFromLevelPeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup]
getGroupsFromScalePeriods lvl periods phylo = getGroupsFromLevelPeriods lvl periods phylo =
elems $ view ( phylo_periods elems $ view ( phylo_periods
. traverse . traverse
. filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods) . filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
...@@ -496,14 +494,14 @@ updatePeriods periods' phylo = ...@@ -496,14 +494,14 @@ updatePeriods periods' phylo =
) phylo ) phylo
updateQuality :: Double -> Phylo -> Phylo updateQuality :: Double -> Phylo -> Phylo
updateQuality quality phylo = phylo { _phylo_quality = quality } updateQuality quality phylo = phylo { _phylo_quality = quality }
traceToPhylo :: Scale -> Phylo -> Phylo traceToPhylo :: Scale -> Phylo -> Phylo
traceToPhylo lvl phylo = traceToPhylo lvl phylo =
trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with " trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
<> show (length $ getGroupsFromScale lvl phylo) <> " groups and " <> show (length $ getGroupsFromLevel lvl phylo) <> " groups and "
<> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo) <> " branches" <> "\n") phylo <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel lvl phylo) <> " branches" <> "\n") phylo
-------------------- --------------------
-- | Clustering | -- -- | Clustering | --
...@@ -566,15 +564,15 @@ toRelatedComponents nodes edges = ...@@ -566,15 +564,15 @@ toRelatedComponents nodes edges =
traceSynchronyEnd :: Phylo -> Phylo traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo = traceSynchronyEnd phylo =
trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo) trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups" <> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches" <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo <> "\n" ) phylo
traceSynchronyStart :: Phylo -> Phylo traceSynchronyStart :: Phylo -> Phylo
traceSynchronyStart phylo = traceSynchronyStart phylo =
trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo) trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups" <> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches" <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo <> "\n" ) phylo
...@@ -592,7 +590,7 @@ getMinSharedNgrams :: Proximity -> Int ...@@ -592,7 +590,7 @@ getMinSharedNgrams :: Proximity -> Int
getMinSharedNgrams proxi = case proxi of getMinSharedNgrams proxi = case proxi of
WeightedLogJaccard _ m -> m WeightedLogJaccard _ m -> m
WeightedLogSim _ m -> m WeightedLogSim _ m -> m
Hamming _ _ -> undefined Hamming _ _ -> undefined
---------------- ----------------
-- | Branch | -- -- | Branch | --
...@@ -661,6 +659,6 @@ traceTemporalMatching groups = ...@@ -661,6 +659,6 @@ traceTemporalMatching groups =
trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
traceGroupsProxi :: [Double] -> [Double] traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double
traceGroupsProxi l = traceGroupsProxi m =
trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups proximity" <> "\n") l trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m
...@@ -16,13 +16,13 @@ import Control.Lens hiding (Level) ...@@ -16,13 +16,13 @@ import Control.Lens hiding (Level)
import Control.Monad (sequence) import Control.Monad (sequence)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, groupBy) import Data.List ((++), null, intersect, nub, concat, sort, sortOn, groupBy)
import Data.Map.Strict (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member) import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics) import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics)
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos) import Gargantext.Core.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Map.Strict as Map import qualified Data.Map as Map
------------------------- -------------------------
...@@ -30,16 +30,16 @@ import qualified Data.Map.Strict as Map ...@@ -30,16 +30,16 @@ import qualified Data.Map.Strict as Map
------------------------- -------------------------
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs = mergeGroups coocs id mapIds childs =
let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
in PhyloGroup (fst $ fst id) (_phylo_groupPeriod' $ head' "mergeGroups" childs) in PhyloGroup (fst $ fst id) (_phylo_groupPeriod' $ head' "mergeGroups" childs)
(snd $ fst id) (snd id) "" (snd $ fst id) (snd id) ""
(sum $ map _phylo_groupSupport childs) (sum $ map _phylo_groupSupport childs)
(fmap sum $ sequence (fmap sum $ sequence
$ map _phylo_groupWeight childs) $ map _phylo_groupWeight childs)
(concat $ map _phylo_groupSources childs) (concat $ map _phylo_groupSources childs)
ngrams ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
((snd $ fst id),bId) ((snd $ fst id),bId)
(mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs) (mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs) (updatePointers $ concat $ map _phylo_groupPeriodParents childs)
...@@ -61,10 +61,10 @@ mergeGroups coocs id mapIds childs = ...@@ -61,10 +61,10 @@ mergeGroups coocs id mapIds childs =
mergeAncestors pointers = Map.toList $ fromListWith max pointers mergeAncestors pointers = Map.toList $ fromListWith max pointers
addPhyloScale :: Scale -> Phylo -> Phylo addPhyloScale :: Scale -> Phylo -> Phylo
addPhyloScale lvl phylo = addPhyloScale lvl phylo =
over ( phylo_periods . traverse ) over ( phylo_periods . traverse )
(\phyloPrd -> phyloPrd & phylo_periodScales (\phyloPrd -> phyloPrd & phylo_periodScales
%~ (insert (phyloPrd ^. phylo_periodPeriod, lvl) %~ (insert (phyloPrd ^. phylo_periodPeriod, lvl)
(PhyloScale (phyloPrd ^. phylo_periodPeriod) (phyloPrd ^. phylo_periodPeriodStr) lvl empty))) phylo (PhyloScale (phyloPrd ^. phylo_periodPeriod) (phyloPrd ^. phylo_periodPeriodStr) lvl empty))) phylo
...@@ -82,12 +82,12 @@ toNextScale phylo groups = ...@@ -82,12 +82,12 @@ toNextScale phylo groups =
$ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups $ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups
newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups
in traceSynchronyEnd in traceSynchronyEnd
$ over ( phylo_periods . traverse . phylo_periodScales . traverse $ over ( phylo_periods . traverse . phylo_periodScales . traverse
-- 6) update each period at curLvl + 1 -- 6) update each period at curLvl + 1
. filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == (curLvl + 1))) . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == (curLvl + 1)))
-- 7) by adding the parents -- 7) by adding the parents
(\phyloLvl -> (\phyloLvl ->
if member (phyloLvl ^. phylo_scalePeriod) newPeriods if member (phyloLvl ^. phylo_scalePeriod) newPeriods
then phyloLvl & phylo_scaleGroups then phyloLvl & phylo_scaleGroups
.~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_scalePeriod)) .~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_scalePeriod))
...@@ -95,18 +95,18 @@ toNextScale phylo groups = ...@@ -95,18 +95,18 @@ toNextScale phylo groups =
-- 2) add the curLvl + 1 PhyloScale to the phylo -- 2) add the curLvl + 1 PhyloScale to the phylo
$ addPhyloScale (curLvl + 1) $ addPhyloScale (curLvl + 1)
-- 1) update the current groups (with level parent pointers) in the phylo -- 1) update the current groups (with level parent pointers) in the phylo
$ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo $ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
-------------------- --------------------
-- | Clustering | -- -- | Clustering | --
-------------------- --------------------
toPairs :: SynchronyStrategy -> [PhyloGroup] -> [(PhyloGroup,PhyloGroup)] toPairs :: SynchronyStrategy -> [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
toPairs strategy groups = case strategy of toPairs strategy groups = case strategy of
MergeRegularGroups -> pairs MergeRegularGroups -> pairs
$ filter (\g -> all (== 3) $ (g ^. phylo_groupMeta) ! "dynamics") groups $ filter (\g -> all (== 3) $ (g ^. phylo_groupMeta) ! "dynamics") groups
MergeAllGroups -> pairs groups MergeAllGroups -> pairs groups
where where
pairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)] pairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
pairs gs = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) (listToCombi' gs) pairs gs = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) (listToCombi' gs)
...@@ -116,7 +116,7 @@ toDiamonds groups = foldl' (\acc groups' -> ...@@ -116,7 +116,7 @@ toDiamonds groups = foldl' (\acc groups' ->
acc ++ ( elems acc ++ ( elems
$ Map.filter (\v -> length v > 1) $ Map.filter (\v -> length v > 1)
$ fromListWith (++) $ fromListWith (++)
$ foldl' (\acc' g -> $ foldl' (\acc' g ->
acc' ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodChilds)) [] groups')) [] acc' ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodChilds)) [] groups')) []
$ elems $ elems
$ Map.filter (\v -> length v > 1) $ Map.filter (\v -> length v > 1)
...@@ -130,27 +130,27 @@ groupsToEdges prox sync nbDocs diago groups = ...@@ -130,27 +130,27 @@ groupsToEdges prox sync nbDocs diago groups =
ByProximityThreshold thr sens _ strat -> ByProximityThreshold thr sens _ strat ->
filter (\(_,w) -> w >= thr) filter (\(_,w) -> w >= thr)
$ toEdges sens $ toEdges sens
$ toPairs strat groups $ toPairs strat groups
ByProximityDistribution sens strat -> ByProximityDistribution sens strat ->
let diamonds = sortOn snd let diamonds = sortOn snd
$ toEdges sens $ concat $ toEdges sens $ concat
$ map (\gs -> toPairs strat gs) $ toDiamonds groups $ map (\gs -> toPairs strat gs) $ toDiamonds groups
in take (div (length diamonds) 2) diamonds in take (div (length diamonds) 2) diamonds
where where
toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)] toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)]
toEdges sens edges = toEdges sens edges =
case prox of case prox of
WeightedLogJaccard _ _ -> map (\(g,g') -> WeightedLogJaccard _ _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' (sens) nbDocs diago ((g,g'), weightedLogJaccard' (sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
WeightedLogSim _ _ -> map (\(g,g') -> WeightedLogSim _ _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' (1 / sens) nbDocs diago ((g,g'), weightedLogJaccard' (1 / sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
_ -> undefined _ -> undefined
toParentId :: PhyloGroup -> PhyloGroupId toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupScale + 1), child ^. phylo_groupIndex) toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupScale + 1), child ^. phylo_groupIndex)
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup] reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
...@@ -159,25 +159,24 @@ reduceGroups prox sync docs diagos branch = ...@@ -159,25 +159,24 @@ reduceGroups prox sync docs diagos branch =
let periods = fromListWith (++) let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems) in (concat . concat . elems)
-- TODO : ajouter un parallelisme $ mapWithKey (\prd groups ->
$ mapWithKey (\prd groups ->
-- 2) for each period, transform the groups as a proximity graph filtered by a threshold -- 2) for each period, transform the groups as a proximity graph filtered by a threshold
let diago = reduceDiagos $ filterDiago diagos [prd] let diago = reduceDiagos $ filterDiago diagos [prd]
edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups
in map (\comp -> in map (\comp ->
-- 4) add to each groups their futur level parent group -- 4) add to each groups their futur level parent group
let parentId = toParentId (head' "parentId" comp) let parentId = toParentId (head' "parentId" comp)
in map (\g -> g & phylo_groupScaleParents %~ (++ [(parentId,1)]) ) comp ) in map (\g -> g & phylo_groupScaleParents %~ (++ [(parentId,1)]) ) comp )
-- 3) reduce the graph a a set of related components -- 3) reduce the graph a a set of related components
$ toRelatedComponents groups edges) periods $ toRelatedComponents groups edges) periods
chooseClusteringStrategy :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]] adjustClustering :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
chooseClusteringStrategy sync branches = case sync of adjustClustering sync branches = case sync of
ByProximityThreshold _ _ scope _ -> case scope of ByProximityThreshold _ _ scope _ -> case scope of
SingleBranch -> branches SingleBranch -> branches
SiblingBranches -> groupBy (\g g' -> (last' "chooseClusteringStrategy" $ (g ^. phylo_groupMeta) ! "breaks") SiblingBranches -> groupBy (\g g' -> (last' "adjustClustering" $ (g ^. phylo_groupMeta) ! "breaks")
== (last' "chooseClusteringStrategy" $ (g' ^. phylo_groupMeta) ! "breaks")) == (last' "adjustClustering" $ (g' ^. phylo_groupMeta) ! "breaks"))
$ sortOn _phylo_groupBranchId $ concat branches $ sortOn _phylo_groupBranchId $ concat branches
AllBranches -> [concat branches] AllBranches -> [concat branches]
ByProximityDistribution _ _ -> branches ByProximityDistribution _ _ -> branches
...@@ -186,12 +185,12 @@ chooseClusteringStrategy sync branches = case sync of ...@@ -186,12 +185,12 @@ chooseClusteringStrategy sync branches = case sync of
levelUpAncestors :: [PhyloGroup] -> [PhyloGroup] levelUpAncestors :: [PhyloGroup] -> [PhyloGroup]
levelUpAncestors groups = levelUpAncestors groups =
-- 1) create an associative map of (old,new) ids -- 1) create an associative map of (old,new) ids
let ids' = fromList $ map (\g -> (getGroupId g, fst $ head' "levelUpAncestors" ( g ^. phylo_groupScaleParents))) groups let ids' = fromList $ map (\g -> (getGroupId g, fst $ head' "levelUpAncestors" ( g ^. phylo_groupScaleParents))) groups
in map (\g -> in map (\g ->
let id' = ids' ! (getGroupId g) let id' = ids' ! (getGroupId g)
ancestors = g ^. phylo_groupAncestors ancestors = g ^. phylo_groupAncestors
-- 2) level up the ancestors ids and filter the ones that will be merged -- 2) level up the ancestors ids and filter the ones that will be merged
ancestors' = filter (\(id,_) -> id /= id') $ map (\(id,w) -> (ids' ! id,w)) ancestors ancestors' = filter (\(id,_) -> id /= id') $ map (\(id,w) -> (ids' ! id,w)) ancestors
in g & phylo_groupAncestors .~ ancestors' in g & phylo_groupAncestors .~ ancestors'
) groups ) groups
...@@ -201,30 +200,30 @@ synchronicClustering phylo = ...@@ -201,30 +200,30 @@ synchronicClustering phylo =
sync = phyloSynchrony $ getConfig phylo sync = phyloSynchrony $ getConfig phylo
docs = phylo ^. phylo_timeDocs docs = phylo ^. phylo_timeDocs
diagos = map coocToDiago $ phylo ^. phylo_timeCooc diagos = map coocToDiago $ phylo ^. phylo_timeCooc
newBranches = map (\branch -> reduceGroups prox sync docs diagos branch) newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics $ map processDynamics
$ chooseClusteringStrategy sync $ adjustClustering sync
$ phyloLastScale $ phyloToLastBranches
$ traceSynchronyStart phylo $ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq newBranches' = newBranches `using` parList rdeepseq
in toNextScale phylo $ levelUpAncestors $ concat newBranches' in toNextScale phylo $ levelUpAncestors $ concat newBranches'
-- synchronicSimilarity :: Phylo -> Level -> String -- synchronicDistance :: Phylo -> Level -> String
-- synchronicSimilarity phylo lvl = -- synchronicDistance phylo lvl =
-- foldl' (\acc branch -> -- foldl' (\acc branch ->
-- acc <> (foldl' (\acc' period -> -- acc <> (foldl' (\acc' period ->
-- acc' <> let prox = phyloProximity $ getConfig phylo -- acc' <> let prox = phyloProximity $ getConfig phylo
-- sync = phyloSynchrony $ getConfig phylo -- sync = phyloSynchrony $ getConfig phylo
-- docs = _phylo_timeDocs phylo -- docs = _phylo_timeDocs phylo
-- prd = _phylo_groupPeriod $ head' "distance" period -- prd = _phylo_groupPeriod $ head' "distance" period
-- edges = groupsToEdges prox 0.1 (_bpt_sensibility sync) -- edges = groupsToEdges prox 0.1 (_bpt_sensibility sync)
-- ((sum . elems) $ restrictKeys docs $ periodsToYears [_phylo_groupPeriod $ head' "distance" period]) period -- ((sum . elems) $ restrictKeys docs $ periodsToYears [_phylo_groupPeriod $ head' "distance" period]) period
-- in foldl' (\mem (_,w) -> -- in foldl' (\mem (_,w) ->
-- mem <> show (prd) -- mem <> show (prd)
-- <> "\t" -- <> "\t"
-- <> show (w) -- <> show (w)
-- <> "\n" -- <> "\n"
-- ) "" edges -- ) "" edges
-- ) "" $ elems $ groupByField _phylo_groupPeriod branch) -- ) "" $ elems $ groupByField _phylo_groupPeriod branch)
-- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo -- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
...@@ -6,62 +6,47 @@ License : AGPL + CECILL v3 ...@@ -6,62 +6,47 @@ License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
Reference : Chavalarias, D., Lobbé, Q. & Delanoë, A. Draw me Science. Scientometrics 127, 545–575 (2022). https://doi.org/10.1007/s11192-021-04186-5
-} -}
module Gargantext.Core.Viz.Phylo.TemporalMatching where module Gargantext.Core.Viz.Phylo.TemporalMatching where
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.Ord import Data.Ord
import Data.List (concat, splitAt, tail, sortOn, sortBy, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or) import Data.List (concat, splitAt, tail, sortOn, sortBy, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or, sort, (!!))
import Data.Map.Strict (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), empty, mapKeys, adjust) import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
import Debug.Trace (trace) import Debug.Trace (trace)
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Prelude import Gargantext.Prelude
import Prelude (tan,pi) import Prelude (floor,tan,pi)
import Text.Printf import Text.Printf
import qualified Data.Map.Strict as Map import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
type Branch = [PhyloGroup]
type FinalQuality = Double
type LocalQuality = Double
type ShouldTry = Bool
---------------------------- -------------------
-- | Similarity Measure | -- -- | Proximity | --
---------------------------- -------------------
{- -- | To compute a jaccard similarity between two lists
-- compute a jaccard similarity between two lists
-}
jaccard :: [Int] -> [Int] -> Double jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union') jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
{- -- | Process the inverse sumLog
-- process the inverse sumLog
-}
sumInvLog' :: Double -> Double -> [Double] -> Double sumInvLog' :: Double -> Double -> [Double] -> Double
sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2))))) 0 diago sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2))))) 0 diago
{- -- | Process the sumLog
-- process the sumLog
-}
sumLog' :: Double -> Double -> [Double] -> Double sumLog' :: Double -> Double -> [Double] -> Double
sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2)))) 0 diago sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2)))) 0 diago
{-
-- compute the weightedLogJaccard
-}
weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
weightedLogJaccard' sens nbDocs diago ngrams ngrams' weightedLogJaccard' sens nbDocs diago ngrams ngrams'
| null ngramsInter = 0 | null ngramsInter = 0
...@@ -84,12 +69,8 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams' ...@@ -84,12 +69,8 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams'
diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion) diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
-------------------------------------- --------------------------------------
-- | Process the weighted similarity between clusters. Adapted from Wang, X., Cheng, Q., Lu, W., 2014. Analyzing evolution of research topics with NEViewer: a new method based on dynamic co-word networks. Scientometrics 101, 1253–1271. https://doi.org/10.1007/s11192-014-1347-y (log added in the formula + pair comparison)
{-
-- compute the weightedLogSim
-- Adapted from Wang, X., Cheng, Q., Lu, W., 2014. Analyzing evolution of research topics with NEViewer: a new method based on dynamic co-word networks. Scientometrics 101, 1253–1271. https://doi.org/10.1007/s11192-014-1347-y (log added in the formula + pair comparison)
-- tests not conclusive -- tests not conclusive
-}
weightedLogSim' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double weightedLogSim' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams
| null ngramsInter = 0 | null ngramsInter = 0
...@@ -115,11 +96,8 @@ weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams ...@@ -115,11 +96,8 @@ weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams
diagoTarget = elems $ restrictKeys diago (Set.fromList target_ngrams) diagoTarget = elems $ restrictKeys diago (Set.fromList target_ngrams)
-------------------------------------- --------------------------------------
{-
-- perform a seamilarity measure between a given group and a pair of targeted groups
-}
toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
-- | To process the proximity between a current group and a pair of targets group using the adapted Wang et al. Similarity
toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' = toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
case proximity of case proximity of
WeightedLogJaccard sens _ -> WeightedLogJaccard sens _ ->
...@@ -134,11 +112,9 @@ toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' = ...@@ -134,11 +112,9 @@ toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
in weightedLogSim' sens nbDocs diago egoNgrams pairNgrams in weightedLogSim' sens nbDocs diago egoNgrams pairNgrams
Hamming _ _ -> undefined Hamming _ _ -> undefined
------------------------
----------------------------- -- | Local Matching | --
-- | Pointers & Matrices | -- ------------------------
-----------------------------
findLastPeriod :: Filiation -> [Period] -> Period findLastPeriod :: Filiation -> [Period] -> Period
findLastPeriod fil periods = case fil of findLastPeriod fil periods = case fil of
...@@ -147,6 +123,8 @@ findLastPeriod fil periods = case fil of ...@@ -147,6 +123,8 @@ findLastPeriod fil periods = case fil of
ToChildsMemory -> undefined ToChildsMemory -> undefined
ToParentsMemory -> undefined ToParentsMemory -> undefined
-- | To filter pairs of candidates related to old pointers periods
removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> Period removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> Period
-> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))] -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
-> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))] -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
...@@ -166,6 +144,33 @@ removeOldPointers oldPointers fil thr prox prd pairs ...@@ -166,6 +144,33 @@ removeOldPointers oldPointers fil thr prox prd pairs
|| (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs || (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
| otherwise = [] | otherwise = []
makePairs :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [Period] -> [Pointer] -> Filiation -> Double -> Proximity
-> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
if (null periods)
then []
else removeOldPointers oldPointers fil thr prox lastPrd
{- at least on of the pair candidates should be from the last added period -}
$ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
$ filter (\((id,_),(id',_)) -> (elem id inPairs) || (elem id' inPairs))
$ listToCombi' candidates
where
--------------------------------------
inPairs :: [PhyloGroupId]
inPairs = map fst
$ filter (\(id,ngrams) ->
let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
) candidates
--------------------------------------
lastPrd :: Period
lastPrd = findLastPeriod fil periods
--------------------------------------
filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer] filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
...@@ -191,76 +196,10 @@ filterPointersByPeriod fil pts = ...@@ -191,76 +196,10 @@ filterPointersByPeriod fil pts =
ToChildsMemory -> undefined ToChildsMemory -> undefined
ToParentsMemory -> undefined ToParentsMemory -> undefined
filterDocs :: Map Date Double -> [Period] -> Map Date Double
filterDocs d pds = restrictKeys d $ periodsToYears pds
filterDiago :: Map Date Cooc -> [Period] -> Map Date Cooc phyloGroupMatching' :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
filterDiago diago pds = restrictKeys diago $ periodsToYears pds
---------------------------------
-- | Inter-temporal matching | --
---------------------------------
{-
-- perform the related component algorithm, construct the resulting branch id and update the corresponding group's branch id
-}
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [Branch]
groupsToBranches groups =
{- run the related component algorithm -}
let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
$ sortOn (\gs -> fst $ fst $ head' "egos" gs)
$ map (\group -> [getGroupId group]
++ (map fst $ group ^. phylo_groupPeriodParents)
++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
-- first find the related components by inside each ego's period
-- a supprimer
graph' = map relatedComponents egos
-- then run it for the all the periods
branches = zip [1..]
$ relatedComponents $ concat (graph' `using` parList rdeepseq)
-- update each group's branch id
in map (\(bId,branch) ->
let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
$ elems $ restrictKeys groups (Set.fromList branch)
in groups' `using` parList rdeepseq
) branches `using` parList rdeepseq
{-
-- find the best pair/singleton of parents/childs for a given group
-}
makePairs :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [Period] -> [Pointer] -> Filiation -> Double -> Proximity
-> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
if (null periods)
then []
else removeOldPointers oldPointers fil thr prox lastPrd
{- at least on of the pair candidates should be from the last added period -}
$ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
$ filter (\((id,_),(id',_)) -> (elem id inPairs) || (elem id' inPairs))
$ listToCombi' candidates
where
--------------------------------------
inPairs :: [PhyloGroupId]
inPairs = map fst
$ filter (\(id,ngrams) ->
let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
) candidates
--------------------------------------
lastPrd :: Period
lastPrd = findLastPeriod fil periods
--------------------------------------
{-
-- find the best temporal links between a given group and its parents/childs
-}
phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
-> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer] -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ngrams) = phyloGroupMatching' candidates filiation proxi docs diagos thr oldPointers (id,ngrams) =
if (null $ filterPointers proxi thr oldPointers) if (null $ filterPointers proxi thr oldPointers)
-- if no previous pointers satisfy the current threshold then let's find new pointers -- if no previous pointers satisfy the current threshold then let's find new pointers
then if null nextPointers then if null nextPointers
...@@ -275,7 +214,7 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng ...@@ -275,7 +214,7 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng
where where
nextPointers :: [[(Pointer,[Int])]] nextPointers :: [[(Pointer,[Int])]]
nextPointers = take 1 nextPointers = take 1
-- stop as soon as we find a time frame where at least one singleton / pair satisfies the threshold -- stop as soon as we find a time frame where at least one singleton / pair satisfies the threshold
$ dropWhile (null) $ dropWhile (null)
-- for each time frame, process the proximity on relevant pairs of targeted groups -- for each time frame, process the proximity on relevant pairs of targeted groups
$ scanl (\acc targets -> $ scanl (\acc targets ->
...@@ -286,11 +225,11 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng ...@@ -286,11 +225,11 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng
$ filterDiago diagos ([(fst . fst) id] ++ periods) $ filterDiago diagos ([(fst . fst) id] ++ periods)
singletons = processProximity nbdocs diago $ map (\g -> (g,g)) $ filter (\g -> (fst . fst . fst) g == lastPrd) targets singletons = processProximity nbdocs diago $ map (\g -> (g,g)) $ filter (\g -> (fst . fst . fst) g == lastPrd) targets
pairs = makePairs (id,ngrams) targets periods oldPointers filiation thr proxi docs diagos pairs = makePairs (id,ngrams) targets periods oldPointers filiation thr proxi docs diagos
in in
if (null singletons) if (null singletons)
then acc ++ ( processProximity nbdocs diago pairs ) then acc ++ ( processProximity nbdocs diago pairs )
else acc ++ singletons else acc ++ singletons
) [] $ map concat $ inits candidates -- groups from [[1900],[1900,1901],[1900,1901,1902],...] ) [] $ map concat $ inits candidates -- groups from [[1900],[1900,1901],[1900,1901,1902],...]
----------------------------- -----------------------------
processProximity :: Double -> Map Int Double -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))] -> [(Pointer,[Int])] processProximity :: Double -> Map Int Double -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))] -> [(Pointer,[Int])]
processProximity nbdocs diago targets = filterPointers' proxi thr processProximity nbdocs diago targets = filterPointers' proxi thr
...@@ -299,12 +238,58 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng ...@@ -299,12 +238,58 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng
let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c') let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
in if ((c == c') || (snd c == snd c')) in if ((c == c') || (snd c == snd c'))
then [((fst c,proximity),snd c)] then [((fst c,proximity),snd c)]
else [((fst c,proximity),snd c),((fst c',proximity),snd c')] ) targets else [((fst c,proximity),snd c),((fst c',proximity),snd c')] ) targets
phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
-> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ngrams) =
if (null $ filterPointers proxi thr oldPointers)
{- let's find new pointers -}
then if null nextPointers
then []
else filterPointersByPeriod filiation
$ head' "phyloGroupMatching"
-- Keep only the best set of pointers grouped by proximity
$ groupBy (\pt pt' -> (snd . fst) pt == (snd . fst) pt')
-- verifier que l on garde bien les plus importants
$ sortBy (comparing (Down . snd . fst)) $ head' "pointers" nextPointers
-- Find the first time frame where at leats one pointer satisfies the proximity threshold
else oldPointers
where
nextPointers :: [[(Pointer,[Int])]]
nextPointers = take 1
$ dropWhile (null)
{- for each time frame, process the proximity on relevant pairs of targeted groups -}
$ scanl (\acc groups ->
let periods = nub $ map (fst . fst . fst) $ concat groups
nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
diago = reduceDiagos
$ filterDiago diagos ([(fst . fst) id] ++ periods)
pairs = makePairs (id,ngrams) (concat groups) periods oldPointers filiation thr proxi docs diagos
in acc ++ ( filterPointers' proxi thr
$ concat
$ map (\(c,c') ->
{- process the proximity between the current group and a pair of candidates -}
let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
in if ((c == c') || (snd c == snd c'))
then [((fst c,proximity),snd c)]
else [((fst c,proximity),snd c),((fst c',proximity),snd c')] ) pairs )) []
$ inits candidates -- groups from [[1900],[1900,1901],[1900,1901,1902],...]
filterDocs :: Map Date Double -> [Period] -> Map Date Double
filterDocs d pds = restrictKeys d $ periodsToYears pds
filterDiago :: Map Date Cooc -> [Period] -> Map Date Cooc
filterDiago diago pds = restrictKeys diago $ periodsToYears pds
-----------------------------
-- | Matching Processing | --
-----------------------------
{-
-- get the upstream/downstream timescale of a given period
-}
getNextPeriods :: Filiation -> Int -> Period -> [Period] -> [Period] getNextPeriods :: Filiation -> Int -> Period -> [Period] -> [Period]
getNextPeriods fil max' pId pIds = getNextPeriods fil max' pId pIds =
case fil of case fil of
...@@ -314,23 +299,17 @@ getNextPeriods fil max' pId pIds = ...@@ -314,23 +299,17 @@ getNextPeriods fil max' pId pIds =
ToParentsMemory -> undefined ToParentsMemory -> undefined
{-
-- find all the candidates parents/childs of ego
-}
getCandidates :: Int -> PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]] getCandidates :: Int -> PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
getCandidates minNgrams ego targets = getCandidates minNgrams ego targets =
if (length (ego ^. phylo_groupNgrams)) > 1 if (length (ego ^. phylo_groupNgrams)) > 1
then then
map (\groups' -> filter (\g' -> (> minNgrams) $ length $ intersect (ego ^. phylo_groupNgrams) (snd g')) groups') targets map (\groups' -> filter (\g' -> (> minNgrams) $ length $ intersect (ego ^. phylo_groupNgrams) (snd g')) groups') targets
else else
map (\groups' -> filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')) groups') targets map (\groups' -> filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')) groups') targets
{- matchGroupsToGroups :: Int -> [Period] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
-- set up and start performing the upstream/downstream inter‐temporal matching period by period matchGroupsToGroups frame periods proximity thr docs coocs groups =
-}
reconstructTemporalLinks :: Int -> [Period] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reconstructTemporalLinks frame periods proximity thr docs coocs groups =
let groups' = groupByField _phylo_groupPeriod groups let groups' = groupByField _phylo_groupPeriod groups
in foldl' (\acc prd -> in foldl' (\acc prd ->
let -- 1) find the parents/childs matching periods let -- 1) find the parents/childs matching periods
...@@ -339,7 +318,7 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups = ...@@ -339,7 +318,7 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups =
-- 2) find the parents/childs matching candidates -- 2) find the parents/childs matching candidates
candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
-- 3) find the parents/childs number of docs by years -- 3) find the parents/child number of docs by years
docsPar = filterDocs docs ([prd] ++ periodsPar) docsPar = filterDocs docs ([prd] ++ periodsPar)
docsChi = filterDocs docs ([prd] ++ periodsChi) docsChi = filterDocs docs ([prd] ++ periodsChi)
-- 4) find the parents/child diago by years -- 4) find the parents/child diago by years
...@@ -347,9 +326,9 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups = ...@@ -347,9 +326,9 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups =
diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar) diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
-- 5) match in parallel all the groups (egos) to their possible candidates -- 5) match in parallel all the groups (egos) to their possible candidates
egos = map (\ego -> egos = map (\ego ->
let pointersPar = phyloGroupMatching (getCandidates (getMinSharedNgrams proximity) ego candidatesPar) ToParents proximity docsPar diagoPar let pointersPar = phyloGroupMatching' (getCandidates (getMinSharedNgrams proximity) ego candidatesPar) ToParents proximity docsPar diagoPar
thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams) thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
pointersChi = phyloGroupMatching (getCandidates (getMinSharedNgrams proximity) ego candidatesChi) ToChilds proximity docsChi diagoChi pointersChi = phyloGroupMatching' (getCandidates (getMinSharedNgrams proximity) ego candidatesChi) ToChilds proximity docsChi diagoChi
thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams) thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
in addPointers ToChilds TemporalPointer pointersChi in addPointers ToChilds TemporalPointer pointersChi
$ addPointers ToParents TemporalPointer pointersPar $ addPointers ToParents TemporalPointer pointersPar
...@@ -361,51 +340,27 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups = ...@@ -361,51 +340,27 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups =
) [] periods ) [] periods
{- -----------------------
-- reconstruct a phylomemetic network from a list of groups and from a given threshold -- | Phylo Quality | --
-} -----------------------
toPhylomemeticNetwork :: Int -> [Period] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [Branch]
toPhylomemeticNetwork timescale periods similarity thr docs coocs groups =
groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ reconstructTemporalLinks timescale periods similarity thr docs coocs groups
----------------------------
-- | Quality Assessment | --
----------------------------
{-
-- filter the branches containing x
-}
relevantBranches :: Int -> [Branch] -> [Branch]
relevantBranches x branches =
filter (\groups -> (any (\group -> elem x $ group ^. phylo_groupNgrams) groups)) branches
relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
relevantBranches term branches =
filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
{- accuracy :: Int -> [(Date,Date)] -> [PhyloGroup] -> Double
-- compute the accuracy ξ -- The accuracy of a branch relatively to a term x is computed only over the periods there exist some cluster mentionning x in the phylomemy
-- the accuracy of a branch relatively to a root x is computed only over the periods where clusters mentionning x in the phylo do exist accuracy x periods bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk')
-} / (fromIntegral $ length bk'))
accuracy :: Int -> [(Date,Date)] -> Branch -> Double
accuracy x periods bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk') / (fromIntegral $ length bk'))
where where
---
bk' :: [PhyloGroup] bk' :: [PhyloGroup]
bk' = filter (\g -> elem (g ^. phylo_groupPeriod) periods) bk bk' = filter (\g -> elem (g ^. phylo_groupPeriod) periods) bk
recall :: Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
{-
-- compute the recall ρ
-}
recall :: Int -> Branch -> [Branch] -> Double
recall x bk bx = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk) recall x bk bx = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
/ (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx)) / (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx))
{-
-- compute the F-score function
-}
fScore :: Double -> Int -> [(Date,Date)] -> [PhyloGroup] -> [[PhyloGroup]] -> Double fScore :: Double -> Int -> [(Date,Date)] -> [PhyloGroup] -> [[PhyloGroup]] -> Double
fScore lambda x periods bk bx = fScore lambda x periods bk bx =
let rec = recall x bk bx let rec = recall x bk bx
...@@ -414,18 +369,11 @@ fScore lambda x periods bk bx = ...@@ -414,18 +369,11 @@ fScore lambda x periods bk bx =
/ (((lambda ** 2) * acc + rec)) / (((lambda ** 2) * acc + rec))
{-
-- compute the number of groups
-}
wk :: [PhyloGroup] -> Double wk :: [PhyloGroup] -> Double
wk bk = fromIntegral $ length bk wk bk = fromIntegral $ length bk
toRecall :: Map Int Double -> [[PhyloGroup]] -> Double
{- toRecall freq branches =
-- compute the recall ρ for all the branches
-}
globalRecall :: Map Int Double -> [Branch] -> Double
globalRecall freq branches =
if (null branches) if (null branches)
then 0 then 0
else sum else sum
...@@ -440,11 +388,8 @@ globalRecall freq branches = ...@@ -440,11 +388,8 @@ globalRecall freq branches =
pys = sum (elems freq) pys = sum (elems freq)
{- toAccuracy :: Map Int Double -> [[PhyloGroup]] -> Double
-- compute the accuracy ξ for all the branches toAccuracy freq branches =
-}
globalAccuracy :: Map Int Double -> [Branch] -> Double
globalAccuracy freq branches =
if (null branches) if (null branches)
then 0 then 0
else sum else sum
...@@ -461,9 +406,7 @@ globalAccuracy freq branches = ...@@ -461,9 +406,7 @@ globalAccuracy freq branches =
pys = sum (elems freq) pys = sum (elems freq)
{- -- | here we do the average of all the local f_scores
-- compute the quality score F(λ)
-}
toPhyloQuality :: Double -> Double -> Map Int Double -> [[PhyloGroup]] -> Double toPhyloQuality :: Double -> Double -> Map Int Double -> [[PhyloGroup]] -> Double
toPhyloQuality fdt lambda freq branches = toPhyloQuality fdt lambda freq branches =
if (null branches) if (null branches)
...@@ -483,166 +426,307 @@ toPhyloQuality fdt lambda freq branches = ...@@ -483,166 +426,307 @@ toPhyloQuality fdt lambda freq branches =
-- pys :: Double -- pys :: Double
-- pys = sum (elems freq) -- pys = sum (elems freq)
-- 1 / nb de foundation
------------------------- ------------------------------------
-- | Sea-level Rise | -- -- | Constant Temporal Matching | --
------------------------- ------------------------------------
{-
-- attach a rise value to branches & groups metadata
-}
riseToMeta :: Double -> [Branch] -> [Branch]
riseToMeta rise branches =
let break = length branches > 1
in map (\b ->
map (\g ->
if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [rise]) "breaks"(g ^. phylo_groupMeta))
else g) b) branches
-- add a branch id within each group
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches groups =
{- run the related component algorithm -}
let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
$ sortOn (\gs -> fst $ fst $ head' "egos" gs)
$ map (\group -> [getGroupId group]
++ (map fst $ group ^. phylo_groupPeriodParents)
++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
-- first find the related components by inside each ego's period
-- a supprimer
graph' = map relatedComponents egos
-- then run it for the all the periods
branches = zip [1..]
$ relatedComponents $ concat (graph' `using` parList rdeepseq)
-- update each group's branch id
in map (\(bId,branch) ->
let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
$ elems $ restrictKeys groups (Set.fromList branch)
in groups' `using` parList rdeepseq
) branches `using` parList rdeepseq
{-
-- attach a thr value to branches & groups metadata
-}
thrToMeta :: Double -> [Branch] -> [Branch]
thrToMeta thr branches =
map (\b ->
map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
reduceFrequency frequency branches =
restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
{- updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
-- TODO updateThr thr branches = map (\b -> map (\g ->
-- 1) try the zipper structure https://wiki.haskell.org/Zipper to performe the sea-level rise algorithme g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
-- 2) investigate how the branches order influences the 'separateBranches' function
-}
{- -- Sequentially break each branch of a phylo where
-- sequentially separate each branch for a given threshold and check if it locally increases the quality score -- done = all the allready broken branches
-- sequence = [done] | currentBranch | [rest] -- ego = the current branch we want to break
-- done = all the already separated branches -- rest = the branches we still have to break
-- rest = all the branches we still have to separate breakBranches :: Double -> Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
-} -> Int -> Map Date Double -> Map Date Cooc -> [Period] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
separateBranches :: Double -> Proximity -> Double -> Map Int Double -> Int -> Double -> Double breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
-> Int -> Map Date Double -> Map Date Cooc -> [Period] -- 1) keep or not the new division of ego
-> [(Branch,ShouldTry)] -> (Branch,ShouldTry) -> [(Branch,ShouldTry)] let done' = done ++ (if snd ego
-> [(Branch,ShouldTry)]
separateBranches fdt similarity lambda frequency minBranch thr rise timescale docs coocs periods done currentBranch rest =
let done' = done ++ (if snd currentBranch
then then
(if ((null (fst branches')) || (quality > quality')) (if ((null (fst ego')) || (quality > quality'))
---- 5) if the quality is not increased by the new branches or if the new branches are all small
---- then undo the separation and localy stop the sea rise
---- else validate the separation and authorise next sea rise in the long new branches
then then
-- trace (" ✗ F(λ) = " <> show(quality) <> " (vs) " <> show(quality') -- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
-- <> " | " <> show(length $ fst ego) <> " groups : " -- <> " | " <> show(length $ fst ego) <> " groups : "
-- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego') -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
-- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]") -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
[(fst currentBranch,False)] [(fst ego,False)]
else else
-- trace (" ✓ F(λ) = " <> show(quality) <> " (vs) " <> show(quality') -- trace (" ✓ level = " <> printf "%.1f" thr <> "")
-- <> " | " <> show(length $ fst ego) <> " groups : " -- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
-- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego') -- <> " | " <> show(length $ fst ego) <> " groups : "
-- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]") -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
((map (\e -> (e,True)) (fst branches')) ++ (map (\e -> (e,False)) (snd branches')))) -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
else [currentBranch]) ((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
else [ego])
in in
-- 6) if there is no more branch to separate tne return [done'] else continue with [rest] -- 2) if there is no more branches in rest then return else continue
if null rest if null rest
then done' then done'
else separateBranches fdt similarity lambda frequency minBranch thr rise timescale docs coocs periods else breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods
done' (List.head rest) (List.tail rest) done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where where
------- 1) compute the quality before splitting any branch --------------------------------------
quality :: LocalQuality quality :: Double
quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst currentBranch] ++ (map fst rest)) quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
--------------------------------------
------------------- 2) split the current branch and create a new phylomemetic network ego' :: ([[PhyloGroup]],[[PhyloGroup]])
phylomemeticNetwork :: [Branch] ego' =
phylomemeticNetwork = toPhylomemeticNetwork timescale periods similarity thr docs coocs (fst currentBranch) let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
--------- 3) change the new phylomemetic network into a tuple of new branches branches' = branches `using` parList rdeepseq
--------- on the left : the long branches, on the right : the small ones in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
branches' :: ([Branch],[Branch]) $ thrToMeta thr
branches' = partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch) $ depthToMeta (elevation - depth) branches'
$ thrToMeta thr --------------------------------------
$ riseToMeta rise phylomemeticNetwork quality' :: Double
-------- 4) compute again the quality by considering the new branches
quality' :: LocalQuality
quality' = toPhyloQuality fdt lambda frequency quality' = toPhyloQuality fdt lambda frequency
((map fst done) ++ (fst branches') ++ (snd branches') ++ (map fst rest)) ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
{- seaLevelMatching :: Double -> Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
-- perform the sea-level rise algorithm, browse the similarity ladder and check that we can try out the next step -> Int -> [Period] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> ([([PhyloGroup],Bool)],Double)
-} seaLevelMatching fdt proximity lambda minBranch frequency thr step depth elevation frame periods docs coocs branches =
seaLevelRise :: Double -> Proximity -> Double -> Int -> Map Int Double -- if there is no branch to break or if seaLvl level > 1 then end
-> [Double] -> Double if (thr >= 1) || ((not . or) $ map snd branches)
-> Int -> [Period]
-> Map Date Double -> Map Date Cooc
-> [(Branch,ShouldTry)]
-> ([(Branch,ShouldTry)],FinalQuality)
seaLevelRise fdt proximity lambda minBranch frequency ladder rise frame periods docs coocs branches =
-- if the ladder is empty or thr > 1 or there is no branch to break then stop
if (null ladder) || ((List.head ladder) > 1) || (stopRise branches)
then (branches, toPhyloQuality fdt lambda frequency (map fst branches)) then (branches, toPhyloQuality fdt lambda frequency (map fst branches))
else else
-- start breaking up all the possible branches for the current similarity threshold -- break all the possible branches at the current seaLvl level
let thr = List.head ladder let quality = toPhyloQuality fdt lambda frequency (map fst branches)
branches' = trace ("threshold = " <> printf "%.3f" thr acc = toAccuracy frequency (map fst branches)
<> " F(λ) = " <> printf "%.5f" (toPhyloQuality fdt lambda frequency (map fst branches)) rec = toRecall frequency (map fst branches)
<> " ξ = " <> printf "%.5f" (globalAccuracy frequency (map fst branches)) branches' = trace ("↑ level = " <> printf "%.3f" thr <> " F(λ) = " <> printf "%.5f" quality
<> " ρ = " <> printf "%.5f" (globalRecall frequency (map fst branches)) <> " ξ = " <> printf "%.5f" acc
<> " branches = " <> show(length branches)) <> " ρ = " <> printf "%.5f" rec
$ separateBranches fdt proximity lambda frequency minBranch thr rise frame docs coocs periods <> " branches = " <> show(length branches) <> " ↴")
[] (List.head branches) (List.tail branches) $ breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods
in seaLevelRise fdt proximity lambda minBranch frequency (List.tail ladder) (rise + 1) frame periods docs coocs branches' [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches')
in seaLevelMatching fdt proximity lambda minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
constanteTemporalMatching start step phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat (map fst $ (fst branches)))
(toPhyloHorizon (updateQuality (snd branches) phylo))
where where
-------- -- 2) process the temporal matching by elevating seaLvl level
stopRise :: [(Branch,ShouldTry)] -> Bool -- branches :: ([([groups in the same branch],should westill break the branch?)],final quality)
stopRise bs = ((not . or) $ map snd bs) branches :: ([([PhyloGroup],Bool)],Double)
branches = seaLevelMatching (fromIntegral $ Vector.length $ getRoots phylo)
{-
-- start the temporal matching process up, recover the resulting branches and update the groups (at scale 1) consequently
-}
temporalMatching :: [Double] -> Phylo -> Phylo
temporalMatching ladder phylo = updatePhyloGroups 1
(Map.fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
(updateQuality quality phylo)
where
-------
quality :: FinalQuality
quality = snd sea
--------
branches :: [Branch]
branches = map fst $ fst sea
--- 2) process the temporal matching by elevating the similarity ladder
sea :: ([(Branch,ShouldTry)],FinalQuality)
sea = seaLevelRise (fromIntegral $ Vector.length $ getRoots phylo)
(phyloProximity $ getConfig phylo) (phyloProximity $ getConfig phylo)
(_qua_granularity $ phyloQuality $ getConfig phylo) (_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo) (_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq) (phylo ^. phylo_termFreq)
ladder 1 start step
((((1 - start) / step) - 1))
(((1 - start) / step))
(getTimeFrame $ timeUnit $ getConfig phylo) (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (getPeriodIds phylo)
(phylo ^. phylo_timeDocs) (phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc) (phylo ^. phylo_timeCooc)
(reverse $ sortOn (length . fst) seabed) (reverse $ sortOn (length . fst) initBranches)
-- 1) for each group process an initial temporal Matching
------ 1) for each group, process an initial temporal Matching and create a 'seabed' -- here we suppose that all the groups of level 1 are part of the same big branch
------ ShouldTry determines if you should apply the seaLevelRise function again within each branch -- the Bool param determines weither you should apply the sealevel within the branch
seabed :: [(Branch,ShouldTry)] -- creer un type [PhyloGroup] <=> Branch
seabed = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo))) initBranches :: [([PhyloGroup],Bool)]
$ toPhylomemeticNetwork (getTimeFrame $ timeUnit $ getConfig phylo) initBranches = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
(getPeriodIds phylo) $ groupsToBranches $ Map.fromList $ map (\g -> (getGroupId g, g))
(phyloProximity $ getConfig phylo) $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
(List.head ladder) (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
start
(phylo ^. phylo_timeDocs) (phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc) (phylo ^. phylo_timeCooc)
(traceTemporalMatching $ getGroupsFromScale 1 phylo) (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
-----------------
-- | Horizon | --
-----------------
toPhyloHorizon :: Phylo -> Phylo
toPhyloHorizon phylo =
let t0 = take 1 (getPeriodIds phylo)
groups = getGroupsFromLevelPeriods 1 t0 phylo
sens = getSensibility (phyloProximity $ getConfig phylo)
nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) t0
diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) t0
in phylo & phylo_horizon .~ (fromList $ map (\(g,g') ->
((getGroupId g,getGroupId g'),weightedLogJaccard' sens nbDocs diago (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) $ listToCombi' groups)
--------------------------------------
-- | Adaptative Temporal Matching | --
--------------------------------------
thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
thrToMeta thr branches =
map (\b ->
map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
depthToMeta depth branches =
let break = length branches > 1
in map (\b ->
map (\g ->
if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
else g) b) branches
reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double
getInTupleMap m k k'
| isJust (m !? ( k ,k')) = m ! ( k ,k')
| isJust (m !? ( k',k )) = m ! ( k',k )
| otherwise = 0
toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
toThreshold nbSteps proxiGroups =
let idx = ((Map.size proxiGroups) `div` (floor nbSteps)) - 1
in if idx >= 0
then (sort $ elems proxiGroups) !! idx
else 1
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
adaptativeBreakBranches :: Double -> Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
-> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
-> [Period] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
-> [([PhyloGroup],(Bool,[Double]))]
adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods done ego rest =
-- 1) keep or not the new division of ego
let done' = done ++ (if (fst . snd) ego
then (if ((null (fst ego')) || (quality > quality'))
then
[(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
else
( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
in
-- uncomment let .. in for debugging
-- let part1 = partition (snd) done'
-- part2 = partition (snd) rest
-- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
-- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
-- ) $
-- 2) if there is no more branches in rest then return else continue
if null rest
then done'
else adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where
--------------------------------------
thr :: Double
thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
--------------------------------------
quality :: Double
quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
--------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' =
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
$ thrToMeta thr
$ depthToMeta (elevation - depth) branches'
--------------------------------------
quality' :: Double
quality' = toPhyloQuality fdt lambda frequency
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
adaptativeSeaLevelMatching :: Double -> Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
-> Double -> Int -> Map Int Double
-> Int -> [Period] -> Map Date Double -> Map Date Cooc
-> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
adaptativeSeaLevelMatching fdt proxiConf depth elevation groupsProxi lambda minBranch frequency frame periods docs coocs branches =
-- if there is no branch to break or if seaLvl level >= depth then end
if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
then branches
else
-- break all the possible branches at the current seaLvl level
let branches' = adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches')
groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
thr = toThreshold depth groupsProxi
in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
<> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
<> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
<> " thr = " <> show(thr))
$ adaptativeSeaLevelMatching fdt proxiConf (depth - 1) elevation groupsProxi' lambda minBranch frequency' frame periods docs coocs branches'
adaptativeTemporalMatching :: Double -> Phylo -> Phylo
adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
(toPhyloHorizon phylo)
where
-- 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]]
branches = map fst
$ adaptativeSeaLevelMatching (fromIntegral $ Vector.length $ getRoots phylo)
(phyloProximity $ getConfig phylo)
(elevation - 1)
elevation
(phylo ^. phylo_groupsProxi)
(_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq)
(getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo)
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
groups
-- 1) for each group process an initial temporal Matching
-- here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],(Bool,[Double]))]
groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (phyloProximity $ getConfig phylo)
thr
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo)
--------------------------------------
thr :: Double
thr = toThreshold elevation (phylo ^. phylo_groupsProxi)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment