Commit bea591f6 authored by qlobbe's avatar qlobbe

working on synchony

parent cb1136b4
Pipeline #570 failed with stage
......@@ -67,6 +67,13 @@ data Proximity =
deriving (Show,Generic,Eq)
data Synchrony =
ByProximityThreshold
{ _bpt_threshold :: Double }
| ByProximityDistribution
deriving (Show,Generic,Eq)
data TimeUnit =
Year
{ _year_period :: Int
......@@ -90,6 +97,7 @@ data Config =
, phyloName :: Text
, phyloLevel :: Int
, phyloProximity :: Proximity
, phyloSynchrony :: Synchrony
, timeUnit :: TimeUnit
, contextualUnit :: ContextualUnit
, exportLabel :: [PhyloLabel]
......@@ -107,6 +115,7 @@ defaultConfig =
, phyloName = pack "Default Phylo"
, phyloLevel = 1
, phyloProximity = WeightedLogJaccard 10 0 0.1
, phyloSynchrony = ByProximityThreshold 0.4
, timeUnit = Year 3 1 5
, contextualUnit = Fis 2 4
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
......@@ -134,6 +143,8 @@ instance FromJSON Order
instance ToJSON Order
instance FromJSON Filter
instance ToJSON Filter
instance FromJSON Synchrony
instance ToJSON Synchrony
-- | Software parameters
......
......@@ -31,6 +31,7 @@ import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.PhyloMaker
import Gargantext.Viz.Phylo.PhyloExport
import Gargantext.Viz.Phylo.TemporalMatching (temporalMatching)
import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Control.Lens
import Data.GraphViz.Types.Generalised (DotGraph)
......@@ -42,9 +43,12 @@ phyloExport :: IO ()
phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot" phyloDot
phyloDot :: DotGraph DotId
phyloDot = toPhyloExport phylo1
phyloDot = toPhyloExport phylo2
phylo2 :: Phylo
phylo2 = synchronicClustering phylo1
-----------------------------------------------
-- | STEP 3 | -- Build the Level 1 of the Phylo
-----------------------------------------------
......@@ -91,6 +95,7 @@ nbDocsByYear = docsToTimeScaleNb docs
config :: Config
config =
defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloLevel = 2
, exportFilter = [ByBranchSize 0]
, contextualUnit = Fis 0 0 }
......
......@@ -16,7 +16,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++))
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), filterWithKey, restrictKeys)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys)
import Data.Set (size)
import Data.Vector (Vector)
......@@ -24,6 +24,7 @@ import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (temporalMatching)
import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Text.Context (TermList)
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
......@@ -46,7 +47,8 @@ toPhylo docs lst conf = phylo1
where
--------------------------------------
phylo1 :: Phylo
phylo1 = temporalMatching
phylo1 = synchronicClustering
$ temporalMatching
$ toPhylo1 docs phyloBase
--------------------------------------
phyloBase :: Phylo
......@@ -168,14 +170,6 @@ toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
--------------------
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc :: [Int] -> [Cooc] -> Cooc
ngramsToCooc ngrams coocs =
let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
pairs = listToKeys ngrams
in filterWithKey (\k _ -> elem k pairs) cooc
-- | To transform the docs into a time map of coocurency matrix
docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc
docsToTimeScaleCooc docs fdt =
......
......@@ -17,9 +17,9 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, intersect, (\\))
import Data.Set (Set, size)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty)
import Data.String (String)
import Data.Text (Text, unwords)
......@@ -178,7 +178,6 @@ getFisSize unit = case unit of
-- | Cooc | --
--------------
listToCombi' :: [a] -> [(a,a)]
listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
......@@ -197,6 +196,15 @@ sumCooc cooc cooc' = unionWith (+) cooc cooc'
getTrace :: Cooc -> Double
getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc :: [Int] -> [Cooc] -> Cooc
ngramsToCooc ngrams coocs =
let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
pairs = listToKeys ngrams
in filterWithKey (\k _ -> elem k pairs) cooc
--------------------
-- | PhyloGroup | --
--------------------
......@@ -224,6 +232,16 @@ getPeriodIds phylo = sortOn fst
$ keys
$ phylo ^. phylo_periods
getLastLevel :: Phylo -> Level
getLastLevel phylo = last' "lastLevel" $ getLevels phylo
getLevels :: Phylo -> [Level]
getLevels phylo = nub
$ map snd
$ keys $ view ( phylo_periods
. traverse
. phylo_periodLevels ) phylo
getConfig :: Phylo -> Config
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
......@@ -232,6 +250,11 @@ getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
phyloToLastBranches :: Phylo -> [[PhyloGroup]]
phyloToLastBranches phylo = elems
$ fromListWith (++)
$ map (\g -> (g ^. phylo_groupBranchId, [g]))
$ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo
getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
getGroupsFromLevel lvl phylo =
......@@ -259,6 +282,21 @@ updatePhyloGroups lvl m phylo =
then m ! id
else group ) phylo
--------------------
-- | Clustering | --
--------------------
relatedComponents :: Eq a => [[a]] -> [[a]]
relatedComponents graphs = foldl' (\mem groups ->
if (null mem)
then mem ++ [groups]
else
let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
in if (null related)
then mem ++ [groups]
else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
-------------------
-- | Proximity | --
......
......@@ -16,22 +16,112 @@ Portability : POSIX
module Gargantext.Viz.Phylo.SynchronicClustering where
import Gargantext.Prelude
-- import Gargantext.Viz.AdaptativePhylo
-- import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard)
import Data.List ((++), null, intersect, nub, concat, sort)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Control.Lens hiding (Level)
import Debug.Trace (trace)
-------------------------
-- | New Level Maker | --
-------------------------
mergeGroups :: [Cooc] -> PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id childs =
let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
in PhyloGroup (fst $ fst id)
(snd $ fst id)
(snd id)
""
(sum $ map _phylo_groupSupport childs)
ngrams
(ngramsToCooc ngrams coocs)
(((head' "mergeGroups" childs) ^. phylo_groupLevel) + 1, snd ((head' "mergeGroups" childs) ^. phylo_groupBranchId))
empty
[]
(map (\g -> (getGroupId g, 1)) childs)
(concat $ map _phylo_groupPeriodParents childs)
(concat $ map _phylo_groupPeriodChilds childs)
addNewLevel :: Level -> Phylo -> Phylo
addNewLevel lvl phylo =
over ( phylo_periods
. traverse )
(\phyloPrd ->
phyloPrd & phylo_periodLevels %~ (insert (phyloPrd ^. phylo_periodPeriod, lvl + 1)
(PhyloLevel (phyloPrd ^. phylo_periodPeriod) (lvl + 1) empty))) phylo
toNextLevel :: Phylo -> [PhyloGroup] -> Phylo
toNextLevel phylo groups =
let level = getLastLevel phylo
phylo' = updatePhyloGroups level (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
nextGroups = fromListWith (++)
$ foldlWithKey (\acc k v ->
let group = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [fst $ fst k]) k v
in acc ++ [(group ^. phylo_groupPeriod,[group])]) []
$ fromListWith (++) $ map (\g -> (fst $ head' "nextGroups" $ g ^. phylo_groupLevelParents,[g])) groups
in trace (">>>>>>>>>>>>>>>>>>>>>>>>" <> show (nextGroups)) over ( phylo_periods
. traverse
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (level + 1)))
(\phyloLvl -> if member (phyloLvl ^. phylo_levelPeriod) nextGroups
then phyloLvl & phylo_levelGroups .~ fromList ( map (\g -> (getGroupId g,g))
$ nextGroups ! (phyloLvl ^. phylo_levelPeriod))
else phyloLvl
) $ addNewLevel level phylo'
import Data.List (foldl', (++), null, intersect, (\\), union, nub, concat)
--------------------
-- | Clustering | --
--------------------
relatedComponents :: Eq a => [[a]] -> [[a]]
relatedComponents graphs = foldl' (\mem groups ->
if (null mem)
then mem ++ [groups]
else
let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
in if (null related)
then mem ++ [groups]
else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
\ No newline at end of file
toPairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
toPairs groups = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))
$ listToCombi' groups
groupsToEdges :: Proximity -> Double -> Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox thr docs groups =
case prox of
WeightedLogJaccard sens _ _ -> filter (\(_,w) -> w >= thr)
$ map (\(g,g') -> ((g,g'), weightedLogJaccard sens docs (g ^. phylo_groupCooc) (g' ^. phylo_groupCooc) (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)))
$ toPairs groups
_ -> undefined
toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
toRelatedComponents nodes edges = relatedComponents $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
reduceBranch :: Proximity -> Double -> Map Date Double -> [PhyloGroup] -> [PhyloGroup]
reduceBranch prox thr docs branch =
-- | 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems)
$ mapWithKey (\prd groups ->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let edges = groupsToEdges prox thr ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) groups
in map (\(idx,comp) ->
-- | 4) add to each groups their futur level parent group
let parentId = (((head' "reduceBranch" comp) ^. phylo_groupPeriod, 1 + (head' "reduceBranch" comp) ^. phylo_groupLevel), idx)
in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp )
-- |3) reduce the graph a a set of related components
$ zip [1..] (toRelatedComponents groups edges)) periods
synchronicClustering :: Phylo -> Phylo
synchronicClustering phylo =
case (phyloSynchrony $ getConfig phylo) of
ByProximityThreshold thr -> toNextLevel phylo
$ concat
$ map (\branch -> reduceBranch (phyloProximity $ getConfig phylo) thr (phylo ^. phylo_timeDocs) branch)
$ phyloToLastBranches phylo
ByProximityDistribution -> undefined
\ No newline at end of file
......@@ -21,7 +21,6 @@ import Data.Map (Map, fromList, fromListWith, filterWithKey, elems, restrictKey
import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.SynchronicClustering
import Debug.Trace (trace)
import Prelude (logBase)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment