Commit f6f6d304 authored by Quentin Lobbé's avatar Quentin Lobbé

Add the clustering up to level 2 and more

parent c07e35b8
Pipeline #265 failed with stage
...@@ -189,7 +189,14 @@ data PhyloError = LevelDoesNotExist ...@@ -189,7 +189,14 @@ data PhyloError = LevelDoesNotExist
deriving (Show) deriving (Show)
data Proximity = WeightedLogJaccard | Other type PhyloGraph = (PhyloNodes,PhyloEdges)
type PhyloNodes = [PhyloGroup]
type PhyloEdges = [(((PhyloGroup,PhyloGroup)),Double)]
data Proximity = WeightedLogJaccard | Hamming | FromPairs
data Clustering = Louvain | RelatedComponents
data PairTo = Childs | Parents data PairTo = Childs | Parents
......
...@@ -62,82 +62,117 @@ import qualified Data.Vector as Vector ...@@ -62,82 +62,117 @@ import qualified Data.Vector as Vector
-- | STEP 13 | -- Cluster the Fis -- | STEP 13 | -- Cluster the Fis
-- Il faudrait plutôt passer (Proximity,[Double]) où [Double] serait la liste des paramètres -- | To do : ajouter de nouveaux clusters / proxi
groupsToGraph :: Proximity -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -- gérer les cooc à level 2 et +, idem pour les quality
groupsToGraph prox groups = case prox of -- réfléchir aux formats de sortie
WeightedLogJaccard -> map (\(x,y) -> ((x,y), weightedLogJaccard 0 (getGroupCooc x) (unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) edges
_ -> undefined
where -- | To apply a Clustering method to a PhyloGraph
edges :: [(PhyloGroup,PhyloGroup)] graphToClusters :: (Clustering,[Double]) -> PhyloGraph -> [[PhyloGroup]]
edges = listToDirectedCombi groups graphToClusters (clust,param) (nodes,edges) = case clust of
Louvain -> undefined
RelatedComponents -> relatedComp 0 (head nodes) (tail nodes,edges) [] []
phyloToGraphs :: Level -> Proximity -> Phylo -> Map (Date,Date) [((PhyloGroup,PhyloGroup),Double)]
phyloToGraphs lvl prox p = Map.fromList
$ zip periods -- | To transform a Phylo into Clusters of PhyloGroups at a given level
(map (\prd -> groupsToGraph prox phyloToClusters :: Level -> (Proximity,[Double]) -> (Clustering,[Double]) -> Phylo -> Map (Date,Date) [[PhyloGroup]]
$ getGroupsWithFilters (getLevelValue lvl) prd p) periods) phyloToClusters lvl (prox,param) (clus,param') p = Map.fromList
where $ zip (getPhyloPeriods p)
-------------------------------------- (map (\prd -> let graph = groupsToGraph (prox,param) (getGroupsWithFilters (getLevelValue lvl) prd p) p
periods :: [PhyloPeriodId] in if null (fst graph)
periods = getPhyloPeriods p then []
-------------------------------------- else graphToClusters (clus,param') graph)
(getPhyloPeriods p))
-- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId -> Int -> Int -> Text -> [PhyloGroup] -> PhyloGroup
clusterToGroup prd lvl idx lbl groups = PhyloGroup ((prd, lvl), idx)
lbl
((sort . nub . concat) $ map getGroupNgrams groups)
empty
empty
[] [] []
(map (\g -> (getGroupId g, 1)) groups)
-- | To transform a list of Clusters into a new Phylolevel
clustersToPhyloLevel :: Level -> Map (Date,Date) [[PhyloGroup]] -> Phylo -> Phylo
clustersToPhyloLevel lvl m p = over (phylo_periods . traverse)
(\period ->
let periodId = _phylo_periodId period
clusters = zip [1..] (m ! periodId)
in over (phylo_periodLevels)
(\levels ->
let groups = map (\cluster -> clusterToGroup periodId (getLevelValue lvl) (fst cluster) "" (snd cluster)) clusters
in levels ++ [PhyloLevel (periodId, (getLevelValue lvl)) groups]
) period) p
phyloWithGroups2 = clustersToPhyloLevel (initLevel 2 Level_N)
(phyloToClusters (initLevel 1 Level_1) (WeightedLogJaccard,[0]) (RelatedComponents, []) phyloWithBranches_1)
phyloWithBranches_1
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | STEP 12 | -- Find the Branches -- | STEP 12 | -- Find the Branches
-- | To add a PhyloGroupId to list of Branches with conditions -- | To apply the related components method to a PhyloGraph
addToBranches :: (Int,Int) -> PhyloGroupId -> [PhyloBranch] -> [PhyloBranch] -- curr = the current PhyloGroup
addToBranches (lvl,idx) id branches -- (nodes,edges) = the initial PhyloGraph minus the current PhyloGroup
| null branches = [newBranch] -- next = the next PhyloGroups to be added in the cluster
| idx == lastIdx = (init branches) ++ [addGroupIdToBranch id (last branches)] -- memo = the memory of the allready created clusters
| otherwise = branches ++ [newBranch] relatedComp :: Int -> PhyloGroup -> PhyloGraph -> [PhyloGroup] -> [[PhyloGroup]] -> [[PhyloGroup]]
relatedComp idx curr (nodes,edges) next memo
| null nodes' && null next' = memo'
| (not . null) next' = relatedComp idx (head next') (nodes',edges) (tail next') memo'
| otherwise = relatedComp (idx + 1) (head nodes') (tail nodes',edges) [] memo'
where where
-------------------------------------- --------------------------------------
newBranch :: PhyloBranch memo' :: [[PhyloGroup]]
newBranch = PhyloBranch (lvl,idx) "" [id] memo'
| null memo = [[curr]]
| idx == ((length memo) - 1) = (init memo) ++ [(last memo) ++ [curr]]
| otherwise = memo ++ [[curr]]
--------------------------------------
next' :: [PhyloGroup]
next' = filter (\x -> not $ elem x $ concat memo) $ nub $ next ++ (getNeighbours False curr edges)
-------------------------------------- --------------------------------------
lastIdx :: Int nodes' :: [PhyloGroup]
lastIdx = (snd . _phylo_branchId . last) branches nodes' = filter (\x -> not $ elem x next') nodes
-------------------------------------- --------------------------------------
-- | To transform a list of PhyloGroups into a list of PhyloBranches where : -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
-- curr = the current PhyloGroup graphToBranches :: Int -> PhyloGraph -> Phylo -> [PhyloBranch]
-- rest = the rest of the initial list of PhyloGroups graphToBranches lvl (nodes,edges) p = map (\(idx,c) -> PhyloBranch (lvl,idx) "" (map getGroupId c)) $ zip [0..] clusters
-- next = the next PhyloGroups to be added in the current Branch where
-- memo = the memory of the allready created Branches, the last one is the current one --------------------------------------
groupsToBranches :: (Int,Int) -> PhyloGroup -> [PhyloGroup] -> [PhyloGroup] -> [PhyloBranch] -> Phylo -> [PhyloBranch] clusters :: [[PhyloGroup]]
groupsToBranches (lvl,idx) curr rest next memo p clusters = relatedComp 0 (head nodes) (tail nodes,edges) [] []
| null rest' && null next' = memo' --------------------------------------
| (not . null) next' = groupsToBranches (lvl,idx) (head next') rest' (tail next') memo' p
| otherwise = groupsToBranches (lvl,idx + 1) (head rest') (tail rest') [] memo' p
where -- | To transform a list of PhyloGroups into a PhyloGraph by using a given Proximity mesure
-------------------------------------- groupsToGraph :: (Proximity,[Double]) -> [PhyloGroup] -> Phylo -> PhyloGraph
done :: [PhyloGroup] groupsToGraph (prox,param) groups p = (groups,edges)
done = getGroupsFromIds (concat $ map (_phylo_branchGroups) memo) p where
-------------------------------------- edges :: PhyloEdges
memo' :: [PhyloBranch] edges = case prox of
memo' = addToBranches (lvl,idx) (getGroupId curr) memo FromPairs -> (nub . concat) $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
-------------------------------------- ++
next' :: [PhyloGroup] (map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) groups
next' = filter (\x -> not $ elem x done) $ nub $ next ++ (getGroupPairs curr p) WeightedLogJaccard -> map (\(x,y) -> ((x,y), weightedLogJaccard
-------------------------------------- (param !! 0) (getGroupCooc x)
rest' :: [PhyloGroup] (unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) $ listToDirectedCombi groups
rest' = filter (\x -> not $ elem x next') rest _ -> undefined
--------------------------------------
-- | To set all the PhyloBranches for a given Level in a Phylo -- | To set all the PhyloBranches for a given Level in a Phylo
setPhyloBranches :: Level -> Phylo -> Phylo setPhyloBranches :: Level -> Phylo -> Phylo
setPhyloBranches lvl p = alterPhyloBranches setPhyloBranches lvl p = alterPhyloBranches
(\branches -> branches ++ (groupsToBranches (\branches -> branches
(getLevelValue lvl, 0) ++
(head groups) (graphToBranches (getLevelValue lvl) (groupsToGraph (FromPairs,[]) groups p) p)
(tail groups)
[] [] p)
) p ) p
where where
-------------------------------------- --------------------------------------
...@@ -178,10 +213,9 @@ weightedLogJaccard s f1 f2 ...@@ -178,10 +213,9 @@ weightedLogJaccard s f1 f2
-- | To apply the corresponding proximity function based on a given Proximity -- | To apply the corresponding proximity function based on a given Proximity
getProximity :: Proximity -> Double -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double) getProximity :: (Proximity,[Double]) -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
getProximity prox s g1 g2 = case prox of getProximity (prox,param) g1 g2 = case prox of
WeightedLogJaccard -> ((getGroupId g2),weightedLogJaccard s (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1))) WeightedLogJaccard -> ((getGroupId g2),weightedLogJaccard (param !! 0) (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
Other -> undefined
_ -> panic ("[ERR][Viz.Phylo.Example.getProximity] Proximity function not defined") _ -> panic ("[ERR][Viz.Phylo.Example.getProximity] Proximity function not defined")
...@@ -215,11 +249,11 @@ getNextPeriods to id l = case to of ...@@ -215,11 +249,11 @@ getNextPeriods to id l = case to of
-- | To find the best set (max = 2) of Childs/Parents candidates based on a given Proximity mesure until a maximum depth (max = Period + 5 units ) -- | To find the best set (max = 2) of Childs/Parents candidates based on a given Proximity mesure until a maximum depth (max = Period + 5 units )
findBestCandidates :: PairTo -> Int -> Int -> Double -> Double -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)] findBestCandidates :: PairTo -> Int -> Int -> Double -> (Proximity,[Double]) -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)]
findBestCandidates to depth max thr s group p findBestCandidates to depth max thr (prox,param) group p
| depth > max || null next = [] | depth > max || null next = []
| (not . null) best = take 2 best | (not . null) best = take 2 best
| otherwise = findBestCandidates to (depth + 1) max thr s group p | otherwise = findBestCandidates to (depth + 1) max thr (prox,param) group p
where where
-------------------------------------- --------------------------------------
next :: [PhyloPeriodId] next :: [PhyloPeriodId]
...@@ -229,7 +263,7 @@ findBestCandidates to depth max thr s group p ...@@ -229,7 +263,7 @@ findBestCandidates to depth max thr s group p
candidates = getGroupsWithFilters (getGroupLevel group) (head next) p candidates = getGroupsWithFilters (getGroupLevel group) (head next) p
-------------------------------------- --------------------------------------
scores :: [(PhyloGroupId, Double)] scores :: [(PhyloGroupId, Double)]
scores = map (\group' -> getProximity WeightedLogJaccard s group group') candidates scores = map (\group' -> getProximity (prox,param) group group') candidates
-------------------------------------- --------------------------------------
best :: [(PhyloGroupId, Double)] best :: [(PhyloGroupId, Double)]
best = reverse best = reverse
...@@ -252,8 +286,8 @@ makePair to group ids = case to of ...@@ -252,8 +286,8 @@ makePair to group ids = case to of
-- | To pair all the Phylogroups of given PhyloLevel to their best Parents or Childs -- | To pair all the Phylogroups of given PhyloLevel to their best Parents or Childs
pairGroupsToGroups :: PairTo -> Level -> Double -> Double -> Phylo -> Phylo pairGroupsToGroups :: PairTo -> Level -> Double -> (Proximity,[Double]) -> Phylo -> Phylo
pairGroupsToGroups to lvl thr s p = alterPhyloGroups pairGroupsToGroups to lvl thr (prox,param) p = alterPhyloGroups
(\groups -> (\groups ->
map (\group -> map (\group ->
if (getGroupLevel group) == (getLevelValue lvl) if (getGroupLevel group) == (getLevelValue lvl)
...@@ -261,7 +295,7 @@ pairGroupsToGroups to lvl thr s p = alterPhyloGroups ...@@ -261,7 +295,7 @@ pairGroupsToGroups to lvl thr s p = alterPhyloGroups
let let
-------------------------------------- --------------------------------------
candidates :: [(PhyloGroupId, Double)] candidates :: [(PhyloGroupId, Double)]
candidates = findBestCandidates to 1 5 thr s group p candidates = findBestCandidates to 1 5 thr (prox,param) group p
-------------------------------------- --------------------------------------
in in
makePair to group candidates makePair to group candidates
...@@ -270,11 +304,11 @@ pairGroupsToGroups to lvl thr s p = alterPhyloGroups ...@@ -270,11 +304,11 @@ pairGroupsToGroups to lvl thr s p = alterPhyloGroups
phyloWithPair_1_Childs :: Phylo phyloWithPair_1_Childs :: Phylo
phyloWithPair_1_Childs = pairGroupsToGroups Childs (initLevel 1 Level_1) 0.01 0 phyloWithPair_1_Parents phyloWithPair_1_Childs = pairGroupsToGroups Childs (initLevel 1 Level_1) 0.01 (WeightedLogJaccard,[0]) phyloWithPair_1_Parents
phyloWithPair_1_Parents :: Phylo phyloWithPair_1_Parents :: Phylo
phyloWithPair_1_Parents = pairGroupsToGroups Parents (initLevel 1 Level_1) 0.01 0 phyloLinked_0_1 phyloWithPair_1_Parents = pairGroupsToGroups Parents (initLevel 1 Level_1) 0.01 (WeightedLogJaccard,[0]) phyloLinked_0_1
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
...@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.Tools ...@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.Tools
where where
import Control.Lens hiding (both, Level) import Control.Lens hiding (both, Level)
import Data.List (filter, intersect, (++), sort, null, head, tail, last, tails) import Data.List (filter, intersect, (++), sort, null, head, tail, last, tails, delete, nub)
import Data.Map (Map, mapKeys, member) import Data.Map (Map, mapKeys, member)
import Data.Set (Set) import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
...@@ -117,6 +117,11 @@ filterNestedSets h l l' ...@@ -117,6 +117,11 @@ filterNestedSets h l l'
| otherwise = filterNestedSets (head l) (tail l) (h : l') | otherwise = filterNestedSets (head l) (tail l) (h : l')
-- | To filter some PhyloEdges with a given threshold
filterPhyloEdges :: Double -> PhyloEdges -> PhyloEdges
filterPhyloEdges thr edges = filter (\((s,t),w) -> w > thr) edges
-- | To get the PhyloGroups Childs of a PhyloGroup -- | To get the PhyloGroups Childs of a PhyloGroup
getGroupChilds :: PhyloGroup -> Phylo -> [PhyloGroup] getGroupChilds :: PhyloGroup -> Phylo -> [PhyloGroup]
getGroupChilds g p = getGroupsFromIds (map fst $ _phylo_groupPeriodChilds g) p getGroupChilds g p = getGroupsFromIds (map fst $ _phylo_groupPeriodChilds g) p
...@@ -222,6 +227,15 @@ getLevelLinkValue dir link = case dir of ...@@ -222,6 +227,15 @@ getLevelLinkValue dir link = case dir of
_ -> panic "[ERR][Viz.Phylo.Tools.getLevelLinkValue] Wrong direction" _ -> panic "[ERR][Viz.Phylo.Tools.getLevelLinkValue] Wrong direction"
-- | To get the neighbours (directed/undirected) of a PhyloGroup from a list of PhyloEdges
getNeighbours :: Bool -> PhyloGroup -> PhyloEdges -> [PhyloGroup]
getNeighbours directed g e = case directed of
True -> map (\((s,t),w) -> t)
$ filter (\((s,t),w) -> s == g) e
False -> map (\((s,t),w) -> head $ delete g $ nub [s,t,g])
$ filter (\((s,t),w) -> s == g || t == g) e
-- | To get the Branches of a Phylo -- | To get the Branches of a Phylo
getPhyloBranches :: Phylo -> [PhyloBranch] getPhyloBranches :: Phylo -> [PhyloBranch]
getPhyloBranches = _phylo_branches getPhyloBranches = _phylo_branches
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment