Cluster.hs 6.05 KB
Newer Older
Quentin Lobbé's avatar
Quentin Lobbé committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
{-|
Module      : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX


-}

{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts  #-}
{-# LANGUAGE OverloadedStrings #-}

module Gargantext.Viz.Phylo.Aggregates.Cluster
  where

Quentin Lobbé's avatar
Quentin Lobbé committed
20
import Data.List        (null,tail,concat,sort,intersect)
21 22
import Data.Map         (Map)
import Data.Tuple       (fst)
Alexandre Delanoë's avatar
Alexandre Delanoë committed
23
import Gargantext.Prelude
Quentin Lobbé's avatar
Quentin Lobbé committed
24 25
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
26
import Gargantext.Viz.Phylo.Metrics.Proximity
Quentin Lobbé's avatar
Quentin Lobbé committed
27
import Gargantext.Viz.Phylo.Metrics.Clustering
Quentin Lobbé's avatar
Quentin Lobbé committed
28
import Gargantext.Viz.Phylo.Aggregates.Cooc
Quentin Lobbé's avatar
Quentin Lobbé committed
29 30
import qualified Data.Map    as Map

31 32 33
import qualified Data.Vector.Storable as VS
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
Quentin Lobbé's avatar
Quentin Lobbé committed
34

35

Quentin Lobbé's avatar
Quentin Lobbé committed
36 37 38 39 40 41 42
-- | Optimisation to filter only relevant candidates
getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
                 $ filter (\(g,g') -> g /= g')
                 $ listToDirectedCombi gs


43
-- | To transform a Graph into Clusters
44
graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
45
graphToClusters clust (nodes,edges) = case clust of
46 47 48 49 50 51
      Louvain (LouvainParams _)      -> undefined
      RelatedComponents (RCParams _) -> relatedComp 0 (head' "graphToClusters" nodes) (tail nodes,edges) [] []
      _                              -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"


-- | To transform a list of PhyloGroups into a Graph of Proximity
Quentin Lobbé's avatar
Quentin Lobbé committed
52 53 54 55
groupsToGraph :: Proximity -> [PhyloGroup] -> Map (Int, Int) Double -> Phylo -> ([GroupNode],[GroupEdge])
groupsToGraph prox gs cooc p = case prox of 
      WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p  
                                                                         $ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
Quentin Lobbé's avatar
Quentin Lobbé committed
56 57 58
                                                                         $ getCandidates gs)
      Hamming (HammingParams _)             -> (gs, map (\(x,y) -> ((x,y), hamming (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
                                                                         $ getCandidates gs)
59 60
      _                                     -> undefined 

Quentin Lobbé's avatar
Quentin Lobbé committed
61

62 63 64 65 66 67 68 69 70
-- | To filter a Graph of Proximity using a given threshold
filterGraph :: Proximity -> ([GroupNode],[GroupEdge]) -> ([GroupNode],[GroupEdge])
filterGraph prox (ns,es) = case prox of
      WeightedLogJaccard (WLJParams thr _) -> (ns, filter (\(_,v) -> v >= thr) es)
      Hamming (HammingParams thr)          -> (ns, filter (\(_,v) -> v <= thr) es)
      _                                    -> undefined 


-- | To clusterise a Phylo
71
phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
72 73 74 75 76 77 78 79 80 81 82 83 84
phyloToClusters lvl clus p = Map.fromList 
                            $ zip periods
                            $ map (\g -> if null (fst g)
                                         then []
                                         else graphToClusters clus g) graphs'
  where
    --------------------------------------
    graphs' :: [([GroupNode],[GroupEdge])]
    graphs' = traceGraphFiltered lvl
            $ map (\g -> filterGraph prox g) graphs
    --------------------------------------
    graphs  :: [([GroupNode],[GroupEdge])]
    graphs  = traceGraph lvl (getThreshold prox) 
Quentin Lobbé's avatar
Quentin Lobbé committed
85
            $ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p) (getCooc [prd] p) p) periods 
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
    --------------------------------------
    prox :: Proximity
    prox = getProximity clus
    --------------------------------------
    periods :: [PhyloPeriodId]
    periods = getPhyloPeriods p
    --------------------------------------


----------------
-- | Tracer | --
----------------


traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
                                      <> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
Quentin Lobbé's avatar
Quentin Lobbé committed
103
                                      <> show (lst) <> "\n"
104 105 106 107 108
                                      <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
                                                         <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
                                                         <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
                                                         <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
  where 
Quentin Lobbé's avatar
Quentin Lobbé committed
109
    lst = sort $ map snd $ concat $ map snd g 
110 111 112 113 114 115 116 117 118 119


traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> " :\n"
                                      <> "count : " <> show (length lst) <> " edges\n"
                                      <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
                                                         <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
                                                         <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
                                                         <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
  where 
Quentin Lobbé's avatar
Quentin Lobbé committed
120
    lst = sort $ map snd $ concat $ map snd g 
121

Quentin Lobbé's avatar
Quentin Lobbé committed
122 123

traceSim :: PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> Map (Int, Int) Double -> Phylo -> Double -> Double
124
traceSim g g' _ _ p sim = trace (show (getGroupText g p) <> " [vs] " <>  show (getGroupText g' p) <> " = " <> show (sim) <> "\n"
Quentin Lobbé's avatar
Quentin Lobbé committed
125 126
                                 -- <> show (c) <> " [vs] " <> show (c') <>  " = " <> show (sim)
                                 ) sim