Fis.hs 6.58 KB
Newer Older
Quentin Lobbé's avatar
Quentin Lobbé committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
{-|
Module      : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX


-}

{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts  #-}
{-# LANGUAGE OverloadedStrings #-}

module Gargantext.Viz.Phylo.Aggregates.Fis
  where

20 21
import Data.List        (null,concat,sort)
import Data.Map         (Map, empty,elems)
Quentin Lobbé's avatar
Quentin Lobbé committed
22
import Data.Tuple       (fst, snd)
23
import Data.Set         (size)
Alexandre Delanoë's avatar
Alexandre Delanoë committed
24
import Gargantext.Prelude
Quentin Lobbé's avatar
Quentin Lobbé committed
25 26 27 28
import Gargantext.Text.Metrics.FrequentItemSet  (fisWithSizePolyMap, Size(..))
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import qualified Data.Map    as Map
29 30 31 32 33 34
import qualified Data.Set    as Set
import qualified Data.Vector.Storable as Vector

import Numeric.Statistics (percentile)

import Debug.Trace (trace)
Quentin Lobbé's avatar
Quentin Lobbé committed
35 36


37 38 39 40 41
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis :: Bool -> Int -> (Int -> [PhyloFis] -> [PhyloFis]) -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFis keep thr f m = case keep of
  False -> Map.map (\l -> f thr l) m
  True  -> Map.map (\l -> keepFilled (f) thr l) m
Quentin Lobbé's avatar
Quentin Lobbé committed
42 43


44 45 46
-- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
filterFisBySupport thr l = filter (\fis -> getSupport fis > thr) l
47 48


49 50 51
-- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) > thr) l
Quentin Lobbé's avatar
Quentin Lobbé committed
52 53 54


-- | To filter nested Fis 
55
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
Alexandre Delanoë's avatar
Alexandre Delanoë committed
56
filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ map getClique l) (map getClique l) []
57
                               in  filter (\fis -> elem (getClique fis) cliqueMax) l)
Quentin Lobbé's avatar
Quentin Lobbé committed
58 59 60


-- | To transform a list of Documents into a Frequent Items Set 
61
docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [PhyloFis]
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
docsToFis docs = map (\d -> let fs = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text d)
                            in map (\f -> PhyloFis (fst f) (snd f) empty) fs) docs


-- | To process a list of Filters on top of the PhyloFis
processFilters :: [Filter] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
processFilters filters phyloFis
  | null filters = phyloFis
  | otherwise    = panic "[ERR][Viz.Phylo.LevelMaker.processFilters] please add some filters for the Fis"


-- | To process a list of Metrics on top of the PhyloFis
processMetrics :: [Metric] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
processMetrics metrics phyloFis
  | null metrics = phyloFis
  | otherwise    = panic "[ERR][Viz.Phylo.LevelMaker.processMetrics] please add some metrics for the Fis"


-- | To transform some Documents into PhyloFis and apply a List of Metrics and Filters
81 82 83
toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> Int -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
toPhyloFis ds k s t ms fs = processFilters fs  
                          $ processMetrics ms
84
                          $ traceFis "----\nFiltered Fis by clique size :\n"
85
                          $ filterFis k t (filterFisByClique)
86
                          $ traceFis "----\nFiltered Fis by nested :\n"
87
                          $ filterFisByNested 
88
                          $ traceFis "----\nFiltered Fis by support :\n"
89
                          $ filterFis k s (filterFisBySupport)
90
                          $ traceFis "----\nUnfiltered Fis :\n"
91
                          $ docsToFis ds  
92 93 94 95 96 97


-----------------
-- | Tracers | --
-----------------

98 99


100 101
traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] 
traceFis lbl m = trace (lbl <> "count : " <> show (sum $ map length $ elems m) <> " Fis\n"
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
                            <> "support : " <> show (percentile 25 (Vector.fromList supps)) <> " (25%) "
                                            <> show (percentile 50 (Vector.fromList supps)) <> " (50%) "
                                            <> show (percentile 75 (Vector.fromList supps)) <> " (75%) "
                                            <> show (percentile 90 (Vector.fromList supps)) <> " (90%) "
                                            <> show (percentile 100 (Vector.fromList supps)) <> " (100%)\n"
                            <> "          " <> show (countSup 1 supps) <> " (>1) "
                                            <> show (countSup 2 supps) <> " (>2) "
                                            <> show (countSup 3 supps) <> " (>3) "
                                            <> show (countSup 4 supps) <> " (>4) "
                                            <> show (countSup 5 supps) <> " (>5) "
                                            <> show (countSup 6 supps) <> " (>6)\n"                                                                                                                                          
                            <> "clique size : " <> show (percentile 25 (Vector.fromList ngrms)) <> " (25%) "
                                                <> show (percentile 50 (Vector.fromList ngrms)) <> " (50%) "
                                                <> show (percentile 75 (Vector.fromList ngrms)) <> " (75%) "
                                                <> show (percentile 90 (Vector.fromList ngrms)) <> " (90%) "
                                                <> show (percentile 100 (Vector.fromList ngrms)) <> " (100%)\n"
                            <> "              " <> show (countSup 1 ngrms) <> " (>1) "
                                                <> show (countSup 2 ngrms) <> " (>2) "
                                                <> show (countSup 3 ngrms) <> " (>3) "
                                                <> show (countSup 4 ngrms) <> " (>4) "
                                                <> show (countSup 5 ngrms) <> " (>5) "
                                                <> show (countSup 6 ngrms) <> " (>6)\n"                                                                                             
124
                            ) m
125 126 127 128 129 130 131 132 133 134 135
  where
    --------------------------------------
    countSup :: Double -> [Double] -> Int
    countSup s l = length $ filter (>s) l 
    --------------------------------------
    supps :: [Double]
    supps = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems m
    --------------------------------------
    ngrms :: [Double]
    ngrms = sort $ map (\f -> fromIntegral $ Set.size $ _phyloFis_clique f) $ concat $ elems m
    --------------------------------------