LevelMaker.hs 13.8 KB
Newer Older
Quentin Lobbé's avatar
Quentin Lobbé committed
1 2 3 4 5 6 7 8 9 10 11 12
{-|
Module      : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX


-}

13
{-# LANGUAGE NoImplicitPrelude    #-}
14
{-# LANGUAGE FlexibleContexts     #-}
15 16
{-# LANGUAGE OverloadedStrings    #-}
{-# LANGUAGE TypeSynonymInstances #-}
17
{-# LANGUAGE FlexibleInstances    #-}
Quentin Lobbé's avatar
Quentin Lobbé committed
18 19 20 21 22

module Gargantext.Viz.Phylo.LevelMaker
  where

import Control.Lens                 hiding (both, Level)
Alexandre Delanoë's avatar
Alexandre Delanoë committed
23
import Data.List                    ((++), sort, concat, nub, zip, last)
Quentin Lobbé's avatar
Quentin Lobbé committed
24
import Data.Map                     (Map, (!), empty, singleton)
25
import Data.Text (Text)
Quentin Lobbé's avatar
Quentin Lobbé committed
26
import Data.Tuple.Extra
Alexandre Delanoë's avatar
Alexandre Delanoë committed
27
import Gargantext.Prelude
28
import Gargantext.Viz.Phylo
29
import Gargantext.Viz.Phylo.Aggregates.Cluster
30
import Gargantext.Viz.Phylo.Aggregates.Document
31
import Gargantext.Viz.Phylo.Aggregates.Fis
32
import Gargantext.Viz.Phylo.BranchMaker
33 34
import Gargantext.Viz.Phylo.LinkMaker
import Gargantext.Viz.Phylo.Tools
35
import Gargantext.Text.Context (TermList)
36 37

import qualified Data.Vector.Storable as VS
Quentin Lobbé's avatar
Quentin Lobbé committed
38
import qualified Data.Set    as Set
39 40 41 42
import qualified Data.Vector as Vector

import Debug.Trace (trace)
import Numeric.Statistics (percentile)
Quentin Lobbé's avatar
Quentin Lobbé committed
43 44 45


-- | A typeClass for polymorphic PhyloLevel functions
46
class PhyloLevelMaker aggregate
Quentin Lobbé's avatar
Quentin Lobbé committed
47 48 49 50 51 52 53
    where
        -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
        addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
        -- | To create a list of PhyloGroups based on a list of aggregates a
        toPhyloGroups :: Level -> (Date,Date)  -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]


54
instance PhyloLevelMaker PhyloCluster
Quentin Lobbé's avatar
Quentin Lobbé committed
55 56
  where
    --------------------------------------
57
    -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
Quentin Lobbé's avatar
Quentin Lobbé committed
58 59
    addPhyloLevel lvl m p
      | lvl > 1   = toPhyloLevel lvl m p
60
      | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
Quentin Lobbé's avatar
Quentin Lobbé committed
61 62
    --------------------------------------
    -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
Quentin Lobbé's avatar
Quentin Lobbé committed
63
    toPhyloGroups lvl (d,d') l m _ = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m) $ zip [1..] l
Quentin Lobbé's avatar
Quentin Lobbé committed
64 65 66
    --------------------------------------


67
instance PhyloLevelMaker PhyloFis
Quentin Lobbé's avatar
Quentin Lobbé committed
68 69
  where
    --------------------------------------
70
    -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
Quentin Lobbé's avatar
Quentin Lobbé committed
71 72
    addPhyloLevel lvl m p
      | lvl == 1  = toPhyloLevel lvl m p
73
      | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
Quentin Lobbé's avatar
Quentin Lobbé committed
74 75
    --------------------------------------
    -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
Quentin Lobbé's avatar
Quentin Lobbé committed
76
    toPhyloGroups lvl (d,d') l _ p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
Quentin Lobbé's avatar
Quentin Lobbé committed
77 78 79
    --------------------------------------


80
instance PhyloLevelMaker Document
Quentin Lobbé's avatar
Quentin Lobbé committed
81 82
  where
    --------------------------------------
83 84
    -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
    addPhyloLevel lvl m p
85
      | lvl == 0  = toPhyloLevel lvl m p
86
      | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
Quentin Lobbé's avatar
Quentin Lobbé committed
87 88
    --------------------------------------
    -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
89 90 91
    toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
                                          $ zip [1..]
                                          $ (nub . concat)
92
                                          $ map text l
Quentin Lobbé's avatar
Quentin Lobbé committed
93 94 95
    --------------------------------------


96
-- | To transform a Cluster into a Phylogroup
Quentin Lobbé's avatar
Quentin Lobbé committed
97 98 99
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m =
    PhyloGroup ((prd, lvl), idx) lbl ngrams empty Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups)
100 101 102
      where
        --------------------------------------
        ngrams :: [Int]
103
        ngrams = (sort . nub . concat) $ map getGroupNgrams groups
104
        --------------------------------------
Quentin Lobbé's avatar
Quentin Lobbé committed
105 106 107


-- | To transform a Clique into a PhyloGroup
Quentin Lobbé's avatar
Quentin Lobbé committed
108 109 110
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis p =
    PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing [] [] [] []
Quentin Lobbé's avatar
Quentin Lobbé committed
111 112 113
      where
        --------------------------------------
        ngrams :: [Int]
Quentin Lobbé's avatar
Quentin Lobbé committed
114
        ngrams = sort $ map (\x -> getIdxInRoots x p)
Quentin Lobbé's avatar
Quentin Lobbé committed
115
                      $ Set.toList
116
                      $ getClique fis
Quentin Lobbé's avatar
Quentin Lobbé committed
117 118 119 120 121 122
        --------------------------------------


-- | To transform a list of Ngrams into a PhyloGroup
ngramsToGroup ::  PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
ngramsToGroup prd lvl idx lbl ngrams p =
Quentin Lobbé's avatar
Quentin Lobbé committed
123
    PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing [] [] [] []
Quentin Lobbé's avatar
Quentin Lobbé committed
124 125 126


-- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
127 128 129
toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
toPhyloLevel lvl m p = alterPhyloPeriods
                        (\period -> let pId = _phylo_periodId period
Quentin Lobbé's avatar
Quentin Lobbé committed
130 131
                                    in  over (phylo_periodLevels)
                                        (\phyloLevels ->
132
                                          let groups = toPhyloGroups lvl pId (m ! pId) m p
Quentin Lobbé's avatar
Quentin Lobbé committed
133 134
                                          in  phyloLevels ++ [PhyloLevel (pId, lvl) groups]
                                        ) period) p
135 136


137
-- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
138
toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
139
toNthLevel lvlMax prox clus p
140
  | lvl >= lvlMax = p
141
  | otherwise     = toNthLevel lvlMax prox clus
142
                  $ traceBranches (lvl + 1)
143
                  $ setPhyloBranches (lvl + 1)
144 145 146 147 148
                  -- $ traceTempoMatching Descendant (lvl + 1)
                  -- $ interTempoMatching Descendant (lvl + 1) prox
                  -- $ traceTempoMatching Ascendant  (lvl + 1)
                  -- $ interTempoMatching Ascendant  (lvl + 1) prox
                  $ transposePeriodLinks (lvl + 1)
149 150
                  $ setLevelLinks (lvl, lvl + 1)
                  $ addPhyloLevel (lvl + 1)
151
                    (phyloToClusters lvl clus p) p
152 153
  where
    --------------------------------------
154 155
    lvl :: Level
    lvl = getLastLevel p
156
    --------------------------------------
157 158


159
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
160
toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
161
toPhylo1 clus prox metrics filters d p = case clus of
162 163 164
  Fis (FisParams k s t) -> traceBranches 1 
                       $ setPhyloBranches 1
                       $ traceTempoMatching Descendant 1
165
                       $ interTempoMatching Descendant 1 prox
166 167
                       $ traceTempoMatching Ascendant 1
                       $ interTempoMatching Ascendant 1 prox
168 169 170 171
                       $ setLevelLinks (0,1)
                       $ setLevelLinks (1,0)
                       $ addPhyloLevel 1 phyloFis p
    where
172 173
      --------------------------------------
      phyloFis :: Map (Date, Date) [PhyloFis]
174
      phyloFis = toPhyloFis d k s t metrics filters
175 176
      --------------------------------------

177
  _   -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
178 179 180


-- | To reconstruct the Level 0 of a Phylo
181
toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
182 183 184
toPhylo0 d p = addPhyloLevel 0 d p


Quentin Lobbé's avatar
Quentin Lobbé committed
185 186
class PhyloMaker corpus
    where
187 188
        toPhylo ::  PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Phylo
        toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Phylo
Quentin Lobbé's avatar
Quentin Lobbé committed
189 190 191
        corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]

instance PhyloMaker [(Date, Text)]
192
  where
193
    --------------------------------------
194
    toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
Quentin Lobbé's avatar
Quentin Lobbé committed
195 196 197 198 199 200 201 202 203 204 205 206
      where
        --------------------------------------
        phylo1 :: Phylo
        phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
        --------------------------------------
        phylo0 :: Phylo
        phylo0 = toPhylo0 phyloDocs phyloBase
        --------------------------------------
        phyloDocs :: Map (Date, Date) [Document]
        phyloDocs = corpusToDocs c phyloBase
        --------------------------------------
        phyloBase :: Phylo
207
        phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
Quentin Lobbé's avatar
Quentin Lobbé committed
208
        --------------------------------------       
209
    --------------------------------------
210
    toPhyloBase q p c roots termList = initPhyloBase periods foundations p
Quentin Lobbé's avatar
Quentin Lobbé committed
211 212
      where
        --------------------------------------
213 214
        foundations :: PhyloFoundations
        foundations = PhyloFoundations (initFoundationsRoots roots) termList
Quentin Lobbé's avatar
Quentin Lobbé committed
215 216 217 218 219
        --------------------------------------
        periods :: [(Date,Date)]
        periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
                $ both fst (head' "LevelMaker" c,last c)
        --------------------------------------
220
    --------------------------------------
221
    corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
222 223


Quentin Lobbé's avatar
Quentin Lobbé committed
224
instance PhyloMaker [Document]
225 226
  where
    --------------------------------------
227
    toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
Quentin Lobbé's avatar
Quentin Lobbé committed
228 229 230 231 232 233 234 235 236 237 238 239
      where
        --------------------------------------
        phylo1 :: Phylo
        phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
        --------------------------------------
        phylo0 :: Phylo
        phylo0 = toPhylo0 phyloDocs phyloBase
        --------------------------------------
        phyloDocs :: Map (Date, Date) [Document]
        phyloDocs = corpusToDocs c phyloBase
        --------------------------------------
        phyloBase :: Phylo
240
        phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
Quentin Lobbé's avatar
Quentin Lobbé committed
241
        --------------------------------------       
242
    --------------------------------------
243
    toPhyloBase q p c roots termList = initPhyloBase periods foundations p
Quentin Lobbé's avatar
Quentin Lobbé committed
244 245
      where
        --------------------------------------
246 247
        foundations :: PhyloFoundations
        foundations = PhyloFoundations (initFoundationsRoots roots) termList
Quentin Lobbé's avatar
Quentin Lobbé committed
248 249 250 251 252
        --------------------------------------
        periods :: [(Date,Date)]
        periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
                $ both date (head' "LevelMaker" c,last c)
        --------------------------------------
253
    --------------------------------------
254 255 256 257 258 259 260 261 262
    corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c


-----------------
-- | Tracers | --
-----------------


tracePhyloBase :: Phylo -> Phylo
263
tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n" 
264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
                        <> show (length $ _phylo_periods p) <> " periods from " 
                                 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
                                 <> " to " 
                                 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
                                 <> "\n"
                        <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p


traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
                                    <> "count : " <> show (length pts) <> " pointers\n"
                                    <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
                                                       <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
                                                       <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
                                                       <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
  where 
    --------------------------------------
    sim :: [Double]
    sim = sort $ map snd pts 
    --------------------------------------
    pts :: [Pointer]
    pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
    --------------------------------------


traceBranches :: Level -> Phylo -> Phylo
traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
291
                           <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
292 293 294 295 296 297 298 299 300 301 302
                           <> "count : " <> show (length $ getGroupsWithLevel lvl p)    <> " groups\n"
                           <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
                                                    <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
                                                    <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
                                                    <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
  where
    --------------------------------------
    brs :: [Double]
    brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
        $ filter (\(id,_) -> (fst id) == lvl)
        $ getGroupsByBranches p
303
    --------------------------------------