1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
module Gargantext.Viz.Phylo.LevelMaker
where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List ((++), sort, concat, nub, zip, last, null)
import Data.Map (Map, (!), empty, singleton, size)
import Data.Text (Text)
import Data.Tuple.Extra
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Metrics
import Gargantext.Viz.Phylo.Aggregates
import Gargantext.Viz.Phylo.Cluster
import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.LinkMaker
import Gargantext.Viz.Phylo.Tools
import Gargantext.Text.Context (TermList)
import qualified Data.Vector.Storable as VS
import qualified Data.Set as Set
import qualified Data.Vector as Vector
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
-------------------------
-- | PhyloLevelMaker | --
-------------------------
-- | A typeClass for polymorphic PhyloLevel functions
class PhyloLevelMaker aggregate
where
-- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
-- | To create a list of PhyloGroups based on a list of aggregates a
toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
instance PhyloLevelMaker PhyloCluster
where
--------------------------------------
-- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
addPhyloLevel lvl m p
| lvl > 1 = addPhyloLevel' lvl m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
--------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l m p =
let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
clusters' = clusters `using` parList rdeepseq
in clusters'
--------------------------------------
instance PhyloLevelMaker PhyloFis
where
--------------------------------------
-- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
addPhyloLevel lvl m p
| lvl == 1 = addPhyloLevel' lvl m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
--------------------------------------
-- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l _ p =
let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis (getPhyloCooc p) (getFoundationsRoots p)) $ zip [1..] l
groups' = groups `using` parList rdeepseq
in groups'
--------------------------------------
instance PhyloLevelMaker Document
where
--------------------------------------
-- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
addPhyloLevel lvl m p
| lvl == 0 = addPhyloLevel' lvl m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
--------------------------------------
-- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
$ (nub . concat)
$ map text l
--------------------------------------
-- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
addPhyloLevel' lvl m p = alterPhyloPeriods
(\period -> let pId = _phylo_periodId period
in over (phylo_periodLevels)
(\phyloLevels ->
let groups = toPhyloGroups lvl pId (m ! pId) m p
in trace (show (length groups) <> " groups for " <> show (pId) ) $ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
) period) p
----------------------
-- | toPhyloGroup | --
----------------------
-- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map Date (Map (Int,Int) Double) -> Vector Ngrams -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
-- empty
(singleton "support" (fromIntegral $ getSupport fis))
Nothing
cooc
[] [] [] childs
where
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) cooc'
--------------------------------------
ngrams :: [Int]
ngrams = sort $ map (\x -> getIdxInRoots' x root)
$ Set.toList
$ getClique fis
--------------------------------------
childs :: [Pointer]
childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
--------------------------------------
-- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
-- empty
empty
Nothing
cooc
ascLink desLink [] childs
where
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
--------------------------------------
childs :: [Pointer]
childs = map (\g -> (getGroupId g, 1)) groups
ascLink = concat $ map getGroupPeriodParents groups
desLink = concat $ map getGroupPeriodChilds groups
--------------------------------------
ngrams :: [Int]
ngrams = (sort . nub . concat) $ map getGroupNgrams groups
--------------------------------------
-- | To transform a list of Ngrams into a PhyloGroup
ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing
(getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] []
----------------------
-- | toPhyloLevel | --
----------------------
-- | To reconstruct the Phylo from a set of Document to a given Level
toPhylo :: PhyloQueryBuild -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phyloBase
-- phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo
--------------------------------------
-- phylo0 :: Phylo
-- phylo0 = tracePhyloN 0
-- $ addPhyloLevel 0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
--------------------------------------
phyloBase :: Phylo
phyloBase = tracePhyloBase
$ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c termList fis
--------------------------------------
-- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
toNthLevel lvlMax prox clus p
| lvl >= lvlMax = p
| otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1)
-- $ transposePeriodLinks (lvl + 1)
$ traceTranspose (lvl + 1) Descendant
$ transposeLinks (lvl + 1) Descendant
$ traceTranspose (lvl + 1) Ascendant
$ transposeLinks (lvl + 1) Ascendant
$ tracePhyloN (lvl + 1)
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1)
(clusters) p
where
--------------------------------------
clusters :: Map (Date,Date) [PhyloCluster]
clusters = phyloToClusters lvl clus p
--------------------------------------
lvl :: Level
lvl = getLastLevel p
--------------------------------------
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox d p = case clus of
Fis (FisParams k s t) -> traceBranches 1
-- $ reLinkPhyloBranches 1
-- $ traceBranches 1
$ setPhyloBranches 1
$ traceTempoMatching Descendant 1
$ interTempoMatching Descendant 1 prox
$ traceTempoMatching Ascendant 1
$ interTempoMatching Ascendant 1 prox
$ tracePhyloN 1
-- $ setLevelLinks (0,1)
$ addPhyloLevel 1 (getPhyloFis phyloFis)
$ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
where
--------------------------------------
phyloFis :: Phylo
phyloFis = if (null $ getPhyloFis p)
then p & phylo_fis .~ refineFis (docsToFis d p) k s t
else p & phylo_fis .~ docsToFis d p
--------------------------------------
_ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
-- | To create the base of the Phylo (foundations, periods, cooc, etc)
toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc fis p
where
--------------------------------------
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
--------------------------------------
nbDocs :: Map Date Double
nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
--------------------------------------
foundations :: PhyloFoundations
foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
--------------------------------------
periods :: [(Date,Date)]
periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
$ both date (head' "toPhyloBase" c, last' "toPhyloBase" c)
--------------------------------------
-----------------
-- | Tracers | --
-----------------
tracePhyloN :: Level -> Phylo -> Phylo
tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
<> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
traceTranspose :: Level -> Filiation -> Phylo -> Phylo
traceTranspose lvl fil p = trace ("----\n Transpose " <> show (fil) <> " links for " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n") p
tracePhyloBase :: Phylo -> Phylo
tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
<> show (length $ _phylo_periods p) <> " periods from "
<> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
<> " to "
<> show (getPhyloPeriodId $ last $ _phylo_periods p)
<> "\n"
<> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length pts) <> " pointers\n") p
where
--------------------------------------
pts :: [Pointer]
pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
--------------------------------------
traceBranches :: Level -> Phylo -> Phylo
traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
<> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
<> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
<> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
<> show (percentile 50 (VS.fromList brs)) <> " (50%) "
<> show (percentile 75 (VS.fromList brs)) <> " (75%) "
<> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
where
--------------------------------------
brs :: [Double]
brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
$ filter (\(id,_) -> (fst id) == lvl)
$ getGroupsByBranches p
--------------------------------------