1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
{-|
Module : Gargantext.Core.Viz.Phylo.PhyloExport
Description : Exportation module of a Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE TypeSynonymInstances #-}
module Gargantext.Core.Viz.Phylo.PhyloExport where
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.GraphViz hiding (DotGraph, Order)
import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
import Data.GraphViz.Attributes.HTML qualified as H
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Types.Monadic
import Data.List (nub, union, (\\), (!!), init, partition, nubBy, elemIndex)
import Data.Map (fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, member, toList)
import Data.Maybe (fromJust)
import Data.Text qualified as Text
import Data.Text.Lazy (pack, unpack)
import Data.Text.Lazy qualified as Lazy
import Data.Vector (Vector)
import Data.Vector qualified as Vector
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
import Gargantext.Prelude hiding (empty, scale, toList)
--------------------
-- | Dot export | --
--------------------
dotToFile :: FilePath -> DotGraph DotId -> IO ()
dotToFile filePath dotG = writeFile filePath $ Text.pack $ dotToString dotG
dotToString :: DotGraph DotId -> [Char]
dotToString dotG = unpack (printDotGraph dotG)
dynamicToColor :: Int -> H.Attribute
dynamicToColor d
| d == 0 = H.BGColor (toColor LightCoral)
| d == 1 = H.BGColor (toColor Khaki)
| d == 2 = H.BGColor (toColor SkyBlue)
| otherwise = H.Color (toColor Black)
pickLabelColor :: [Int] -> H.Attribute
pickLabelColor lst
| elem 0 lst = dynamicToColor 0
| elem 1 lst = dynamicToColor 1
| elem 2 lst = dynamicToColor 2
| otherwise = dynamicToColor 3
toDotLabel :: Text.Text -> Label
toDotLabel lbl = StrLabel $ fromStrict lbl
toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
toAttr k v = customAttribute k v
metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
metaToAttr meta = map (\(k, v) -> toAttr (fromStrict k) $ (fromStrict . unwords) $ map show v) $ toList meta
groupIdToDotId :: PhyloGroupId -> DotId
groupIdToDotId (((d, d'), lvl), idx) =
(fromStrict . Text.pack) $ "group" <> show d <> show d' <> show lvl <> show idx
branchIdToDotId :: PhyloBranchId -> DotId
branchIdToDotId bId = (fromStrict . Text.pack) $ "branch" <> show (snd bId)
periodIdToDotId :: Period -> DotId
periodIdToDotId prd = (fromStrict . Text.pack) $ "period" <> show (fst prd) <> show (snd prd)
branchToDotNode :: PhyloBranch -> Int -> Dot DotId
branchToDotNode b bId =
node (branchIdToDotId $ b ^. branch_id)
( [ FillColor [toWColor CornSilk]
, FontName "Arial"
, FontSize 40
, Shape Egg
, Style [SItem Bold []] ]
<> (metaToAttr $ b ^. branch_meta)
<> [ toAttr "nodeType" "branch"
, toAttr "bId" (show bId)
, toAttr "branchId" (fromStrict $ unwords (map show $ snd $ b ^. branch_id))
, toAttr "branch_x" (fromStrict $ show $ b ^. branch_x)
, toAttr "branch_y" (fromStrict $ show $ b ^. branch_y)
, toAttr "label" (show $ b ^. branch_label)
])
periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
periodToDotNode prd prd' =
node (periodIdToDotId prd) $
[ Shape BoxShape
, FontSize 50
, Label $ toDotLabel $ Text.pack $ show (fst prd) <> " " <> show (snd prd) ]
<> [ toAttr "nodeType" "period"
, toAttr "strFrom" $ fromStrict $ show $ fst prd'
, toAttr "strTo" $ fromStrict $ show $ snd prd'
, toAttr "from" $ fromStrict $ show $ fst prd
, toAttr "to" $ fromStrict $ show $ snd prd ]
groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
groupToDotNode fdt g bId =
node (groupIdToDotId $ getGroupId g)
([FontName "Arial", Shape Square, penWidth 4]
<> [ toAttr "nodeType" "group"
, toAttr "gid" (groupIdToDotId $ getGroupId g)
, toAttr "from" (show (fst $ g ^. phylo_groupPeriod))
, toAttr "to" (show (snd $ g ^. phylo_groupPeriod))
, toAttr "strFrom" (show (fst $ g ^. phylo_groupPeriod'))
, toAttr "strTo" (show (snd $ g ^. phylo_groupPeriod'))
, toAttr "branchId" (fromStrict $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
, toAttr "bId" (show bId)
, toAttr "support" (show (g ^. phylo_groupSupport))
, toAttr "weight" (show (g ^. phylo_groupWeight))
, toAttr "source" (show (nub $ g ^. phylo_groupSources))
, toAttr "sourceFull" (show (g ^. phylo_groupSources))
, toAttr "density" (show (g ^. phylo_groupDensity))
, toAttr "cooc" (show (g ^. phylo_groupCooc))
, toAttr "lbl" (Lazy.fromStrict $ ngramsToLabel fdt (g ^. phylo_groupNgrams))
, toAttr "foundation" (show (idxToLabel (g ^. phylo_groupNgrams)))
, toAttr "role" (show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
, toAttr "frequence" (show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
, toAttr "seaLvl" (show ((g ^. phylo_groupMeta) ! "seaLevels"))
])
toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
toDotEdge' source target thr w edgeType = edge source target
(case edgeType of
GroupToGroup -> undefined
GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
BranchToGroup -> undefined
BranchToBranch -> undefined
GroupToAncestor -> undefined
PeriodToPeriod -> undefined)
toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
toDotEdge source target lbl edgeType = edge source target
(case edgeType of
GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
GroupToGroupMemory -> undefined
BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
mergePointers groups =
let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
in unionWith (\w w' -> max w w') toChilds toParents
mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
mergePointersMemory groups =
let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
in concat [toChilds,toParents]
mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
mergeAncestors groups = concat
$ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
$ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
toBid :: PhyloGroup -> [PhyloBranch] -> Int
toBid g bs =
let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
in fromJust $ elemIndex b' bs
exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
exportToDot phylo export =
tracePhylo ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
<> show(length $ export ^. export_groups) <> " groups "
<> show(length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups)
<> " terms to a dot file\n\n"
<> "##########################" :: Text ) $
digraph ((Str . fromStrict) $ phyloName $ getConfig phylo) $ do
{- 1) init the dot graph -}
graphAttrs ( [ Label (toDotLabel $ phyloName $ getConfig phylo)]
<> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
, Ratio FillRatio
-- , Ratio AutoRatio
, Style [SItem Filled []],Color [toWColor White]]
{-- home made attributes -}
<> [ toAttr (fromStrict "phyloFoundations") $ show (length $ Vector.toList $ getRoots phylo)
, toAttr (fromStrict "phyloTerms") $ show (length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups)
, toAttr (fromStrict "phyloDocs") $ show (sum $ elems $ getDocsByDate phylo)
, toAttr (fromStrict "phyloPeriods") $ show (length $ elems $ phylo ^. phylo_periods)
, toAttr (fromStrict "phyloBranches") $ show (length $ export ^. export_branches)
, toAttr (fromStrict "phyloGroups") $ show (length $ export ^. export_groups)
, toAttr (fromStrict "phyloSources") $ show (Vector.toList $ getSources phylo)
, toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo
, toAttr (fromStrict "PhyloScale") $ show (getLevel phylo)
, toAttr (fromStrict "phyloQuality") $ show (phylo ^. phylo_quality)
, toAttr (fromStrict "phyloSeaRiseStart") $ show (getPhyloSeaRiseStart phylo)
, toAttr (fromStrict "phyloSeaRiseSteps") $ show (getPhyloSeaRiseSteps phylo)
-- ,(toAttr (fromStrict "phyloTermsFreq") $ show (toList $ _phylo_lastTermFreq phylo))
])
{-
-- toAttr (fromStrict k) $ (pack . unwords) $ map show v
-- 2) create a layer for the branches labels -}
subgraph (Str "Branches peaks") $ do
-- graphAttrs [Rank SameRank]
{-
-- 3) group the branches by hierarchy
-- mapM (\branches ->
-- subgraph (Str "Branches clade") $ do
-- graphAttrs [Rank SameRank]
-- -- 4) create a node for each branch
-- mapM branchToDotNode branches
-- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
-}
mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
{-- 5) create a layer for each period -}
_ <- mapM (\period ->
subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
graphAttrs [Rank SameRank]
periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
{-- 6) create a node for each group -}
mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
) $ phylo ^. phylo_periods
{-- 7) create the edges between a branch and its first groups -}
_ <- mapM (\(bId,groups) ->
mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
)
$ toList
$ map (\groups -> head' "toDot"
$ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
$ sortOn (fst . _phylo_groupPeriod) groups)
$ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
{- 8) create the edges between the groups -}
_ <- mapM (\((k,k'),v) ->
toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
) $ (toList . mergePointers) $ export ^. export_groups
{- 8-bis) create the edges between the groups -}
{- _ <- mapM (\((k,k'),v) ->
toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
) $ mergePointersMemory $ export ^. export_groups -}
_ <- mapM (\((k,k'),v) ->
toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
) $ mergeAncestors $ export ^. export_groups
-- 10) create the edges between the periods
_ <- mapM (\(prd,prd') ->
toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
{- 8) create the edges between the branches
-- _ <- mapM (\(bId,bId') ->
-- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
-- (Text.pack $ show(branchIdsToSimilarity bId bId'
-- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
-- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
-- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
-}
graphAttrs [Rank SameRank]
----------------
-- | Filter | --
----------------
filterByBranchSize :: Double -> PhyloExport -> PhyloExport
filterByBranchSize thr export =
let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
in export & export_branches .~ (fst splited)
& export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
processFilters filters qua export =
foldl (\export' f -> case f of
ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
else filterByBranchSize thr export'
) export filters
--------------
-- | Sort | --
--------------
branchToIso :: [PhyloBranch] -> [PhyloBranch]
branchToIso branches =
let steps = map sum
$ inits
$ map (\(b,x) -> b ^. branch_y + 0.05 - x)
$ zip branches
$ ([0] ++ (map (\(b,b') ->
let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
in
if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
then (b' ^. branch_seaLevel) !! (lmin - 1)
else (b' ^. branch_seaLevel) !! (idx - 1)
) $ listToSeq branches))
in map (\(x,b) -> b & branch_x .~ x)
$ zip steps branches
branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
branchToIso' start step branches =
let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
$ inits
$ ([0] ++ (map (\(b,b') ->
let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
in 1 - start - step * root) $ listToSeq branches))
in map (\(x,b) -> b & branch_x .~ x)
$ zip bx branches
sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
sortByHierarchy depth branches =
if (length branches == 1)
then branches
else concat
$ map (\branches' ->
let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
++ (sortByHierarchy (depth + 1) (snd partitions)))
$ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
$ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
sortByBirthDate :: Order -> PhyloExport -> PhyloExport
sortByBirthDate order export =
let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
branches' = case order of
Asc -> branches
Desc -> reverse branches
in export & export_branches .~ branches'
processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
processSort sort' elev export = case sort' of
ByBirthDate o -> sortByBirthDate o export
ByHierarchy _ -> case elev of
Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
Evolving _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
-----------------
-- | Metrics | --
-----------------
-- | Return the conditional probability of i knowing j
conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
conditional m i j = (findWithDefault 0 (i,j) m)
/ (m ! (j,j))
-- | Return the genericity score of a given ngram
genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
- (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
-- | Return the specificity score of a given ngram
specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
- (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-- | Return the inclusion score of a given ngram
inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
ngramsMetrics phylo export =
over ( export_groups
. traverse )
(\g -> g & phylo_groupMeta %~ insert "genericity"
(map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "specificity"
(map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "inclusion"
(map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "frequence"
(map (\n -> getInMap n (getLastRootsFreq phylo)) $ g ^. phylo_groupNgrams)
) export
branchDating :: PhyloExport -> PhyloExport
branchDating export =
over ( export_branches
. traverse )
(\b ->
let groups = sortOn fst
$ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
then acc ++ [g ^. phylo_groupPeriod]
else acc ) [] $ export ^. export_groups
periods = nub groups
birth = fst $ head' "birth" groups
death = snd $ last' "death" groups
age = death - birth
in b & branch_meta %~ insert "birth" [fromIntegral birth]
& branch_meta %~ insert "death" [fromIntegral death]
& branch_meta %~ insert "age" [fromIntegral age]
& branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
processMetrics :: Phylo -> PhyloExport -> PhyloExport
processMetrics phylo export = ngramsMetrics phylo
$ branchDating export
-----------------
-- | Taggers | --
-----------------
nk :: Int -> [[Int]] -> Int
nk n groups = sum
$ map (\g -> if (elem n g)
then 1
else 0) groups
tf :: Int -> [[Int]] -> Double
tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
idf :: Int -> [[Int]] -> Double
idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
findTfIdf :: [[Int]] -> [(Int,Double)]
findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ nub $ concat groups
findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
findEmergences groups freq =
let ngrams = map _phylo_groupNgrams groups
dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
$ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
in reverse $ sortOn snd
$ map (\(n,_) -> if (member n freq)
then (n,freq ! n)
else (n,0)) emerging
mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
mostEmergentTfIdf nth freq foundations export =
over ( export_branches
. traverse )
(\b ->
let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
tfidf = findTfIdf (map _phylo_groupNgrams groups)
emergences = findEmergences groups freq
selected = if (null emergences)
then map fst $ take nth tfidf
else [fst $ head' "mostEmergentTfIdf" emergences]
++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
in b & branch_label .~ (ngramsToLabel foundations selected)) export
getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
$ take nth
$ reverse
$ sortOn snd $ zip [0..] meta
mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
mostInclusive nth foundations export =
over ( export_branches
. traverse )
(\b ->
let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
in b & branch_label .~ lbl ) export
mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
mostEmergentInclusive nth foundations export =
over ( export_groups
. traverse )
(\g ->
let lbl = ngramsToLabel foundations
$ take nth
$ map (\(_,(_,idx)) -> idx)
$ concat
$ map (\groups -> sortOn (fst . snd) groups)
$ groupBy ((==) `on` fst) $ reverse $ sortOn fst
$ zip ((g ^. phylo_groupMeta) ! "inclusion")
$ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
in g & phylo_groupLabel .~ lbl ) export
processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
processLabels labels foundations freq export =
foldl (\export' label ->
case label of
GroupLabel tagger nth ->
case tagger of
MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
_ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
BranchLabel tagger nth ->
case tagger of
MostInclusive -> mostInclusive nth foundations export'
MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
_ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
------------------
-- | Dynamics | --
------------------
-- utiliser & creer une Map FdtId [PhyloGroup]
-- n = index of the current term
toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
toDynamics n elders g m =
let prd = g ^. phylo_groupPeriod
end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
{- decrease -}
then 2
else if ((fst prd) == (fst $ m ! n))
{- emerging -}
then 0
else if isNew
{- emergence -}
then 1
else 3
where
--------------------------------------
isNew :: Bool
isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
type FdtId = Int
processDynamics :: [PhyloGroup] -> [PhyloGroup]
processDynamics groups =
map (\g ->
let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
&& ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
where
--------------------------------------
mapNgrams :: Map FdtId (Date,Date)
mapNgrams = map (\dates ->
let dates' = sort dates
in (head' "dynamics" dates', last' "dynamics" dates'))
$ fromListWith (++)
$ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
$ (g ^. phylo_groupNgrams))) [] groups
-----------------
-- | horizon | --
-----------------
getGroupThr :: Double -> PhyloGroup -> Double
getGroupThr step g =
let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
breaks = (g ^. phylo_groupMeta) ! "breaks"
in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
toAncestor :: Double
-> Map Int Double
-> PhyloSimilarity
-> Double
-> [PhyloGroup]
-> PhyloGroup
-> PhyloGroup
toAncestor nbDocs diago similarity step candidates ego =
let curr = ego ^. phylo_groupAncestors
in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
$ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
$ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
$ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
headsToAncestors :: Double
-> Map Int Double
-> PhyloSimilarity
-> Double
-> [PhyloGroup]
-> [PhyloGroup]
-> [PhyloGroup]
headsToAncestors nbDocs diago similarity step heads acc =
if (null heads)
then acc
else
let ego = head' "headsToAncestors" heads
heads' = tail' "headsToAncestors" heads
in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
toHorizon :: Phylo -> Phylo
toHorizon phylo =
let phyloAncestor = updatePhyloGroups
scale
(fromList $ map (\g -> (getGroupId g, g))
$ concat
$ tracePhyloAncestors newGroups) phylo
reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
$ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
in updatePhyloGroups scale reBranched phylo
where
-- | 1) for each periods
periods :: [Period]
periods = getPeriodIds phylo
-- --
scale :: Scale
scale = getLastLevel phylo
-- --
frame :: Int
frame = getTimeFrame $ timeUnit $ getConfig phylo
-- | 2) find ancestors between groups without parents
mapGroups :: [[PhyloGroup]]
mapGroups = map (\prd ->
let groups = getGroupsFromScalePeriods scale [prd] phylo
childs = getPreviousChildIds scale frame prd periods phylo
-- maybe add a better filter for non isolated ancestors
heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
$ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
noHeads = groups \\ heads
nbDocs = sum $ elems $ filterDocs (getDocsByDate phylo) [prd]
diago = reduceDiagos $ filterDiago (getCoocByDate phylo) [prd]
sim = (similarity $ getConfig phylo)
step = case getSeaElevation phylo of
Constante _ s -> s
Adaptative _ -> 0
Evolving _ -> 0
-- in headsToAncestors nbDocs diago Similarity heads groups []
in map (toAncestor nbDocs diago sim step noHeads)
$ headsToAncestors nbDocs diago sim step heads []
) periods
-- | 3) process this task concurrently
newGroups :: [[PhyloGroup]]
newGroups = mapGroups `using` parList rdeepseq
--------------------------------------
getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
getPreviousChildIds lvl frame curr prds phylo =
concat $ map ((map fst) . _phylo_groupPeriodChilds)
$ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
---------------------
-- | phyloExport | --
---------------------
toPhyloExport :: Phylo -> DotGraph DotId
toPhyloExport phylo = exportToDot phylo
$ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
$ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
$ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (getLastRootsFreq phylo)
$ processMetrics phylo export
where
export :: PhyloExport
export = PhyloExport groups branches
--------------------------------------
branches :: [PhyloBranch]
branches = map (\g ->
let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
breaks = (g ^. phylo_groupMeta) ! "breaks"
canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
in PhyloBranch { _branch_id = g ^. phylo_groupBranchId
, _branch_canonId = canonId
, _branch_seaLevel = seaLvl
, _branch_x = 0
, _branch_y = last' "export" $ take (round $ (last' "export" breaks) + 1) seaLvl
, _branch_w = 0
, _branch_t = 0
, _branch_label = ""
, _branch_meta = empty })
$ map (head' "export")
$ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
$ sortOn (^. phylo_groupBranchId) groups
--------------------------------------
groups :: [PhyloGroup]
groups = traceExportGroups
-- necessaire ?
$ processDynamics
$ getGroupsFromScale (phyloScale $ getConfig phylo)
$ tracePhyloInfo phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
traceExportBranches branches =
tracePhylo ("\n"
<> "-- | Export " <> show(length branches) <> " branches" :: Text) branches
tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePhyloAncestors groups =
tracePhylo ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups)
<> " ancestors" :: Text) groups
tracePhyloInfo :: Phylo -> Phylo
tracePhyloInfo phylo =
tracePhylo ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with level = "
<> show(getLevel phylo) <> " applied to "
<> show(length $ Vector.toList $ getRoots phylo) <> " foundations" :: Text
) phylo
traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
traceExportGroups groups =
tracePhylo ("\n" <> "-- | Export "
<> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
<> show(length groups) <> " groups and "
<> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms" :: Text
) groups