1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
{-|
Module : Gargantext.Core.Viz.Phylo.PhyloMaker
Description : Maker engine for rebuilding a Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.PhyloMaker where
import Control.DeepSeq (NFData)
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, insert)
import Data.Set (Set)
import Data.Text (Text)
import Data.Vector (Vector)
import Debug.Trace (trace)
import Prelude (floor)
import Gargantext.Core.Methods.Similarities (Similarity(Conditional))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..))
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (toPhyloQuality, temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toSimilarity)
import Gargantext.Prelude
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Vector as Vector
------------------
-- | To Phylo | --
------------------
{-
-- TODO AD
data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
| PhyloN { _phylo'_flatPhylo :: Phylo}
toPhylo' :: Phylo' -> [Document] -> TermList -> PhyloConfig -> Phylo
toPhylo' (PhyloN phylo) = toPhylo'
toPhylo' (PhyloBase phylo) = toPhylo
-}
-- TODO an adaptative synchronic clustering with a slider
toPhylo :: Phylo -> Phylo
toPhylo phylowithoutLink = traceToPhylo (phyloScale $ getConfig phylowithoutLink) $
if (phyloScale $ getConfig phylowithoutLink) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloScale $ getConfig phylowithoutLink)]
else phyloAncestors
where
--------------------------------------
phyloAncestors :: Phylo
phyloAncestors =
if (findAncestors $ getConfig phylowithoutLink)
then toHorizon phyloWithLinks
else phyloWithLinks
--------------------------------------
phyloWithLinks :: Phylo
phyloWithLinks = temporalMatching (getLadder phylowithoutLink) phylowithoutLink
--------------------------------------
-----------------------------
-- | Create a flat Phylo | --
-----------------------------
{-
-- create a square ladder
-}
squareLadder :: [Double] -> [Double]
squareLadder ladder = List.map (\x -> x * x) ladder
{-
-- create an adaptative 'sea elevation' ladder
-}
adaptSeaLadder :: Double -> Set Double -> Set Double -> [Double]
adaptSeaLadder curr similarities ladder =
if curr <= 0 || Set.null similarities
then Set.toList ladder
else
let idx = ((Set.size similarities) `div` (floor curr)) - 1
thr = Set.elemAt idx similarities
-- we use a sliding methods 1/10, then 1/9, then ... 1/2
in adaptSeaLadder (curr -1) (Set.filter (> thr) similarities) (Set.insert thr ladder)
{-
-- create a constante 'sea elevation' ladder
-}
constSeaLadder :: Double -> Double -> Set Double -> [Double]
constSeaLadder curr step ladder =
if curr > 1
then Set.toList ladder
else constSeaLadder (curr + step) step (Set.insert curr ladder)
{-
-- create an evolving 'sea elevation' ladder based on estimated & local quality maxima
-}
evolvSeaLadder :: Double -> Double -> Map Int Double -> Set Double -> [((PhyloGroup,PhyloGroup),Double)] -> [Double]
evolvSeaLadder nbFdt lambda freq similarities graph = map snd
$ filter fst
$ zip maxima (map fst qua')
-- 3) find the corresponding measures of similarity and create the ladder
where
--------
-- 2) find the local maxima in the quality distribution
maxima :: [Bool]
maxima = [snd (List.head qua') > snd (List.head $ List.tail qua')] ++ (findMaxima qua') ++ [snd (List.head $ reverse qua') > snd (List.head $ List.tail $ reverse qua')]
--------
-- 1.2)
qua' :: [(Double,Double)]
qua' = foldl (\acc (s,q) ->
if length acc == 0
then [(s,q)]
else if (snd (List.last acc)) == q
then acc
else acc ++ [(s,q)]
) [] $ zip (Set.toList similarities) qua
--------
-- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality
qua :: [Double]
qua = map (\thr ->
let edges = filter (\edge -> snd edge >= thr) graph
nodes = nub $ concat $ map (\((n,n'),_) -> [n,n']) edges
branches = toRelatedComponents nodes edges
in toPhyloQuality nbFdt lambda freq branches
) $ (Set.toList similarities)
{-
-- find a similarity ladder regarding the "sea elevation" strategy
-}
findSeaLadder :: Phylo -> Phylo
findSeaLadder phylo = case getSeaElevation phylo of
Constante start gap -> phylo & phylo_seaLadder .~ (constSeaLadder start gap Set.empty)
Adaptative steps -> phylo & phylo_seaLadder .~ (squareLadder $ adaptSeaLadder steps similarities Set.empty)
Evolving _ -> let ladder = evolvSeaLadder
(fromIntegral $ Vector.length $ getRoots phylo)
(getLevel phylo)
(getRootsFreq phylo)
similarities simGraph
in phylo & phylo_seaLadder .~ (if length ladder > 0
then ladder
-- if we don't find any local maxima with the evolving strategy
else constSeaLadder 0.1 0.1 Set.empty)
where
--------
-- 2) extract the values of the kinship links
similarities :: Set Double
similarities = Set.fromList $ sort $ map snd simGraph
--------
-- 1) we process an initial calculation of the kinship links
-- this initial calculation is used to estimate the real sea ladder
simGraph :: [((PhyloGroup,PhyloGroup),Double)]
simGraph = foldl' (\acc period ->
-- 1.1) process period by period
let sources = getGroupsFromScalePeriods 1 [period] phylo
next = getNextPeriods ToParents 3 period (keys $ phylo ^. phylo_periods)
targets = getGroupsFromScalePeriods 1 next phylo
docs = filterDocs (getDocsByDate phylo) ([period] ++ next)
diagos = filterDiago (getCoocByDate phylo) ([period] ++ next)
-- 1.2) compute the kinship similarities between pairs of source & target in parallel
pairs = map (\source ->
let candidates = filter (\target -> (> 2) $ length
$ intersect (getGroupNgrams source) (getGroupNgrams target)) targets
in map (\target ->
let nbDocs = (sum . elems)
$ filterDocs docs ([idToPrd (getGroupId source), idToPrd (getGroupId target)])
diago = reduceDiagos
$ filterDiago diagos ([idToPrd (getGroupId source), idToPrd (getGroupId target)])
in ((source,target),toSimilarity nbDocs diago (getSimilarity phylo) (getGroupNgrams source) (getGroupNgrams target) (getGroupNgrams target))
) candidates
) sources
pairs' = pairs `using` parList rdeepseq
in acc ++ (concat pairs')
) [] $ keys $ phylo ^. phylo_periods
appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to scale " <> show (lvl) <> "\n")
$ over ( phylo_periods
. traverse
. phylo_periodScales
. traverse)
(\phyloLvl -> if lvl == (phyloLvl ^. phylo_scaleScale)
then
let pId = phyloLvl ^. phylo_scalePeriod
pId' = phyloLvl ^. phylo_scalePeriodStr
phyloCUnit = m ! pId
in phyloLvl
& phylo_scaleGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups)
, f obj pId pId' lvl (length groups)
-- select the cooc of the periods
(elems $ restrictKeys (getCoocByDate phylo) $ periodsToYears [pId])
-- select and merge the roots count of the periods
(foldl (\acc count -> unionWith (+) acc count) empty
$ elems $ restrictKeys (getRootsCountByDate phylo) $ periodsToYears [pId]))
] ) [] phyloCUnit)
else
phyloLvl )
phylo
clusterToGroup :: Clustering -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup
clusterToGroup fis pId pId' lvl idx coocs rootsCount = PhyloGroup pId pId' lvl idx ""
(fis ^. clustering_support )
(fis ^. clustering_visWeighting)
(fis ^. clustering_visFiltering)
(fis ^. clustering_roots)
(ngramsToCooc (fis ^. clustering_roots) coocs)
(ngramsToDensity (fis ^. clustering_roots) coocs rootsCount)
(1,[0]) -- branchid (lvl,[path in the branching tree])
(fromList [("breaks",[0]),("seaLevels",[0])])
rootsCount [] [] [] [] [] [] []
-----------------------
-- | To Phylo Step | --
-----------------------
indexDates' :: Map (Date,Date) [Document] -> Map (Date,Date) (Text,Text)
indexDates' m = map (\docs ->
let ds = map (\d -> date' d) docs
f = if (null ds)
then ""
else toFstDate ds
l = if (null ds)
then ""
else toLstDate ds
in (f,l)) m
-- create a map of roots and group ids
joinRoots :: Phylo -> Phylo
joinRoots phylo = set (phylo_foundations . foundations_rootsInGroups) rootsMap phylo
where
--------------------------------------
rootsMap :: Map Int [PhyloGroupId]
rootsMap = fromListWith (++)
$ concat -- flatten
$ map (\g ->
map (\n -> (n,[getGroupId g])) $ _phylo_groupNgrams g)
$ getGroupsFromScale 1 phylo
maybeDefaultParams :: Phylo -> Phylo
maybeDefaultParams phylo = if (defaultMode (getConfig phylo))
then findDefaultLevel phylo
else phylo
-- To build the first phylo step from docs and terms
-- QL: backend entre phyloBase et Clustering
toPhyloWithoutLink :: [Document] -> PhyloConfig -> Phylo
toPhyloWithoutLink docs conf = joinRoots
$ findSeaLadder
$ maybeDefaultParams
$ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
where
--------------------------------------
seriesOfClustering :: Map (Date,Date) [Clustering]
seriesOfClustering = toSeriesOfClustering phyloBase docs'
--------------------------------------
docs' :: Map (Date,Date) [Document]
-- QL: Time Consuming here
docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty
--------------------------------------
phyloBase :: Phylo
phyloBase = initPhylo docs conf
--------------------------------------
---------------------------
-- | Frequent Item Set | --
---------------------------
-- To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterClique :: Bool -> Int -> (Int -> [Clustering] -> [Clustering]) -> Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
filterClique keep thr f m = case keep of
False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m
-- To filter Fis with small Support
filterCliqueBySupport :: Int -> [Clustering] -> [Clustering]
filterCliqueBySupport thr l = filter (\clq -> (clq ^. clustering_support ) >= thr) l
-- To filter Fis with small Clique size
filterCliqueBySize :: Int -> [Clustering] -> [Clustering]
filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >= thr) l
-- To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
filterCliqueByNested m =
let clq = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem)
then mem
else
let fMax = filter (\f' -> not $ isNested (f ^. clustering_roots) (f' ^. clustering_roots)) mem
in fMax ++ [f] ) [] l)
$ elems m
clq' = clq `using` parList rdeepseq
in fromList $ zip (keys m) clq'
-- | To transform a time map of docs into a time map of Fis with some filters
toSeriesOfClustering :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [Clustering]
toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested
{- \$ traceFis "Filtered by clique size" -}
$ filterClique True s' (filterCliqueBySize)
{- \$ traceFis "Filtered by support" -}
$ filterClique True s (filterCliqueBySupport)
{- \$ traceFis "Unfiltered Fis" -}
seriesOfClustering
MaxClique s _ _ -> filterClique True s (filterCliqueBySize)
seriesOfClustering
where
--------------------------------------
seriesOfClustering :: Map (Date,Date) [Clustering]
seriesOfClustering = case (clique $ getConfig phylo) of
Fis _ _ ->
let fis = map (\(prd,docs) ->
case (corpusParser $ getConfig phylo) of
Csv' _ -> let lst = toList
$ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
in (prd, map (\f -> Clustering (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst)
_ -> let lst = toList
$ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs)
in (prd, map (\f -> Clustering (Set.toList $ fst f) (snd f) prd (Just $ fromIntegral $ snd f) []) lst)
)
$ toList phyloDocs
fis' = fis `using` parList rdeepseq
in fromList fis'
MaxClique _ thr filterType ->
let mcl = map (\(prd,docs) ->
let cooc = map round
$ foldl sumCooc empty
$ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> Clustering cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc))
$ toList phyloDocs
mcl' = mcl `using` parList rdeepseq
in fromList mcl'
--------------------------------------
-- dev viz graph maxClique getMaxClique
--------------------
-- | Coocurency | --
--------------------
-- To transform the docs into a time map of coocurency matrix
docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc
docsToTimeScaleCooc docs fdt =
let mCooc = fromListWith sumCooc
$ map (\(_d,l) -> (_d, listToMatrix l))
$ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs
mCooc' = fromList
$ map (\t -> (t,empty))
$ toTimeScale (map date docs) 1
in trace ("\n" <> "-- | Build the coocurency matrix for " <> show (length $ keys mCooc') <> " unit of time" <> "\n")
$ unionWith sumCooc mCooc mCooc'
-----------------------
-- | to Phylo Base | --
-----------------------
-- TODO anoe
groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc]
groupDocsByPeriodRec f prds docs acc =
if ((null prds) || (null docs))
then acc
else
let prd = head' "groupBy" prds
docs' = partition (\d -> (f d >= fst prd) && (f d <= snd prd)) docs
in groupDocsByPeriodRec f (tail prds) (snd docs') (insert prd (fst docs') acc)
-- To group a list of Documents by fixed periods
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = map (inPeriode f docs') pds
periods' = periods `using` parList rdeepseq
in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length pds) <> " periods" <> "\n")
$ fromList $ zip pds periods'
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [[t]] -> (b, b) -> [t]
inPeriode f' h (start,end) =
concat $ fst $ partition (\d -> f' (head' "inPeriode" d) >= start && f' (head' "inPeriode" d) <= end) h
-- To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es =
let periods = map (inPeriode f es) pds
periods' = periods `using` parList rdeepseq
in trace ("\n" <> "-- | Group " <> show(length es) <> " docs by " <> show(length pds) <> " periods" <> "\n")
$ fromList $ zip pds periods'
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) =
fst $ partition (\d -> f' d >= start && f' d <= end) h
--------------------------------------
docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double
docsToTermFreq docs fdt =
let nbDocs = fromIntegral $ length docs
freqs = map (/(nbDocs))
$ fromList
$ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) docs
sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs
docsToTermCount :: [Document] -> Vector Ngrams -> Map Int Double
docsToTermCount docs roots = fromList
$ map (\lst -> (head' "docsToTermCount" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) roots) docs
docsToTimeTermCount :: [Document] -> Vector Ngrams -> (Map Date (Map Int Double))
docsToTimeTermCount docs roots =
let docs' = Map.map (\l -> fromList $ map (\lst -> (head' "docsToTimeTermCount" lst, fromIntegral $ length lst))
$ group $ sort l)
$ fromListWith (++)
$ map (\d -> (date d, nub $ ngramsToIdx (text d) roots)) docs
time = fromList $ map (\t -> (t,Map.empty)) $ toTimeScale (keys docs') 1
in unionWith (Map.union) time docs'
docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double
docsToLastTermFreq n docs fdt =
let last = take n $ reverse $ sort $ map date docs
nbDocs = fromIntegral $ length $ filter (\d -> elem (date d) last) docs
freqs = map (/(nbDocs))
$ fromList
$ map (\lst -> (head' "docsToLastTermFreq" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) $ filter (\d -> elem (date d) last) docs
sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs
-- To count the number of docs by unit of time
docsToTimeScaleNb :: [Document] -> Map Date Double
docsToTimeScaleNb docs =
let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1
in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length time) <> " unit of time" <> "\n")
$ unionWith (+) time docs'
initPhyloScales :: Int -> Period -> Map PhyloScaleId PhyloScale
initPhyloScales lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloScale pId ("","") lvl empty)) [1..lvlMax]
setDefault :: PhyloConfig -> TimeUnit -> Int -> PhyloConfig
setDefault conf timeScale nbDocs = defaultConfig
{ corpusPath = (corpusPath conf)
, listPath = (listPath conf)
, outputPath = (outputPath conf)
, corpusParser = (corpusParser conf)
, listParser = (listParser conf)
, phyloName = (phyloName conf)
, defaultMode = True
, timeUnit = timeScale
, clique = Fis (toSupport nbDocs) 3}
where
--------------------------------------
toSupport :: Int -> Support
toSupport n
| n < 500 = 1
| n < 1000 = 2
| n < 2000 = 3
| n < 3000 = 4
| n < 5000 = 5
| otherwise = 6
--------------------------------------
-- Init the basic elements of a Phylo
--
initPhylo :: [Document] -> PhyloConfig -> Phylo
initPhylo docs conf =
let roots = Vector.fromList $ nub $ concat $ map text docs
timeScale = head' "initPhylo" $ map docTime docs
foundations = PhyloFoundations roots empty
docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs)
docsCounts = PhyloCounts (docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs)
(docsToTimeTermCount docs (foundations ^. foundations_roots))
(docsToTermCount docs (foundations ^. foundations_roots))
(docsToTermFreq docs (foundations ^. foundations_roots))
(docsToLastTermFreq (getTimePeriod timeScale) docs (foundations ^. foundations_roots))
params = if (defaultMode conf)
then defaultPhyloParam { _phyloParam_config = setDefault conf timeScale (length docs) }
else defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale)
in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n")
$ trace ("\n" <> "-- | lambda " <> show(_qua_granularity $ phyloQuality $ _phyloParam_config params))
$ Phylo foundations
docsSources
docsCounts
[]
params
(fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods)
0
(_qua_granularity $ phyloQuality $ _phyloParam_config params)