PhyloTools.hs 21.9 KB
Newer Older
1
{-|
2
Module      : Gargantext.Core.Viz.Phylo.PhyloTools
3 4 5 6 7 8 9 10 11 12
Description : Module dedicated to all the tools needed for making a Phylo
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}

{-# LANGUAGE ViewPatterns      #-}

13
module Gargantext.Core.Viz.Phylo.PhyloTools where
14

15
import Data.Vector (Vector, elemIndex)
16
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group)
qlobbe's avatar
qlobbe committed
17
import Data.Set (Set, disjoint)
qlobbe's avatar
qlobbe committed
18
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
19
import Data.String (String)
qlobbe's avatar
qlobbe committed
20
import Data.Text (Text,unpack)
21

qlobbe's avatar
qlobbe committed
22
import Prelude (floor,read)
qlobbe's avatar
qlobbe committed
23

24
import Gargantext.Prelude
25
import Gargantext.Core.Viz.AdaptativePhylo
qlobbe's avatar
qlobbe committed
26 27
import Text.Printf

28

29
import Debug.Trace (trace)
qlobbe's avatar
qlobbe committed
30
import Control.Lens hiding (Level)
31 32

import qualified Data.Vector as Vector
qlobbe's avatar
qlobbe committed
33 34
import qualified Data.List as List
import qualified Data.Set as Set
35
import qualified Data.Map as Map
qlobbe's avatar
qlobbe committed
36
import qualified Data.Text as Text
37

qlobbe's avatar
qlobbe committed
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
------------
-- | Io | --
------------

-- | To print an important message as an IO()
printIOMsg :: String -> IO ()
printIOMsg msg = 
    putStrLn ( "\n"
            <> "------------" 
            <> "\n"
            <> "-- | " <> msg <> "\n" )


-- | To print a comment as an IO()
printIOComment :: String -> IO ()
printIOComment cmt =
    putStrLn ( "\n" <> cmt <> "\n" )


57 58 59 60
--------------
-- | Misc | --
--------------

qlobbe's avatar
qlobbe committed
61 62 63 64 65 66 67 68 69 70
-- truncate' :: Double -> Int -> Double
-- truncate' x n = (fromIntegral (floor (x * t))) / t
--     where t = 10^n

truncate' :: Double -> Int -> Double
truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
    where 
        --------------
        t :: Double
        t = 10 ^n
71

qlobbe's avatar
qlobbe committed
72 73 74 75 76
getInMap :: Int -> Map Int Double -> Double
getInMap k m = 
    if (member k m)
        then m ! k
        else 0
77

qlobbe's avatar
qlobbe committed
78 79 80 81
roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
roundToStr = printf "%0.*f"


82
countSup :: Double -> [Double] -> Int
qlobbe's avatar
qlobbe committed
83 84
countSup s l = length $ filter (>s) l

85

qlobbe's avatar
qlobbe committed
86 87 88
dropByIdx :: Int -> [a] -> [a]
dropByIdx k l = take k l ++ drop (k+1) l

qlobbe's avatar
qlobbe committed
89 90 91 92 93

elemIndex' :: Eq a => a -> [a] -> Int
elemIndex' e l = case (List.elemIndex e l) of
    Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
    Just i  -> i
94

95

96 97 98 99 100 101 102 103 104
commonPrefix :: Eq a => [a] -> [a] -> [a] -> [a]
commonPrefix lst lst' acc =
    if (null lst || null lst')
        then acc
        else if (head' "commonPrefix" lst == head' "commonPrefix" lst')
                then commonPrefix (tail lst) (tail lst') (acc ++ [head' "commonPrefix" lst])
                else acc


105 106 107 108 109 110 111
---------------------
-- | Foundations | --
---------------------


-- | Is this Ngrams a Foundations Root ?
isRoots :: Ngrams -> Vector Ngrams -> Bool
qlobbe's avatar
qlobbe committed
112 113
isRoots n ns = Vector.elem n ns

114 115 116 117
-- | To transform a list of nrams into a list of foundation's index
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns

qlobbe's avatar
qlobbe committed
118 119 120 121
-- | To transform a list of sources into a list of sources' index
sourcesToIdx :: [Text] -> Vector Text -> [Int]
sourcesToIdx ss ps = nub $ map (\s -> fromJust $ elemIndex s ps) ss

122 123
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel :: Vector Ngrams -> [Int] -> Text
qlobbe's avatar
qlobbe committed
124
ngramsToLabel ngrams l = Text.unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
125

qlobbe's avatar
qlobbe committed
126 127 128 129 130
idxToLabel :: [Int] -> String
idxToLabel l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l

idxToLabel' :: [Double] -> String
idxToLabel' l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
131 132 133 134 135

-- | To transform a list of Ngrams Indexes into a list of Text
ngramsToText :: Vector Ngrams -> [Int] -> [Text]
ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l

qlobbe's avatar
qlobbe committed
136 137 138 139 140

--------------
-- | Time | --
--------------

qlobbe's avatar
qlobbe committed
141 142 143 144 145
-- | To transform a list of periods into a set of Dates
periodsToYears :: [(Date,Date)] -> Set Date
periodsToYears periods = (Set.fromList . sort . concat)
                       $ map (\(d,d') -> [d..d']) periods

qlobbe's avatar
qlobbe committed
146

147 148 149 150 151 152 153 154 155 156 157 158 159
findBounds :: [Date] -> (Date,Date)
findBounds dates = 
    let dates' = sort dates
    in  (head' "findBounds" dates', last' "findBounds" dates')


toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
toPeriods dates p s = 
    let (start,end) = findBounds dates
    in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates')) 
     $ chunkAlong p s [start .. end]


qlobbe's avatar
qlobbe committed
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
toFstDate :: [Text] -> Text
toFstDate ds = snd
             $ head' "firstDate"
             $ sortOn fst
             $ map (\d -> 
                      let d' = read (filter (\c -> c /= '-') $ unpack d)::Int
                       in (d',d)) ds

toLstDate :: [Text] -> Text
toLstDate ds = snd
             $ head' "firstDate"
             $ reverse
             $ sortOn fst
             $ map (\d -> 
                      let d' = read (filter (\c -> c /= '-') $ unpack d)::Int
                       in (d',d)) ds  


getTimeScale :: Phylo -> [Char]
getTimeScale p = case (timeUnit $ getConfig p) of
    Year  _ _ _ -> "year"
    Month _ _ _ -> "month"
    Week  _ _ _ -> "week"  
    Day   _ _ _ -> "day"      


qlobbe's avatar
qlobbe committed
186 187 188
-- | Get a regular & ascendante timeScale from a given list of dates
toTimeScale :: [Date] -> Int -> [Date]
toTimeScale dates step = 
189 190 191 192
    let (start,end) = findBounds dates
    in  [start, (start + step) .. end]


qlobbe's avatar
qlobbe committed
193 194
getTimeStep :: TimeUnit -> Int
getTimeStep time = case time of 
qlobbe's avatar
qlobbe committed
195 196 197 198
    Year  _ s _ -> s
    Month _ s _ -> s  
    Week  _ s _ -> s  
    Day   _ s _ -> s  
qlobbe's avatar
qlobbe committed
199 200 201

getTimePeriod :: TimeUnit -> Int
getTimePeriod time = case time of 
qlobbe's avatar
qlobbe committed
202 203 204 205
    Year  p _ _ -> p
    Month p _ _ -> p  
    Week  p _ _ -> p  
    Day   p _ _ -> p  
qlobbe's avatar
qlobbe committed
206 207 208

getTimeFrame :: TimeUnit -> Int
getTimeFrame time = case time of 
qlobbe's avatar
qlobbe committed
209 210 211 212
    Year  _ _ f -> f
    Month _ _ f -> f
    Week  _ _ f -> f
    Day   _ _ f -> f            
qlobbe's avatar
qlobbe committed
213

214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
-------------
-- | Fis | --
-------------


-- | To find if l' is nested in l
isNested :: Eq a => [a] -> [a] -> Bool
isNested l l'
  | null l'               = True
  | length l' > length l  = False
  | (union  l l') == l    = True
  | otherwise             = False 


-- | To filter Fis with small Support but by keeping non empty Periods
keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
keepFilled f thr l = if (null $ f thr l) && (not $ null l)
                     then keepFilled f (thr - 1) l
                     else f thr l


235
traceClique :: Map (Date, Date) [PhyloClique] -> String
236 237 238 239
traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") "  ) "" [1..6]
    where
        --------------------------------------
        cliques :: [Double]
qlobbe's avatar
qlobbe committed
240
        cliques = sort $ map (fromIntegral . length . _phyloClique_nodes) $ concat $ elems mFis
241 242 243
        -------------------------------------- 


244
traceSupport :: Map (Date, Date) [PhyloClique] -> String
245 246 247 248
traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") "  ) "" [1..6]
    where
        --------------------------------------
        supports :: [Double]
249
        supports = sort $ map (fromIntegral . _phyloClique_support) $ concat $ elems mFis
250 251 252
        -------------------------------------- 


253
traceFis :: [Char] -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
254 255
traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
                         <> "Support : " <> (traceSupport mFis) <> "\n"
256
                         <> "Nb Ngrams : "  <> (traceClique mFis)  <> "\n" ) mFis
257 258


259 260 261
---------------
-- | Clique| --
---------------
qlobbe's avatar
qlobbe committed
262 263


264 265
getCliqueSupport :: Clique -> Int
getCliqueSupport unit = case unit of 
qlobbe's avatar
qlobbe committed
266
    Fis s _ -> s
267
    MaxClique _ _ _ -> 0
qlobbe's avatar
qlobbe committed
268

269 270
getCliqueSize :: Clique -> Int
getCliqueSize unit = case unit of 
qlobbe's avatar
qlobbe committed
271
    Fis _ s -> s
272
    MaxClique s _ _ -> s
qlobbe's avatar
qlobbe committed
273 274


275 276 277 278 279 280 281 282 283 284
--------------
-- | Cooc | --
--------------

listToCombi' :: [a] -> [(a,a)]
listToCombi' l = [(x,y) | (x:rest) <- tails l,  y <- rest]

listToEqual' :: Eq a => [a] -> [(a,a)]
listToEqual' l = [(x,y) | x <- l, y <- l, x == y]

qlobbe's avatar
qlobbe committed
285
listToKeys :: Eq a =>  [a] -> [(a,a)]
286 287 288 289 290
listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)

listToMatrix :: [Int] -> Map (Int,Int) Double
listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst

qlobbe's avatar
qlobbe committed
291 292 293
listToMatrix' :: [Ngrams] -> Map (Ngrams,Ngrams) Int
listToMatrix' lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst

qlobbe's avatar
qlobbe committed
294 295 296
listToSeq :: Eq a =>  [a] -> [(a,a)]
listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l,  y <- rest ]

297 298 299
sumCooc :: Cooc -> Cooc -> Cooc
sumCooc cooc cooc' = unionWith (+) cooc cooc'

qlobbe's avatar
qlobbe committed
300 301 302
getTrace :: Cooc -> Double 
getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc

qlobbe's avatar
qlobbe committed
303 304
coocToDiago :: Cooc -> Cooc
coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc
qlobbe's avatar
qlobbe committed
305 306 307 308 309 310 311 312 313

-- | To build the local cooc matrix of each phylogroup
ngramsToCooc :: [Int] -> [Cooc] -> Cooc
ngramsToCooc ngrams coocs =
    let cooc  = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
        pairs = listToKeys ngrams
    in  filterWithKey (\k _ -> elem k pairs) cooc


qlobbe's avatar
qlobbe committed
314 315 316
--------------------
-- | PhyloGroup | --
--------------------
qlobbe's avatar
qlobbe committed
317 318

getGroupId :: PhyloGroup -> PhyloGroupId 
qlobbe's avatar
qlobbe committed
319
getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupLevel), g ^. phylo_groupIndex)
320

321 322 323
idToPrd :: PhyloGroupId -> PhyloPeriodId
idToPrd id = (fst . fst) id

qlobbe's avatar
qlobbe committed
324 325 326 327
groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] ->  Map a [PhyloGroup]
groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups

getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer]
qlobbe's avatar
qlobbe committed
328
getPeriodPointers fil g = 
qlobbe's avatar
qlobbe committed
329
    case fil of 
qlobbe's avatar
qlobbe committed
330 331
        ToChilds  -> g ^. phylo_groupPeriodChilds
        ToParents -> g ^. phylo_groupPeriodParents
qlobbe's avatar
qlobbe committed
332 333 334 335

filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity proximity thr local = 
    case proximity of
336
        WeightedLogJaccard _ -> local >= thr
337
        WeightedLogSim _ -> local >= thr
qlobbe's avatar
qlobbe committed
338 339 340 341 342
        Hamming -> undefined   

getProximityName :: Proximity -> String
getProximityName proximity =
    case proximity of
343
        WeightedLogJaccard _ -> "WLJaccard"
344
        WeightedLogSim _ -> "WeightedLogSim"
345
        Hamming -> "Hamming"            
qlobbe's avatar
qlobbe committed
346

347 348 349 350
---------------
-- | Phylo | --
---------------

351
addPointers :: Filiation -> PointerType -> [Pointer] -> PhyloGroup -> PhyloGroup
qlobbe's avatar
qlobbe committed
352
addPointers fil pty pointers g = 
qlobbe's avatar
qlobbe committed
353 354
    case pty of 
        TemporalPointer -> case fil of 
qlobbe's avatar
qlobbe committed
355 356
                                ToChilds  -> g & phylo_groupPeriodChilds  .~ pointers
                                ToParents -> g & phylo_groupPeriodParents .~ pointers
qlobbe's avatar
qlobbe committed
357
        LevelPointer    -> case fil of 
qlobbe's avatar
qlobbe committed
358 359
                                ToChilds  -> g & phylo_groupLevelChilds   .~ pointers
                                ToParents -> g & phylo_groupLevelParents  .~ pointers
qlobbe's avatar
qlobbe committed
360 361


362 363
getPeriodIds :: Phylo -> [(Date,Date)]
getPeriodIds phylo = sortOn fst
qlobbe's avatar
qlobbe committed
364
                   $ keys
365 366
                   $ phylo ^. phylo_periods

qlobbe's avatar
qlobbe committed
367 368 369
getLevelParentId :: PhyloGroup -> PhyloGroupId 
getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupLevelParents

qlobbe's avatar
qlobbe committed
370 371 372 373 374 375 376 377 378 379
getLastLevel :: Phylo -> Level
getLastLevel phylo = last' "lastLevel" $ getLevels phylo

getLevels :: Phylo -> [Level]
getLevels phylo = nub 
                $ map snd
                $ keys $ view ( phylo_periods
                       .  traverse
                       . phylo_periodLevels ) phylo

380 381
getSeaElevation :: Phylo -> SeaElevation
getSeaElevation phylo = seaElevation (getConfig phylo)
382 383


384
getConfig :: Phylo -> Config
qlobbe's avatar
qlobbe committed
385 386 387
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config


qlobbe's avatar
qlobbe committed
388 389 390 391 392 393 394 395 396 397
setConfig :: Config -> Phylo -> Phylo
setConfig config phylo = phylo 
                       & phylo_param .~ (PhyloParam 
                                            ((phylo ^. phylo_param) ^. phyloParam_version) 
                                            ((phylo ^. phylo_param) ^. phyloParam_software) 
                                            config)

-- & phylo_param & phyloParam_config & phyloParam_config .~ config


qlobbe's avatar
qlobbe committed
398
getRoots :: Phylo -> Vector Ngrams
qlobbe's avatar
qlobbe committed
399 400
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots

qlobbe's avatar
qlobbe committed
401 402 403
getSources :: Phylo -> Vector Text
getSources phylo = _sources (phylo ^. phylo_sources)

qlobbe's avatar
qlobbe committed
404 405 406 407 408
phyloToLastBranches :: Phylo -> [[PhyloGroup]]
phyloToLastBranches phylo = elems 
    $ fromListWith (++)
    $ map (\g -> (g ^. phylo_groupBranchId, [g]))
    $ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo
qlobbe's avatar
qlobbe committed
409 410 411 412 413 414 415 416 417 418 419

getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
getGroupsFromLevel lvl phylo = 
    elems $ view ( phylo_periods
                 .  traverse
                 . phylo_periodLevels
                 .  traverse
                 .  filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
                 . phylo_levelGroups ) phylo


420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
getGroupsFromLevelPeriods :: Level -> [PhyloPeriodId] -> Phylo -> [PhyloGroup]
getGroupsFromLevelPeriods lvl periods phylo = 
    elems $ view ( phylo_periods
                 .  traverse
                 .  filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
                 . phylo_periodLevels
                 .  traverse
                 .  filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
                 . phylo_levelGroups ) phylo    


getGroupsFromPeriods :: Level -> Map PhyloPeriodId PhyloPeriod -> [PhyloGroup]
getGroupsFromPeriods lvl periods = 
    elems $ view (  traverse
                 . phylo_periodLevels
                 .  traverse
                 .  filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
                 . phylo_levelGroups ) periods


qlobbe's avatar
qlobbe committed
440 441 442 443 444 445 446 447 448
updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
updatePhyloGroups lvl m phylo = 
    over ( phylo_periods
         .  traverse
         . phylo_periodLevels
         .  traverse
         .  filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
         . phylo_levelGroups
         .  traverse 
qlobbe's avatar
qlobbe committed
449 450
         ) (\g -> 
                let id = getGroupId g
qlobbe's avatar
qlobbe committed
451 452 453
                in 
                    if member id m 
                    then m ! id
qlobbe's avatar
qlobbe committed
454
                    else g ) phylo
qlobbe's avatar
qlobbe committed
455

qlobbe's avatar
qlobbe committed
456 457 458 459 460 461 462 463 464 465
updatePeriods :: Map (Date,Date) (Text,Text) -> Phylo -> Phylo
updatePeriods periods' phylo = 
    over (phylo_periods . traverse) 
            (\prd -> 
                let prd' = periods' ! (prd ^. phylo_periodPeriod)
                    lvls = map (\lvl -> lvl & phylo_levelPeriod' .~ prd') $ prd ^. phylo_periodLevels
                 in prd & phylo_periodPeriod' .~ prd'
                        & phylo_periodLevels  .~ lvls
                ) phylo

qlobbe's avatar
qlobbe committed
466 467 468 469 470 471 472

traceToPhylo :: Level -> Phylo -> Phylo
traceToPhylo lvl phylo = 
    trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
                <> show (length $ getGroupsFromLevel lvl phylo) <> " groups and "
                <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel lvl phylo) <> " branches" <> "\n") phylo 

qlobbe's avatar
qlobbe committed
473 474 475 476
--------------------
-- | Clustering | --
--------------------

qlobbe's avatar
qlobbe committed
477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
mergeBranchIds :: [[Int]] -> [Int]
mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
  where
    -- | 2) find the most Up Left ids in the hierarchy of similarity
    -- mostUpLeft :: [[Int]] -> [[Int]]
    -- mostUpLeft ids' = 
    --      let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
    --          inf = (fst . minimum) groupIds
    --      in map snd $ filter (\gIds -> fst gIds == inf) groupIds
    -- | 1) find the most frequent ids
    mostFreq' :: [[Int]] -> [[Int]]
    mostFreq' ids' = 
       let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
           sup = (fst . maximum) groupIds
        in map snd $ filter (\gIds -> fst gIds == sup) groupIds


mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
mergeMeta bId groups = 
  let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups  
   in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]   


groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches groups =
502
    {- run the related component algorithm -}
qlobbe's avatar
qlobbe committed
503 504 505 506 507
    let egos  = map (\g -> [getGroupId g] 
                        ++ (map fst $ g ^. phylo_groupPeriodParents)
                        ++ (map fst $ g ^. phylo_groupPeriodChilds)
                        ++ (map fst $ g ^. phylo_groupAncestors)) $ elems groups
        graph = relatedComponents egos
508
    {- update each group's branch id -}
qlobbe's avatar
qlobbe committed
509 510 511 512 513
    in map (\ids ->
        let groups' = elems $ restrictKeys groups (Set.fromList ids)
            bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
         in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph

qlobbe's avatar
qlobbe committed
514 515 516 517 518 519 520
relatedComponents :: Ord a => [[a]] -> [[a]]
relatedComponents graph = foldl' (\acc groups ->
    if (null acc)
    then acc ++ [groups]
    else 
        let acc' = partition (\groups' -> disjoint (Set.fromList groups') (Set.fromList groups)) acc
         in (fst acc') ++ [nub $ concat $ (snd acc') ++ [groups]]) [] graph
qlobbe's avatar
qlobbe committed
521

qlobbe's avatar
qlobbe committed
522 523 524 525 526 527
toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
toRelatedComponents nodes edges = 
  let ref = fromList $ map (\g -> (getGroupId g, g)) nodes
      clusters = relatedComponents $ ((map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges) ++ (map (\g -> [getGroupId g]) nodes)) 
   in map (\cluster -> map (\gId -> ref ! gId) cluster) clusters 

qlobbe's avatar
qlobbe committed
528

qlobbe's avatar
qlobbe committed
529 530
traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo = 
531
    trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo) 
qlobbe's avatar
qlobbe committed
532 533 534 535 536 537
                 <> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
                 <> " and "  <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
                 <> "\n" ) phylo

traceSynchronyStart :: Phylo -> Phylo
traceSynchronyStart phylo = 
qlobbe's avatar
qlobbe committed
538
    trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo) 
qlobbe's avatar
qlobbe committed
539 540 541 542 543
                 <> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
                 <> " and "  <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
                 <> "\n" ) phylo    


qlobbe's avatar
qlobbe committed
544 545 546 547 548 549
-------------------
-- | Proximity | --
-------------------

getSensibility :: Proximity -> Double
getSensibility proxi = case proxi of 
550
    WeightedLogJaccard s -> s
551
    WeightedLogSim s -> s
qlobbe's avatar
qlobbe committed
552 553
    Hamming -> undefined

qlobbe's avatar
qlobbe committed
554 555 556 557
----------------
-- | Branch | --
----------------

qlobbe's avatar
qlobbe committed
558 559 560 561 562 563 564 565 566 567 568
intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
intersectInit acc lst lst' =
    if (null lst) || (null lst')
    then acc
    else if (head' "intersectInit" lst) == (head' "intersectInit" lst')
         then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
         else acc

branchIdsToProximity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
branchIdsToProximity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))

qlobbe's avatar
qlobbe committed
569
ngramsInBranches :: [[PhyloGroup]] -> [Int]
570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611
ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches


traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
traceMatchSuccess thr qua qua' nextBranches =
    trace ( "\n" <> "-- local branches : " <> (init $ show ((init . init . snd)
                                                    $ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
                                           <> ",(1.." <> show (length nextBranches) <> ")]"
                 <> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
         <> " - splited with success in "  <> show (map length nextBranches) <> " sub-branches" <> "\n"
         <> " - for the local threshold "  <> show (thr) <> " ( quality : " <> show (qua) <> " < " <> show(qua') <> ")\n" ) nextBranches


traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchFailure thr qua qua' branches =
    trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
                                           <> ",(1.." <> show (length branches) <> ")]"
                 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
         <> " - split with failure for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " > " <> show(qua') <> ")\n"
        ) branches


traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchNoSplit branches =
    trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
                                           <> ",(1.." <> show (length branches) <> ")]"
                 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
         <> " - unable to split in smaller branches" <> "\n"
        ) branches


traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchLimit branches =
    trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
                                           <> ",(1.." <> show (length branches) <> ")]"
                 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
         <> " - unable to increase the threshold above 1" <> "\n"
        ) branches            


traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
traceMatchEnd groups =
qlobbe's avatar
qlobbe committed
612 613 614 615 616 617
    trace ("\n" <> "-- | End temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
                                                         <> " branches and " <> show (length groups) <> " groups" <> "\n") groups


traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
traceTemporalMatching groups = 
618 619 620 621 622
    trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups


traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double
traceGroupsProxi m = 
623
    trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m