Commit f49465e4 authored by qlobbe's avatar qlobbe

add adjustable clustering by threshold

parent e3eb8220
Pipeline #610 failed with stage
......@@ -67,12 +67,19 @@ data Proximity =
deriving (Show,Generic,Eq)
data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq)
data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
data Synchrony =
ByProximityThreshold
{ _bpt_threshold :: Double
, _bpt_sensibility :: Double}
, _bpt_sensibility :: Double
, _bpt_scope :: SynchronyScope
, _bpt_strategy :: SynchronyStrategy }
| ByProximityDistribution
{ _bpd_sensibility :: Double}
{ _bpd_sensibility :: Double
, _bpd_strategy :: SynchronyStrategy }
deriving (Show,Generic,Eq)
......@@ -84,12 +91,12 @@ data TimeUnit =
deriving (Show,Generic,Eq)
data ContextualUnit =
data Clique =
Fis
{ _fis_support :: Int
, _fis_size :: Int }
| MaxClique
{ _clique_size :: Int }
{ _mcl_size :: Int }
deriving (Show,Generic,Eq)
......@@ -110,7 +117,7 @@ data Config =
, phyloSynchrony :: Synchrony
, phyloQuality :: Quality
, timeUnit :: TimeUnit
, contextualUnit :: ContextualUnit
, clique :: Clique
, exportLabel :: [PhyloLabel]
, exportSort :: Sort
, exportFilter :: [Filter]
......@@ -126,10 +133,10 @@ defaultConfig =
, phyloName = pack "Default Phylo"
, phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 0 0.1
, phyloSynchrony = ByProximityDistribution 0
, phyloQuality = Quality 0.2 4
, phyloSynchrony = ByProximityThreshold 0.1 0 SiblingBranches MergeAllGroups
, phyloQuality = Quality 0.1 1
, timeUnit = Year 3 1 5
, contextualUnit = Fis 1 5
, clique = Fis 1 5
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
, exportSort = ByHierarchy
, exportFilter = [ByBranchSize 2]
......@@ -143,8 +150,8 @@ instance FromJSON Proximity
instance ToJSON Proximity
instance FromJSON TimeUnit
instance ToJSON TimeUnit
instance FromJSON ContextualUnit
instance ToJSON ContextualUnit
instance FromJSON Clique
instance ToJSON Clique
instance FromJSON PhyloLabel
instance ToJSON PhyloLabel
instance FromJSON Tagger
......@@ -155,6 +162,10 @@ instance FromJSON Order
instance ToJSON Order
instance FromJSON Filter
instance ToJSON Filter
instance FromJSON SynchronyScope
instance ToJSON SynchronyScope
instance FromJSON SynchronyStrategy
instance ToJSON SynchronyStrategy
instance FromJSON Synchrony
instance ToJSON Synchrony
instance FromJSON Quality
......@@ -313,17 +324,17 @@ data Filiation = ToParents | ToChilds deriving (Generic, Show)
data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
---------------------------
-- | Frequent Item Set | --
---------------------------
----------------------
-- | Phylo Clique | --
----------------------
-- | Support : Number of Documents where a Clique occurs
type Support = Int
data PhyloCUnit = PhyloCUnit
{ _phyloCUnit_nodes :: Set Ngrams
, _phyloCUnit_support :: Support
, _phyloCUnit_period :: (Date,Date)
data PhyloClique = PhyloClique
{ _phyloClique_nodes :: Set Ngrams
, _phyloClique_support :: Support
, _phyloClique_period :: (Date,Date)
} deriving (Generic,NFData,Show,Eq)
......@@ -372,11 +383,11 @@ data PhyloExport =
makeLenses ''Config
makeLenses ''Proximity
makeLenses ''Quality
makeLenses ''ContextualUnit
makeLenses ''Clique
makeLenses ''PhyloLabel
makeLenses ''TimeUnit
makeLenses ''PhyloFoundations
makeLenses ''PhyloCUnit
makeLenses ''PhyloClique
makeLenses ''Phylo
makeLenses ''PhyloPeriod
makeLenses ''PhyloLevel
......
......@@ -54,16 +54,16 @@ phylo2 = synchronicClustering phylo1
phylo1 :: Phylo
phylo1 = temporalMatching
$ appendGroups fisToGroup 1 phyloFis phyloBase
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
---------------------------------------------
-- | STEP 2 | -- Build the frequent items set
-- | STEP 2 | -- Build the cliques
---------------------------------------------
phyloFis :: Map (Date,Date) [PhyloCUnit]
phyloFis = toPhyloFis docsByPeriods (getContextualUnitSupport $ contextualUnit config) (getContextualUnitSize $ contextualUnit config)
phyloClique :: Map (Date,Date) [PhyloClique]
phyloClique = toPhyloClique phyloBase docsByPeriods
docsByPeriods :: Map (Date,Date) [Document]
......@@ -96,7 +96,7 @@ config =
defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloLevel = 2
, exportFilter = [ByBranchSize 0]
, contextualUnit = Fis 0 0 }
, clique = Fis 0 0 }
docs :: [Document]
......
......@@ -17,7 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloExport where
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, delete)
import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
import Data.Vector (Vector)
......@@ -471,6 +471,7 @@ toPhyloExport phylo = exportToDot phylo
groups :: [PhyloGroup]
groups = traceExportGroups
$ processDynamics
$ map (\g -> g & phylo_groupMeta %~ delete "dynamics")
$ getGroupsFromLevel (phyloLevel $ getConfig phylo)
$ tracePhyloInfo phylo
......
......@@ -16,7 +16,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++), group)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, singleton)
import Data.Set (size)
import Data.Vector (Vector)
......@@ -63,7 +63,7 @@ toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $
--------------------
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups :: (a -> Double -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods
. traverse
......@@ -76,7 +76,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups)
, f obj pId lvl (length groups) (getRoots phylo)
, f obj(getPhyloThresholdInit phylo) pId lvl (length groups) (getRoots phylo)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloCUnit)
else
......@@ -84,27 +84,25 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo
fisToGroup :: PhyloCUnit -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
fisToGroup fis pId lvl idx fdt coocs =
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloCUnit_nodes) fdt
cliqueToGroup :: PhyloClique -> Double -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
cliqueToGroup fis thr pId lvl idx fdt coocs =
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloClique_nodes) fdt
in PhyloGroup pId lvl idx ""
(fis ^. phyloCUnit_support)
(fis ^. phyloClique_support)
ngrams
(ngramsToCooc ngrams coocs)
(1,[0])
empty
(singleton "thr" [thr])
[] [] [] []
toPhylo1 :: [Document] -> Phylo -> Phylo
toPhylo1 docs phyloBase = temporalMatching
$ appendGroups fisToGroup 1 phyloCUnit phyloBase
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
where
--------------------------------------
phyloCUnit :: Map (Date,Date) [PhyloCUnit]
phyloCUnit = case (contextualUnit $ getConfig phyloBase) of
Fis s s' -> toPhyloFis docs' s s'
MaxClique _ -> undefined
phyloClique :: Map (Date,Date) [PhyloClique]
phyloClique = toPhyloClique phyloBase docs'
--------------------------------------
docs' :: Map (Date,Date) [Document]
docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs
......@@ -117,56 +115,59 @@ toPhylo1 docs phyloBase = temporalMatching
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis :: Bool -> Int -> (Int -> [PhyloCUnit] -> [PhyloCUnit]) -> Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit]
filterFis keep thr f m = case keep of
filterClique :: Bool -> Int -> (Int -> [PhyloClique] -> [PhyloClique]) -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterClique keep thr f m = case keep of
False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m
-- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloCUnit] -> [PhyloCUnit]
filterFisBySupport thr l = filter (\fis -> (fis ^. phyloCUnit_support) >= thr) l
filterCliqueBySupport :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySupport thr l = filter (\clq -> (clq ^. phyloClique_support) >= thr) l
-- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloCUnit] -> [PhyloCUnit]
filterFisByClique thr l = filter (\fis -> (size $ fis ^. phyloCUnit_nodes) >= thr) l
filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySize thr l = filter (\clq -> (size $ clq ^. phyloClique_nodes) >= thr) l
-- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit]
filterFisByNested m =
let fis = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloCUnit_nodes) (Set.toList $ f ^. phyloCUnit_nodes)) mem)
filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterCliqueByNested m =
let clq = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloClique_nodes) (Set.toList $ f ^. phyloClique_nodes)) mem)
then mem
else
let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloCUnit_nodes) (Set.toList $ f' ^. phyloCUnit_nodes)) mem
let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloClique_nodes) (Set.toList $ f' ^. phyloClique_nodes)) mem
in fMax ++ [f] ) [] l)
$ elems m
fis' = fis `using` parList rdeepseq
in fromList $ zip (keys m) fis'
clq' = clq `using` parList rdeepseq
in fromList $ zip (keys m) clq'
-- | To transform a time map of docs innto a time map of Fis with some filters
toPhyloFis :: Map (Date, Date) [Document] -> Int -> Int -> Map (Date,Date) [PhyloCUnit]
toPhyloFis phyloDocs support clique =
-- traceFis "Filtered Fis"
filterFisByNested
toPhyloClique :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [PhyloClique]
toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested
-- $ traceFis "Filtered by clique size"
$ filterFis True clique (filterFisByClique)
$ filterClique True s' (filterCliqueBySize)
-- $ traceFis "Filtered by support"
$ filterFis True support (filterFisBySupport)
$ filterClique True s (filterCliqueBySupport)
-- $ traceFis "Unfiltered Fis"
phyloFis
phyloClique
MaxClique _ -> undefined
where
--------------------------------------
phyloFis :: Map (Date,Date) [PhyloCUnit]
phyloFis =
let fis = map (\(prd,docs) -> let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in (prd, map (\f -> PhyloCUnit (fst f) (snd f) prd) lst))
phyloClique :: Map (Date,Date) [PhyloClique]
phyloClique = case (clique $ getConfig phylo) of
Fis _ _ -> let fis = map (\(prd,docs) ->
let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in (prd, map (\f -> PhyloClique (fst f) (snd f) prd) lst))
$ toList phyloDocs
fis' = fis `using` parList rdeepseq
in fromList fis'
MaxClique _ -> undefined
--------------------------------------
......
......@@ -162,42 +162,42 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l)
else f thr l
traceClique :: Map (Date, Date) [PhyloCUnit] -> String
traceClique :: Map (Date, Date) [PhyloClique] -> String
traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where
--------------------------------------
cliques :: [Double]
cliques = sort $ map (fromIntegral . size . _phyloCUnit_nodes) $ concat $ elems mFis
cliques = sort $ map (fromIntegral . size . _phyloClique_nodes) $ concat $ elems mFis
--------------------------------------
traceSupport :: Map (Date, Date) [PhyloCUnit] -> String
traceSupport :: Map (Date, Date) [PhyloClique] -> String
traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where
--------------------------------------
supports :: [Double]
supports = sort $ map (fromIntegral . _phyloCUnit_support) $ concat $ elems mFis
supports = sort $ map (fromIntegral . _phyloClique_support) $ concat $ elems mFis
--------------------------------------
traceFis :: [Char] -> Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit]
traceFis :: [Char] -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
<> "Support : " <> (traceSupport mFis) <> "\n"
<> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis
-------------------------
-- | Contextual unit | --
-------------------------
---------------
-- | Clique| --
---------------
getContextualUnitSupport :: ContextualUnit -> Int
getContextualUnitSupport unit = case unit of
getCliqueSupport :: Clique -> Int
getCliqueSupport unit = case unit of
Fis s _ -> s
MaxClique _ -> 0
getContextualUnitSize :: ContextualUnit -> Int
getContextualUnitSize unit = case unit of
getCliqueSize :: Clique -> Int
getCliqueSize unit = case unit of
Fis _ s -> s
MaxClique s -> s
......@@ -243,6 +243,9 @@ ngramsToCooc ngrams coocs =
getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
getGroupThr :: PhyloGroup -> Double
getGroupThr group = head' "getGroupThr" ((group ^. phylo_groupMeta) ! "thr")
groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
......@@ -311,6 +314,14 @@ getLevels phylo = nub
. phylo_periodLevels ) phylo
getPhyloThresholdInit :: Phylo -> Double
getPhyloThresholdInit phylo = getThresholdInit (phyloProximity (getConfig phylo))
getPhyloThresholdStep :: Phylo -> Double
getPhyloThresholdStep phylo = getThresholdStep (phyloProximity (getConfig phylo))
getConfig :: Phylo -> Config
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
......
......@@ -19,30 +19,74 @@ import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard)
import Gargantext.Viz.Phylo.PhyloExport (processDynamics)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, init, all, group, maximum, groupBy)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member, singleton)
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
-- import Debug.Trace (trace)
import qualified Data.Map as Map
import qualified Data.Set as Set
-------------------------
-- | New Level Maker | --
-------------------------
toBranchId :: PhyloGroup -> PhyloBranchId
toBranchId child = ((child ^. phylo_groupLevel) + 1, snd (child ^. phylo_groupBranchId))
mergeBranchIds :: [[Int]] -> [Int]
mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids
where
-- | 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids
mostFreq :: [[Int]] -> [[Int]]
mostFreq ids' =
let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
sup = (fst . maximum) groupIds
in map snd $ filter (\gIds -> fst gIds == sup) groupIds
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches' groups =
-- | run the related component algorithm
let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
$ sortOn (\gs -> fst $ fst $ head' "egos" gs)
$ map (\g -> [getGroupId g]
++ (map fst $ g ^. phylo_groupPeriodParents)
++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups
-- | first find the related components by inside each ego's period
graph' = map relatedComponents egos
-- | then run it for the all the periods
graph = relatedComponents $ concat (graph' `using` parList rdeepseq)
-- | update each group's branch id
in map (\ids ->
-- intervenir ici
let groups' = elems $ restrictKeys groups (Set.fromList ids)
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl + 1,bId))) groups') graph
-- toBranchId :: PhyloGroup -> PhyloBranchId
-- toBranchId child = ((child ^. phylo_groupLevel) + 1, snd (child ^. phylo_groupBranchId))
getLastThr :: [PhyloGroup] -> Double
getLastThr childs = maximum $ concat $ map (\g -> (g ^. phylo_groupMeta) ! "thr") childs
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs =
let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) ""
(sum $ map _phylo_groupSupport childs) ngrams
(ngramsToCooc ngrams coocs) (toBranchId (head' "mergeGroups" childs))
empty [] (map (\g -> (getGroupId g, 1)) childs)
(ngramsToCooc ngrams coocs)
((snd $ fst id),(mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) childs))
(singleton "thr" [getLastThr childs]) [] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs)
(updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
where
......@@ -57,42 +101,47 @@ addPhyloLevel lvl phylo =
%~ (insert (phyloPrd ^. phylo_periodPeriod, lvl) (PhyloLevel (phyloPrd ^. phylo_periodPeriod) lvl empty))) phylo
toNextLevel :: Phylo -> [PhyloGroup] -> Phylo
toNextLevel phylo groups =
toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo
toNextLevel' phylo groups =
let curLvl = getLastLevel phylo
oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups
newGroups = fromListWith (++)
-- | 5) group the parents by periods
newGroups = concat $ groupsToBranches'
$ fromList $ map (\g -> (getGroupId g, g))
$ foldlWithKey (\acc id groups' ->
-- | 4) create the parent group
let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups'
in acc ++ [(parent ^. phylo_groupPeriod, [parent])]) []
in acc ++ [parent]) []
-- | 3) group the current groups by parentId
$ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups
newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups
in traceSynchronyEnd
$ over ( phylo_periods . traverse . phylo_periodLevels . traverse
-- | 6) update each period at curLvl + 1
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1)))
-- | 7) by adding the parents
(\phyloLvl ->
if member (phyloLvl ^. phylo_levelPeriod) newGroups
if member (phyloLvl ^. phylo_levelPeriod) newPeriods
then phyloLvl & phylo_levelGroups
.~ fromList (map (\g -> (getGroupId g, g)) $ newGroups ! (phyloLvl ^. phylo_levelPeriod))
.~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_levelPeriod))
else phyloLvl)
-- | 2) add the curLvl + 1 phyloLevel to the phylo
$ addPhyloLevel (curLvl + 1)
-- | 1) update the current groups (with level parent pointers) in the phylo
$ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
--------------------
-- | Clustering | --
--------------------
toPairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
toPairs groups = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))
$ listToCombi' groups
toPairs :: SynchronyStrategy -> [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
toPairs strategy groups = case strategy of
MergeRegularGroups -> pairs
$ filter (\g -> all (== 3) $ (g ^. phylo_groupMeta) ! "dynamics") groups
MergeAllGroups -> pairs groups
where
pairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
pairs gs = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) (listToCombi' gs)
toDiamonds :: [PhyloGroup] -> [[PhyloGroup]]
......@@ -111,14 +160,14 @@ toDiamonds groups = foldl' (\acc groups' ->
groupsToEdges :: Proximity -> Synchrony -> Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox sync docs groups =
case sync of
ByProximityThreshold t s -> filter (\(_,w) -> w >= t)
$ toEdges s
$ toPairs groups
ByProximityDistribution s ->
ByProximityThreshold thr sens _ strat ->
filter (\(_,w) -> w >= thr)
$ toEdges sens
$ toPairs strat groups
ByProximityDistribution sens strat ->
let diamonds = sortOn snd
$ toEdges s $ concat
$ map toPairs $ toDiamonds groups
$ toEdges sens $ concat
$ map (\gs -> toPairs strat gs) $ toDiamonds groups
in take (div (length diamonds) 2) diamonds
where
toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)]
......@@ -142,8 +191,8 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex)
reduceBranch :: Proximity -> Synchrony -> Map Date Double -> [PhyloGroup] -> [PhyloGroup]
reduceBranch prox sync docs branch =
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> [PhyloGroup] -> [PhyloGroup]
reduceGroups prox sync docs branch =
-- | 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
......@@ -159,16 +208,38 @@ reduceBranch prox sync docs branch =
$ toRelatedComponents groups edges) periods
getGroupRealBId :: Double -> PhyloGroup -> [Int]
getGroupRealBId step g =
let nb = round(getGroupThr g / step) + 2
in take nb (snd $ g ^. phylo_groupBranchId)
adjustClustering :: Synchrony -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
adjustClustering sync step branches = case sync of
ByProximityThreshold _ _ scope _ ->
case scope of
SingleBranch -> branches
SiblingBranches -> groupBy (\g g' -> (init $ getGroupRealBId step g) == (init $ getGroupRealBId step g'))
$ sortOn _phylo_groupBranchId $ concat branches
-- SiblingBranches -> elems $ fromListWith (++) $ map (\b -> ((init . snd . _phylo_groupBranchId) $ head' "adjustClustering" b,b)) branches
AllBranches -> [concat branches]
ByProximityDistribution _ _ -> branches
synchronicClustering :: Phylo -> Phylo
synchronicClustering phylo =
let prox = phyloProximity $ getConfig phylo
sync = phyloSynchrony $ getConfig phylo
docs = phylo ^. phylo_timeDocs
branches = map (\branch -> reduceBranch prox sync docs branch)
newBranches = map (\branch -> reduceGroups prox sync docs branch)
$ map processDynamics
$ adjustClustering sync (getPhyloThresholdStep phylo)
$ phyloToLastBranches
$ traceSynchronyStart phylo
branches' = branches `using` parList rdeepseq
in toNextLevel phylo $ concat branches'
newBranches' = newBranches `using` parList rdeepseq
in toNextLevel' phylo $ concat newBranches'
----------------
......
......@@ -16,7 +16,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or)
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, intersectionWith, findWithDefault, keys, (!), filterWithKey)
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, intersectionWith, findWithDefault, keys, (!), filterWithKey, singleton)
import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
......@@ -285,6 +285,8 @@ reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
reduceFrequency frequency branches =
restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
updateThr thr branches = map (\b -> map (\g -> g & phylo_groupMeta .~ (singleton "thr" [thr])) b) branches
seqMatching :: Proximity -> Double -> Map Int Double -> Int -> Double -> Int -> Map Date Double -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
seqMatching proximity beta frequency minBranch egoThr frame docs periods done ego rest =
......@@ -320,7 +322,10 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ phyloBranchMatching frame periods proximity egoThr docs (fst ego)
branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch) branches'
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
$ if (length branches' > 1)
then updateThr egoThr branches'
else branches'
--------------------------------------
quality' :: Double
quality' = toPhyloQuality' beta frequency
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment