Commit f49465e4 authored by qlobbe's avatar qlobbe

add adjustable clustering by threshold

parent e3eb8220
Pipeline #610 failed with stage
...@@ -67,12 +67,19 @@ data Proximity = ...@@ -67,12 +67,19 @@ data Proximity =
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq)
data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
data Synchrony = data Synchrony =
ByProximityThreshold ByProximityThreshold
{ _bpt_threshold :: Double { _bpt_threshold :: Double
, _bpt_sensibility :: Double} , _bpt_sensibility :: Double
, _bpt_scope :: SynchronyScope
, _bpt_strategy :: SynchronyStrategy }
| ByProximityDistribution | ByProximityDistribution
{ _bpd_sensibility :: Double} { _bpd_sensibility :: Double
, _bpd_strategy :: SynchronyStrategy }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
...@@ -84,12 +91,12 @@ data TimeUnit = ...@@ -84,12 +91,12 @@ data TimeUnit =
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data ContextualUnit = data Clique =
Fis Fis
{ _fis_support :: Int { _fis_support :: Int
, _fis_size :: Int } , _fis_size :: Int }
| MaxClique | MaxClique
{ _clique_size :: Int } { _mcl_size :: Int }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
...@@ -110,7 +117,7 @@ data Config = ...@@ -110,7 +117,7 @@ data Config =
, phyloSynchrony :: Synchrony , phyloSynchrony :: Synchrony
, phyloQuality :: Quality , phyloQuality :: Quality
, timeUnit :: TimeUnit , timeUnit :: TimeUnit
, contextualUnit :: ContextualUnit , clique :: Clique
, exportLabel :: [PhyloLabel] , exportLabel :: [PhyloLabel]
, exportSort :: Sort , exportSort :: Sort
, exportFilter :: [Filter] , exportFilter :: [Filter]
...@@ -126,10 +133,10 @@ defaultConfig = ...@@ -126,10 +133,10 @@ defaultConfig =
, phyloName = pack "Default Phylo" , phyloName = pack "Default Phylo"
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 0 0.1 , phyloProximity = WeightedLogJaccard 10 0 0.1
, phyloSynchrony = ByProximityDistribution 0 , phyloSynchrony = ByProximityThreshold 0.1 0 SiblingBranches MergeAllGroups
, phyloQuality = Quality 0.2 4 , phyloQuality = Quality 0.1 1
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, contextualUnit = Fis 1 5 , clique = Fis 1 5
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2] , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
, exportSort = ByHierarchy , exportSort = ByHierarchy
, exportFilter = [ByBranchSize 2] , exportFilter = [ByBranchSize 2]
...@@ -143,8 +150,8 @@ instance FromJSON Proximity ...@@ -143,8 +150,8 @@ instance FromJSON Proximity
instance ToJSON Proximity instance ToJSON Proximity
instance FromJSON TimeUnit instance FromJSON TimeUnit
instance ToJSON TimeUnit instance ToJSON TimeUnit
instance FromJSON ContextualUnit instance FromJSON Clique
instance ToJSON ContextualUnit instance ToJSON Clique
instance FromJSON PhyloLabel instance FromJSON PhyloLabel
instance ToJSON PhyloLabel instance ToJSON PhyloLabel
instance FromJSON Tagger instance FromJSON Tagger
...@@ -155,6 +162,10 @@ instance FromJSON Order ...@@ -155,6 +162,10 @@ instance FromJSON Order
instance ToJSON Order instance ToJSON Order
instance FromJSON Filter instance FromJSON Filter
instance ToJSON Filter instance ToJSON Filter
instance FromJSON SynchronyScope
instance ToJSON SynchronyScope
instance FromJSON SynchronyStrategy
instance ToJSON SynchronyStrategy
instance FromJSON Synchrony instance FromJSON Synchrony
instance ToJSON Synchrony instance ToJSON Synchrony
instance FromJSON Quality instance FromJSON Quality
...@@ -313,17 +324,17 @@ data Filiation = ToParents | ToChilds deriving (Generic, Show) ...@@ -313,17 +324,17 @@ data Filiation = ToParents | ToChilds deriving (Generic, Show)
data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show) data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
--------------------------- ----------------------
-- | Frequent Item Set | -- -- | Phylo Clique | --
--------------------------- ----------------------
-- | Support : Number of Documents where a Clique occurs -- | Support : Number of Documents where a Clique occurs
type Support = Int type Support = Int
data PhyloCUnit = PhyloCUnit data PhyloClique = PhyloClique
{ _phyloCUnit_nodes :: Set Ngrams { _phyloClique_nodes :: Set Ngrams
, _phyloCUnit_support :: Support , _phyloClique_support :: Support
, _phyloCUnit_period :: (Date,Date) , _phyloClique_period :: (Date,Date)
} deriving (Generic,NFData,Show,Eq) } deriving (Generic,NFData,Show,Eq)
...@@ -372,11 +383,11 @@ data PhyloExport = ...@@ -372,11 +383,11 @@ data PhyloExport =
makeLenses ''Config makeLenses ''Config
makeLenses ''Proximity makeLenses ''Proximity
makeLenses ''Quality makeLenses ''Quality
makeLenses ''ContextualUnit makeLenses ''Clique
makeLenses ''PhyloLabel makeLenses ''PhyloLabel
makeLenses ''TimeUnit makeLenses ''TimeUnit
makeLenses ''PhyloFoundations makeLenses ''PhyloFoundations
makeLenses ''PhyloCUnit makeLenses ''PhyloClique
makeLenses ''Phylo makeLenses ''Phylo
makeLenses ''PhyloPeriod makeLenses ''PhyloPeriod
makeLenses ''PhyloLevel makeLenses ''PhyloLevel
......
...@@ -54,16 +54,16 @@ phylo2 = synchronicClustering phylo1 ...@@ -54,16 +54,16 @@ phylo2 = synchronicClustering phylo1
phylo1 :: Phylo phylo1 :: Phylo
phylo1 = temporalMatching phylo1 = temporalMatching
$ appendGroups fisToGroup 1 phyloFis phyloBase $ appendGroups cliqueToGroup 1 phyloClique phyloBase
--------------------------------------------- ---------------------------------------------
-- | STEP 2 | -- Build the frequent items set -- | STEP 2 | -- Build the cliques
--------------------------------------------- ---------------------------------------------
phyloFis :: Map (Date,Date) [PhyloCUnit] phyloClique :: Map (Date,Date) [PhyloClique]
phyloFis = toPhyloFis docsByPeriods (getContextualUnitSupport $ contextualUnit config) (getContextualUnitSize $ contextualUnit config) phyloClique = toPhyloClique phyloBase docsByPeriods
docsByPeriods :: Map (Date,Date) [Document] docsByPeriods :: Map (Date,Date) [Document]
...@@ -96,7 +96,7 @@ config = ...@@ -96,7 +96,7 @@ config =
defaultConfig { phyloName = "Cesar et Cleopatre" defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloLevel = 2 , phyloLevel = 2
, exportFilter = [ByBranchSize 0] , exportFilter = [ByBranchSize 0]
, contextualUnit = Fis 0 0 } , clique = Fis 0 0 }
docs :: [Document] docs :: [Document]
......
...@@ -17,7 +17,7 @@ Portability : POSIX ...@@ -17,7 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloExport where module Gargantext.Viz.Phylo.PhyloExport where
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList) import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, delete)
import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy) import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
import Data.Vector (Vector) import Data.Vector (Vector)
...@@ -471,6 +471,7 @@ toPhyloExport phylo = exportToDot phylo ...@@ -471,6 +471,7 @@ toPhyloExport phylo = exportToDot phylo
groups :: [PhyloGroup] groups :: [PhyloGroup]
groups = traceExportGroups groups = traceExportGroups
$ processDynamics $ processDynamics
$ map (\g -> g & phylo_groupMeta %~ delete "dynamics")
$ getGroupsFromLevel (phyloLevel $ getConfig phylo) $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
$ tracePhyloInfo phylo $ tracePhyloInfo phylo
......
...@@ -16,7 +16,7 @@ Portability : POSIX ...@@ -16,7 +16,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloMaker where module Gargantext.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++), group) import Data.List (concat, nub, partition, sort, (++), group)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys) import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, singleton)
import Data.Set (size) import Data.Set (size)
import Data.Vector (Vector) import Data.Vector (Vector)
...@@ -63,7 +63,7 @@ toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $ ...@@ -63,7 +63,7 @@ toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $
-------------------- --------------------
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo appendGroups :: (a -> Double -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n") appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods $ over ( phylo_periods
. traverse . traverse
...@@ -76,7 +76,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -76,7 +76,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
in phyloLvl in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj -> & phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups) groups ++ [ (((pId,lvl),length groups)
, f obj pId lvl (length groups) (getRoots phylo) , f obj(getPhyloThresholdInit phylo) pId lvl (length groups) (getRoots phylo)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId])) (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloCUnit) ] ) [] phyloCUnit)
else else
...@@ -84,27 +84,25 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -84,27 +84,25 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo phylo
fisToGroup :: PhyloCUnit -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup cliqueToGroup :: PhyloClique -> Double -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
fisToGroup fis pId lvl idx fdt coocs = cliqueToGroup fis thr pId lvl idx fdt coocs =
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloCUnit_nodes) fdt let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloClique_nodes) fdt
in PhyloGroup pId lvl idx "" in PhyloGroup pId lvl idx ""
(fis ^. phyloCUnit_support) (fis ^. phyloClique_support)
ngrams ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
(1,[0]) (1,[0])
empty (singleton "thr" [thr])
[] [] [] [] [] [] [] []
toPhylo1 :: [Document] -> Phylo -> Phylo toPhylo1 :: [Document] -> Phylo -> Phylo
toPhylo1 docs phyloBase = temporalMatching toPhylo1 docs phyloBase = temporalMatching
$ appendGroups fisToGroup 1 phyloCUnit phyloBase $ appendGroups cliqueToGroup 1 phyloClique phyloBase
where where
-------------------------------------- --------------------------------------
phyloCUnit :: Map (Date,Date) [PhyloCUnit] phyloClique :: Map (Date,Date) [PhyloClique]
phyloCUnit = case (contextualUnit $ getConfig phyloBase) of phyloClique = toPhyloClique phyloBase docs'
Fis s s' -> toPhyloFis docs' s s'
MaxClique _ -> undefined
-------------------------------------- --------------------------------------
docs' :: Map (Date,Date) [Document] docs' :: Map (Date,Date) [Document]
docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs
...@@ -117,56 +115,59 @@ toPhylo1 docs phyloBase = temporalMatching ...@@ -117,56 +115,59 @@ toPhylo1 docs phyloBase = temporalMatching
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False) -- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis :: Bool -> Int -> (Int -> [PhyloCUnit] -> [PhyloCUnit]) -> Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit] filterClique :: Bool -> Int -> (Int -> [PhyloClique] -> [PhyloClique]) -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterFis keep thr f m = case keep of filterClique keep thr f m = case keep of
False -> map (\l -> f thr l) m False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m True -> map (\l -> keepFilled (f) thr l) m
-- | To filter Fis with small Support -- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloCUnit] -> [PhyloCUnit] filterCliqueBySupport :: Int -> [PhyloClique] -> [PhyloClique]
filterFisBySupport thr l = filter (\fis -> (fis ^. phyloCUnit_support) >= thr) l filterCliqueBySupport thr l = filter (\clq -> (clq ^. phyloClique_support) >= thr) l
-- | To filter Fis with small Clique size -- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloCUnit] -> [PhyloCUnit] filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique]
filterFisByClique thr l = filter (\fis -> (size $ fis ^. phyloCUnit_nodes) >= thr) l filterCliqueBySize thr l = filter (\clq -> (size $ clq ^. phyloClique_nodes) >= thr) l
-- | To filter nested Fis -- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit] filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterFisByNested m = filterCliqueByNested m =
let fis = map (\l -> let clq = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloCUnit_nodes) (Set.toList $ f ^. phyloCUnit_nodes)) mem) foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloClique_nodes) (Set.toList $ f ^. phyloClique_nodes)) mem)
then mem then mem
else else
let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloCUnit_nodes) (Set.toList $ f' ^. phyloCUnit_nodes)) mem let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloClique_nodes) (Set.toList $ f' ^. phyloClique_nodes)) mem
in fMax ++ [f] ) [] l) in fMax ++ [f] ) [] l)
$ elems m $ elems m
fis' = fis `using` parList rdeepseq clq' = clq `using` parList rdeepseq
in fromList $ zip (keys m) fis' in fromList $ zip (keys m) clq'
-- | To transform a time map of docs innto a time map of Fis with some filters -- | To transform a time map of docs innto a time map of Fis with some filters
toPhyloFis :: Map (Date, Date) [Document] -> Int -> Int -> Map (Date,Date) [PhyloCUnit] toPhyloClique :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [PhyloClique]
toPhyloFis phyloDocs support clique = toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
-- traceFis "Filtered Fis" Fis s s' -> -- traceFis "Filtered Fis"
filterFisByNested filterCliqueByNested
-- $ traceFis "Filtered by clique size" -- $ traceFis "Filtered by clique size"
$ filterFis True clique (filterFisByClique) $ filterClique True s' (filterCliqueBySize)
-- $ traceFis "Filtered by support" -- $ traceFis "Filtered by support"
$ filterFis True support (filterFisBySupport) $ filterClique True s (filterCliqueBySupport)
-- $ traceFis "Unfiltered Fis" -- $ traceFis "Unfiltered Fis"
phyloFis phyloClique
MaxClique _ -> undefined
where where
-------------------------------------- --------------------------------------
phyloFis :: Map (Date,Date) [PhyloCUnit] phyloClique :: Map (Date,Date) [PhyloClique]
phyloFis = phyloClique = case (clique $ getConfig phylo) of
let fis = map (\(prd,docs) -> let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs) Fis _ _ -> let fis = map (\(prd,docs) ->
in (prd, map (\f -> PhyloCUnit (fst f) (snd f) prd) lst)) let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in (prd, map (\f -> PhyloClique (fst f) (snd f) prd) lst))
$ toList phyloDocs $ toList phyloDocs
fis' = fis `using` parList rdeepseq fis' = fis `using` parList rdeepseq
in fromList fis' in fromList fis'
MaxClique _ -> undefined
-------------------------------------- --------------------------------------
......
...@@ -162,42 +162,42 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l) ...@@ -162,42 +162,42 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l)
else f thr l else f thr l
traceClique :: Map (Date, Date) [PhyloCUnit] -> String traceClique :: Map (Date, Date) [PhyloClique] -> String
traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6] traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where where
-------------------------------------- --------------------------------------
cliques :: [Double] cliques :: [Double]
cliques = sort $ map (fromIntegral . size . _phyloCUnit_nodes) $ concat $ elems mFis cliques = sort $ map (fromIntegral . size . _phyloClique_nodes) $ concat $ elems mFis
-------------------------------------- --------------------------------------
traceSupport :: Map (Date, Date) [PhyloCUnit] -> String traceSupport :: Map (Date, Date) [PhyloClique] -> String
traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6] traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where where
-------------------------------------- --------------------------------------
supports :: [Double] supports :: [Double]
supports = sort $ map (fromIntegral . _phyloCUnit_support) $ concat $ elems mFis supports = sort $ map (fromIntegral . _phyloClique_support) $ concat $ elems mFis
-------------------------------------- --------------------------------------
traceFis :: [Char] -> Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit] traceFis :: [Char] -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n" traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
<> "Support : " <> (traceSupport mFis) <> "\n" <> "Support : " <> (traceSupport mFis) <> "\n"
<> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis <> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis
------------------------- ---------------
-- | Contextual unit | -- -- | Clique| --
------------------------- ---------------
getContextualUnitSupport :: ContextualUnit -> Int getCliqueSupport :: Clique -> Int
getContextualUnitSupport unit = case unit of getCliqueSupport unit = case unit of
Fis s _ -> s Fis s _ -> s
MaxClique _ -> 0 MaxClique _ -> 0
getContextualUnitSize :: ContextualUnit -> Int getCliqueSize :: Clique -> Int
getContextualUnitSize unit = case unit of getCliqueSize unit = case unit of
Fis _ s -> s Fis _ s -> s
MaxClique s -> s MaxClique s -> s
...@@ -243,6 +243,9 @@ ngramsToCooc ngrams coocs = ...@@ -243,6 +243,9 @@ ngramsToCooc ngrams coocs =
getGroupId :: PhyloGroup -> PhyloGroupId getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex) getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
getGroupThr :: PhyloGroup -> Double
getGroupThr group = head' "getGroupThr" ((group ^. phylo_groupMeta) ! "thr")
groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup] groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
...@@ -311,6 +314,14 @@ getLevels phylo = nub ...@@ -311,6 +314,14 @@ getLevels phylo = nub
. phylo_periodLevels ) phylo . phylo_periodLevels ) phylo
getPhyloThresholdInit :: Phylo -> Double
getPhyloThresholdInit phylo = getThresholdInit (phyloProximity (getConfig phylo))
getPhyloThresholdStep :: Phylo -> Double
getPhyloThresholdStep phylo = getThresholdStep (phyloProximity (getConfig phylo))
getConfig :: Phylo -> Config getConfig :: Phylo -> Config
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
......
...@@ -19,30 +19,74 @@ import Gargantext.Prelude ...@@ -19,30 +19,74 @@ import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard) import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard)
import Gargantext.Viz.Phylo.PhyloExport (processDynamics)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn) import Data.List ((++), null, intersect, nub, concat, sort, sortOn, init, all, group, maximum, groupBy)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member) import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member, singleton)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
-- import Debug.Trace (trace)
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set
------------------------- -------------------------
-- | New Level Maker | -- -- | New Level Maker | --
------------------------- -------------------------
toBranchId :: PhyloGroup -> PhyloBranchId mergeBranchIds :: [[Int]] -> [Int]
toBranchId child = ((child ^. phylo_groupLevel) + 1, snd (child ^. phylo_groupBranchId)) mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids
where
-- | 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids
mostFreq :: [[Int]] -> [[Int]]
mostFreq ids' =
let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
sup = (fst . maximum) groupIds
in map snd $ filter (\gIds -> fst gIds == sup) groupIds
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches' groups =
-- | run the related component algorithm
let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
$ sortOn (\gs -> fst $ fst $ head' "egos" gs)
$ map (\g -> [getGroupId g]
++ (map fst $ g ^. phylo_groupPeriodParents)
++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups
-- | first find the related components by inside each ego's period
graph' = map relatedComponents egos
-- | then run it for the all the periods
graph = relatedComponents $ concat (graph' `using` parList rdeepseq)
-- | update each group's branch id
in map (\ids ->
-- intervenir ici
let groups' = elems $ restrictKeys groups (Set.fromList ids)
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl + 1,bId))) groups') graph
-- toBranchId :: PhyloGroup -> PhyloBranchId
-- toBranchId child = ((child ^. phylo_groupLevel) + 1, snd (child ^. phylo_groupBranchId))
getLastThr :: [PhyloGroup] -> Double
getLastThr childs = maximum $ concat $ map (\g -> (g ^. phylo_groupMeta) ! "thr") childs
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs = mergeGroups coocs id mapIds childs =
let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) "" in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) ""
(sum $ map _phylo_groupSupport childs) ngrams (sum $ map _phylo_groupSupport childs) ngrams
(ngramsToCooc ngrams coocs) (toBranchId (head' "mergeGroups" childs)) (ngramsToCooc ngrams coocs)
empty [] (map (\g -> (getGroupId g, 1)) childs) ((snd $ fst id),(mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) childs))
(singleton "thr" [getLastThr childs]) [] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs) (updatePointers $ concat $ map _phylo_groupPeriodParents childs)
(updatePointers $ concat $ map _phylo_groupPeriodChilds childs) (updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
where where
...@@ -57,42 +101,47 @@ addPhyloLevel lvl phylo = ...@@ -57,42 +101,47 @@ addPhyloLevel lvl phylo =
%~ (insert (phyloPrd ^. phylo_periodPeriod, lvl) (PhyloLevel (phyloPrd ^. phylo_periodPeriod) lvl empty))) phylo %~ (insert (phyloPrd ^. phylo_periodPeriod, lvl) (PhyloLevel (phyloPrd ^. phylo_periodPeriod) lvl empty))) phylo
toNextLevel :: Phylo -> [PhyloGroup] -> Phylo toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo
toNextLevel phylo groups = toNextLevel' phylo groups =
let curLvl = getLastLevel phylo let curLvl = getLastLevel phylo
oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups
newGroups = fromListWith (++) newGroups = concat $ groupsToBranches'
-- | 5) group the parents by periods $ fromList $ map (\g -> (getGroupId g, g))
$ foldlWithKey (\acc id groups' -> $ foldlWithKey (\acc id groups' ->
-- | 4) create the parent group -- | 4) create the parent group
let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups' let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups'
in acc ++ [(parent ^. phylo_groupPeriod, [parent])]) [] in acc ++ [parent]) []
-- | 3) group the current groups by parentId -- | 3) group the current groups by parentId
$ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups $ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups
newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups
in traceSynchronyEnd in traceSynchronyEnd
$ over ( phylo_periods . traverse . phylo_periodLevels . traverse $ over ( phylo_periods . traverse . phylo_periodLevels . traverse
-- | 6) update each period at curLvl + 1 -- | 6) update each period at curLvl + 1
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1))) . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1)))
-- | 7) by adding the parents -- | 7) by adding the parents
(\phyloLvl -> (\phyloLvl ->
if member (phyloLvl ^. phylo_levelPeriod) newGroups if member (phyloLvl ^. phylo_levelPeriod) newPeriods
then phyloLvl & phylo_levelGroups then phyloLvl & phylo_levelGroups
.~ fromList (map (\g -> (getGroupId g, g)) $ newGroups ! (phyloLvl ^. phylo_levelPeriod)) .~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_levelPeriod))
else phyloLvl) else phyloLvl)
-- | 2) add the curLvl + 1 phyloLevel to the phylo -- | 2) add the curLvl + 1 phyloLevel to the phylo
$ addPhyloLevel (curLvl + 1) $ addPhyloLevel (curLvl + 1)
-- | 1) update the current groups (with level parent pointers) in the phylo -- | 1) update the current groups (with level parent pointers) in the phylo
$ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo $ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
-------------------- --------------------
-- | Clustering | -- -- | Clustering | --
-------------------- --------------------
toPairs :: SynchronyStrategy -> [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
toPairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)] toPairs strategy groups = case strategy of
toPairs groups = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) MergeRegularGroups -> pairs
$ listToCombi' groups $ filter (\g -> all (== 3) $ (g ^. phylo_groupMeta) ! "dynamics") groups
MergeAllGroups -> pairs groups
where
pairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
pairs gs = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) (listToCombi' gs)
toDiamonds :: [PhyloGroup] -> [[PhyloGroup]] toDiamonds :: [PhyloGroup] -> [[PhyloGroup]]
...@@ -111,14 +160,14 @@ toDiamonds groups = foldl' (\acc groups' -> ...@@ -111,14 +160,14 @@ toDiamonds groups = foldl' (\acc groups' ->
groupsToEdges :: Proximity -> Synchrony -> Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] groupsToEdges :: Proximity -> Synchrony -> Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox sync docs groups = groupsToEdges prox sync docs groups =
case sync of case sync of
ByProximityThreshold t s -> filter (\(_,w) -> w >= t) ByProximityThreshold thr sens _ strat ->
$ toEdges s filter (\(_,w) -> w >= thr)
$ toPairs groups $ toEdges sens
$ toPairs strat groups
ByProximityDistribution s -> ByProximityDistribution sens strat ->
let diamonds = sortOn snd let diamonds = sortOn snd
$ toEdges s $ concat $ toEdges sens $ concat
$ map toPairs $ toDiamonds groups $ map (\gs -> toPairs strat gs) $ toDiamonds groups
in take (div (length diamonds) 2) diamonds in take (div (length diamonds) 2) diamonds
where where
toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)] toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)]
...@@ -142,8 +191,8 @@ toParentId :: PhyloGroup -> PhyloGroupId ...@@ -142,8 +191,8 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex) toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex)
reduceBranch :: Proximity -> Synchrony -> Map Date Double -> [PhyloGroup] -> [PhyloGroup] reduceGroups :: Proximity -> Synchrony -> Map Date Double -> [PhyloGroup] -> [PhyloGroup]
reduceBranch prox sync docs branch = reduceGroups prox sync docs branch =
-- | 1) reduce a branch as a set of periods & groups -- | 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++) let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
...@@ -159,16 +208,38 @@ reduceBranch prox sync docs branch = ...@@ -159,16 +208,38 @@ reduceBranch prox sync docs branch =
$ toRelatedComponents groups edges) periods $ toRelatedComponents groups edges) periods
getGroupRealBId :: Double -> PhyloGroup -> [Int]
getGroupRealBId step g =
let nb = round(getGroupThr g / step) + 2
in take nb (snd $ g ^. phylo_groupBranchId)
adjustClustering :: Synchrony -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
adjustClustering sync step branches = case sync of
ByProximityThreshold _ _ scope _ ->
case scope of
SingleBranch -> branches
SiblingBranches -> groupBy (\g g' -> (init $ getGroupRealBId step g) == (init $ getGroupRealBId step g'))
$ sortOn _phylo_groupBranchId $ concat branches
-- SiblingBranches -> elems $ fromListWith (++) $ map (\b -> ((init . snd . _phylo_groupBranchId) $ head' "adjustClustering" b,b)) branches
AllBranches -> [concat branches]
ByProximityDistribution _ _ -> branches
synchronicClustering :: Phylo -> Phylo synchronicClustering :: Phylo -> Phylo
synchronicClustering phylo = synchronicClustering phylo =
let prox = phyloProximity $ getConfig phylo let prox = phyloProximity $ getConfig phylo
sync = phyloSynchrony $ getConfig phylo sync = phyloSynchrony $ getConfig phylo
docs = phylo ^. phylo_timeDocs docs = phylo ^. phylo_timeDocs
branches = map (\branch -> reduceBranch prox sync docs branch) newBranches = map (\branch -> reduceGroups prox sync docs branch)
$ map processDynamics
$ adjustClustering sync (getPhyloThresholdStep phylo)
$ phyloToLastBranches $ phyloToLastBranches
$ traceSynchronyStart phylo $ traceSynchronyStart phylo
branches' = branches `using` parList rdeepseq newBranches' = newBranches `using` parList rdeepseq
in toNextLevel phylo $ concat branches' in toNextLevel' phylo $ concat newBranches'
---------------- ----------------
......
...@@ -16,7 +16,7 @@ Portability : POSIX ...@@ -16,7 +16,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or) import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or)
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, intersectionWith, findWithDefault, keys, (!), filterWithKey) import Data.Map (Map, fromList, elems, restrictKeys, unionWith, intersectionWith, findWithDefault, keys, (!), filterWithKey, singleton)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
...@@ -285,6 +285,8 @@ reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double ...@@ -285,6 +285,8 @@ reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
reduceFrequency frequency branches = reduceFrequency frequency branches =
restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches) restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
updateThr thr branches = map (\b -> map (\g -> g & phylo_groupMeta .~ (singleton "thr" [thr])) b) branches
seqMatching :: Proximity -> Double -> Map Int Double -> Int -> Double -> Int -> Map Date Double -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)] seqMatching :: Proximity -> Double -> Map Int Double -> Int -> Double -> Int -> Map Date Double -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
seqMatching proximity beta frequency minBranch egoThr frame docs periods done ego rest = seqMatching proximity beta frequency minBranch egoThr frame docs periods done ego rest =
...@@ -320,7 +322,10 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg ...@@ -320,7 +322,10 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ phyloBranchMatching frame periods proximity egoThr docs (fst ego) $ phyloBranchMatching frame periods proximity egoThr docs (fst ego)
branches' = branches `using` parList rdeepseq branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch) branches' in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
$ if (length branches' > 1)
then updateThr egoThr branches'
else branches'
-------------------------------------- --------------------------------------
quality' :: Double quality' :: Double
quality' = toPhyloQuality' beta frequency quality' = toPhyloQuality' beta frequency
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment