Commit f49465e4 authored by qlobbe's avatar qlobbe

add adjustable clustering by threshold

parent e3eb8220
......@@ -67,12 +67,19 @@ data Proximity =
deriving (Show,Generic,Eq)
data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq)
data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
data Synchrony =
ByProximityThreshold
ByProximityThreshold
{ _bpt_threshold :: Double
, _bpt_sensibility :: Double}
, _bpt_sensibility :: Double
, _bpt_scope :: SynchronyScope
, _bpt_strategy :: SynchronyStrategy }
| ByProximityDistribution
{ _bpd_sensibility :: Double}
{ _bpd_sensibility :: Double
, _bpd_strategy :: SynchronyStrategy }
deriving (Show,Generic,Eq)
......@@ -84,12 +91,12 @@ data TimeUnit =
deriving (Show,Generic,Eq)
data ContextualUnit =
data Clique =
Fis
{ _fis_support :: Int
, _fis_size :: Int }
| MaxClique
{ _clique_size :: Int }
{ _mcl_size :: Int }
deriving (Show,Generic,Eq)
......@@ -110,7 +117,7 @@ data Config =
, phyloSynchrony :: Synchrony
, phyloQuality :: Quality
, timeUnit :: TimeUnit
, contextualUnit :: ContextualUnit
, clique :: Clique
, exportLabel :: [PhyloLabel]
, exportSort :: Sort
, exportFilter :: [Filter]
......@@ -126,10 +133,10 @@ defaultConfig =
, phyloName = pack "Default Phylo"
, phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 0 0.1
, phyloSynchrony = ByProximityDistribution 0
, phyloQuality = Quality 0.2 4
, phyloSynchrony = ByProximityThreshold 0.1 0 SiblingBranches MergeAllGroups
, phyloQuality = Quality 0.1 1
, timeUnit = Year 3 1 5
, contextualUnit = Fis 1 5
, clique = Fis 1 5
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
, exportSort = ByHierarchy
, exportFilter = [ByBranchSize 2]
......@@ -143,8 +150,8 @@ instance FromJSON Proximity
instance ToJSON Proximity
instance FromJSON TimeUnit
instance ToJSON TimeUnit
instance FromJSON ContextualUnit
instance ToJSON ContextualUnit
instance FromJSON Clique
instance ToJSON Clique
instance FromJSON PhyloLabel
instance ToJSON PhyloLabel
instance FromJSON Tagger
......@@ -155,6 +162,10 @@ instance FromJSON Order
instance ToJSON Order
instance FromJSON Filter
instance ToJSON Filter
instance FromJSON SynchronyScope
instance ToJSON SynchronyScope
instance FromJSON SynchronyStrategy
instance ToJSON SynchronyStrategy
instance FromJSON Synchrony
instance ToJSON Synchrony
instance FromJSON Quality
......@@ -313,17 +324,17 @@ data Filiation = ToParents | ToChilds deriving (Generic, Show)
data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
---------------------------
-- | Frequent Item Set | --
---------------------------
----------------------
-- | Phylo Clique | --
----------------------
-- | Support : Number of Documents where a Clique occurs
type Support = Int
data PhyloCUnit = PhyloCUnit
{ _phyloCUnit_nodes :: Set Ngrams
, _phyloCUnit_support :: Support
, _phyloCUnit_period :: (Date,Date)
data PhyloClique = PhyloClique
{ _phyloClique_nodes :: Set Ngrams
, _phyloClique_support :: Support
, _phyloClique_period :: (Date,Date)
} deriving (Generic,NFData,Show,Eq)
......@@ -372,11 +383,11 @@ data PhyloExport =
makeLenses ''Config
makeLenses ''Proximity
makeLenses ''Quality
makeLenses ''ContextualUnit
makeLenses ''Clique
makeLenses ''PhyloLabel
makeLenses ''TimeUnit
makeLenses ''PhyloFoundations
makeLenses ''PhyloCUnit
makeLenses ''PhyloClique
makeLenses ''Phylo
makeLenses ''PhyloPeriod
makeLenses ''PhyloLevel
......
......@@ -54,16 +54,16 @@ phylo2 = synchronicClustering phylo1
phylo1 :: Phylo
phylo1 = temporalMatching
$ appendGroups fisToGroup 1 phyloFis phyloBase
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
---------------------------------------------
-- | STEP 2 | -- Build the frequent items set
-- | STEP 2 | -- Build the cliques
---------------------------------------------
phyloFis :: Map (Date,Date) [PhyloCUnit]
phyloFis = toPhyloFis docsByPeriods (getContextualUnitSupport $ contextualUnit config) (getContextualUnitSize $ contextualUnit config)
phyloClique :: Map (Date,Date) [PhyloClique]
phyloClique = toPhyloClique phyloBase docsByPeriods
docsByPeriods :: Map (Date,Date) [Document]
......@@ -96,7 +96,7 @@ config =
defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloLevel = 2
, exportFilter = [ByBranchSize 0]
, contextualUnit = Fis 0 0 }
, clique = Fis 0 0 }
docs :: [Document]
......
......@@ -17,7 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloExport where
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, delete)
import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
import Data.Vector (Vector)
......@@ -471,6 +471,7 @@ toPhyloExport phylo = exportToDot phylo
groups :: [PhyloGroup]
groups = traceExportGroups
$ processDynamics
$ map (\g -> g & phylo_groupMeta %~ delete "dynamics")
$ getGroupsFromLevel (phyloLevel $ getConfig phylo)
$ tracePhyloInfo phylo
......
......@@ -16,7 +16,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++), group)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, singleton)
import Data.Set (size)
import Data.Vector (Vector)
......@@ -63,7 +63,7 @@ toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $
--------------------
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups :: (a -> Double -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods
. traverse
......@@ -76,7 +76,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups)
, f obj pId lvl (length groups) (getRoots phylo)
, f obj(getPhyloThresholdInit phylo) pId lvl (length groups) (getRoots phylo)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloCUnit)
else
......@@ -84,27 +84,25 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo
fisToGroup :: PhyloCUnit -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
fisToGroup fis pId lvl idx fdt coocs =
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloCUnit_nodes) fdt
cliqueToGroup :: PhyloClique -> Double -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
cliqueToGroup fis thr pId lvl idx fdt coocs =
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloClique_nodes) fdt
in PhyloGroup pId lvl idx ""
(fis ^. phyloCUnit_support)
(fis ^. phyloClique_support)
ngrams
(ngramsToCooc ngrams coocs)
(1,[0])
empty
(singleton "thr" [thr])
[] [] [] []
toPhylo1 :: [Document] -> Phylo -> Phylo
toPhylo1 docs phyloBase = temporalMatching
$ appendGroups fisToGroup 1 phyloCUnit phyloBase
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
where
--------------------------------------
phyloCUnit :: Map (Date,Date) [PhyloCUnit]
phyloCUnit = case (contextualUnit $ getConfig phyloBase) of
Fis s s' -> toPhyloFis docs' s s'
MaxClique _ -> undefined
phyloClique :: Map (Date,Date) [PhyloClique]
phyloClique = toPhyloClique phyloBase docs'
--------------------------------------
docs' :: Map (Date,Date) [Document]
docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs
......@@ -117,56 +115,59 @@ toPhylo1 docs phyloBase = temporalMatching
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis :: Bool -> Int -> (Int -> [PhyloCUnit] -> [PhyloCUnit]) -> Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit]
filterFis keep thr f m = case keep of
filterClique :: Bool -> Int -> (Int -> [PhyloClique] -> [PhyloClique]) -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterClique keep thr f m = case keep of
False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m
-- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloCUnit] -> [PhyloCUnit]
filterFisBySupport thr l = filter (\fis -> (fis ^. phyloCUnit_support) >= thr) l
filterCliqueBySupport :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySupport thr l = filter (\clq -> (clq ^. phyloClique_support) >= thr) l
-- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloCUnit] -> [PhyloCUnit]
filterFisByClique thr l = filter (\fis -> (size $ fis ^. phyloCUnit_nodes) >= thr) l
filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySize thr l = filter (\clq -> (size $ clq ^. phyloClique_nodes) >= thr) l
-- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit]
filterFisByNested m =
let fis = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloCUnit_nodes) (Set.toList $ f ^. phyloCUnit_nodes)) mem)
filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterCliqueByNested m =
let clq = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloClique_nodes) (Set.toList $ f ^. phyloClique_nodes)) mem)
then mem
else
let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloCUnit_nodes) (Set.toList $ f' ^. phyloCUnit_nodes)) mem
let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloClique_nodes) (Set.toList $ f' ^. phyloClique_nodes)) mem
in fMax ++ [f] ) [] l)
$ elems m
fis' = fis `using` parList rdeepseq
in fromList $ zip (keys m) fis'
clq' = clq `using` parList rdeepseq
in fromList $ zip (keys m) clq'
-- | To transform a time map of docs innto a time map of Fis with some filters
toPhyloFis :: Map (Date, Date) [Document] -> Int -> Int -> Map (Date,Date) [PhyloCUnit]
toPhyloFis phyloDocs support clique =
-- traceFis "Filtered Fis"
filterFisByNested
toPhyloClique :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [PhyloClique]
toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested
-- $ traceFis "Filtered by clique size"
$ filterFis True clique (filterFisByClique)
$ filterClique True s' (filterCliqueBySize)
-- $ traceFis "Filtered by support"
$ filterFis True support (filterFisBySupport)
$ filterClique True s (filterCliqueBySupport)
-- $ traceFis "Unfiltered Fis"
phyloFis
phyloClique
MaxClique _ -> undefined
where
--------------------------------------
phyloFis :: Map (Date,Date) [PhyloCUnit]
phyloFis =
let fis = map (\(prd,docs) -> let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in (prd, map (\f -> PhyloCUnit (fst f) (snd f) prd) lst))
$ toList phyloDocs
fis' = fis `using` parList rdeepseq
in fromList fis'
phyloClique :: Map (Date,Date) [PhyloClique]
phyloClique = case (clique $ getConfig phylo) of
Fis _ _ -> let fis = map (\(prd,docs) ->
let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in (prd, map (\f -> PhyloClique (fst f) (snd f) prd) lst))
$ toList phyloDocs
fis' = fis `using` parList rdeepseq
in fromList fis'
MaxClique _ -> undefined
--------------------------------------
......
......@@ -162,42 +162,42 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l)
else f thr l
traceClique :: Map (Date, Date) [PhyloCUnit] -> String
traceClique :: Map (Date, Date) [PhyloClique] -> String
traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where
--------------------------------------
cliques :: [Double]
cliques = sort $ map (fromIntegral . size . _phyloCUnit_nodes) $ concat $ elems mFis
cliques = sort $ map (fromIntegral . size . _phyloClique_nodes) $ concat $ elems mFis
--------------------------------------
traceSupport :: Map (Date, Date) [PhyloCUnit] -> String
traceSupport :: Map (Date, Date) [PhyloClique] -> String
traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where
--------------------------------------
supports :: [Double]
supports = sort $ map (fromIntegral . _phyloCUnit_support) $ concat $ elems mFis
supports = sort $ map (fromIntegral . _phyloClique_support) $ concat $ elems mFis
--------------------------------------
traceFis :: [Char] -> Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit]
traceFis :: [Char] -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
<> "Support : " <> (traceSupport mFis) <> "\n"
<> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis
-------------------------
-- | Contextual unit | --
-------------------------
---------------
-- | Clique| --
---------------
getContextualUnitSupport :: ContextualUnit -> Int
getContextualUnitSupport unit = case unit of
getCliqueSupport :: Clique -> Int
getCliqueSupport unit = case unit of
Fis s _ -> s
MaxClique _ -> 0
getContextualUnitSize :: ContextualUnit -> Int
getContextualUnitSize unit = case unit of
getCliqueSize :: Clique -> Int
getCliqueSize unit = case unit of
Fis _ s -> s
MaxClique s -> s
......@@ -243,6 +243,9 @@ ngramsToCooc ngrams coocs =
getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
getGroupThr :: PhyloGroup -> Double
getGroupThr group = head' "getGroupThr" ((group ^. phylo_groupMeta) ! "thr")
groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
......@@ -311,6 +314,14 @@ getLevels phylo = nub
. phylo_periodLevels ) phylo
getPhyloThresholdInit :: Phylo -> Double
getPhyloThresholdInit phylo = getThresholdInit (phyloProximity (getConfig phylo))
getPhyloThresholdStep :: Phylo -> Double
getPhyloThresholdStep phylo = getThresholdStep (phyloProximity (getConfig phylo))
getConfig :: Phylo -> Config
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
......
......@@ -16,7 +16,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or)
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, intersectionWith, findWithDefault, keys, (!), filterWithKey)
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, intersectionWith, findWithDefault, keys, (!), filterWithKey, singleton)
import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
......@@ -285,6 +285,8 @@ reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
reduceFrequency frequency branches =
restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
updateThr thr branches = map (\b -> map (\g -> g & phylo_groupMeta .~ (singleton "thr" [thr])) b) branches
seqMatching :: Proximity -> Double -> Map Int Double -> Int -> Double -> Int -> Map Date Double -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
seqMatching proximity beta frequency minBranch egoThr frame docs periods done ego rest =
......@@ -320,7 +322,10 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ phyloBranchMatching frame periods proximity egoThr docs (fst ego)
branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch) branches'
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
$ if (length branches' > 1)
then updateThr egoThr branches'
else branches'
--------------------------------------
quality' :: Double
quality' = toPhyloQuality' beta frequency
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment