Commit 7fef621e authored by qlobbe's avatar qlobbe

add the maxClique (in progress)

parent 4f17f5dd
...@@ -88,12 +88,14 @@ data ContextualUnit = ...@@ -88,12 +88,14 @@ data ContextualUnit =
Fis Fis
{ _fis_support :: Int { _fis_support :: Int
, _fis_size :: Int } , _fis_size :: Int }
| MaxClique
{ _clique_size :: Int }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data Quality = data Quality =
Quality { _qua_relevance :: Double Quality { _qua_granularity :: Double
, _qua_minBranch :: Int } , _qua_minBranch :: Int }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
...@@ -125,7 +127,7 @@ defaultConfig = ...@@ -125,7 +127,7 @@ defaultConfig =
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 0 0.1 , phyloProximity = WeightedLogJaccard 10 0 0.1
, phyloSynchrony = ByProximityDistribution 0 , phyloSynchrony = ByProximityDistribution 0
, phyloQuality = Quality 10 3 , phyloQuality = Quality 0 3
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, contextualUnit = Fis 1 5 , contextualUnit = Fis 1 5
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2] , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
...@@ -315,17 +317,13 @@ data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show) ...@@ -315,17 +317,13 @@ data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
-- | Frequent Item Set | -- -- | Frequent Item Set | --
--------------------------- ---------------------------
-- | Clique : Set of ngrams cooccurring in the same Document
type Clique = Set Ngrams
-- | Support : Number of Documents where a Clique occurs -- | Support : Number of Documents where a Clique occurs
type Support = Int type Support = Int
-- | Fis : Frequent Items Set (ie: the association between a Clique and a Support) data PhyloCUnit = PhyloCUnit
data PhyloFis = PhyloFis { _phyloCUnit_nodes :: Set Ngrams
{ _phyloFis_clique :: Clique , _phyloCUnit_support :: Support
, _phyloFis_support :: Support , _phyloCUnit_period :: (Date,Date)
, _phyloFis_period :: (Date,Date)
} deriving (Generic,NFData,Show,Eq) } deriving (Generic,NFData,Show,Eq)
...@@ -378,7 +376,7 @@ makeLenses ''ContextualUnit ...@@ -378,7 +376,7 @@ makeLenses ''ContextualUnit
makeLenses ''PhyloLabel makeLenses ''PhyloLabel
makeLenses ''TimeUnit makeLenses ''TimeUnit
makeLenses ''PhyloFoundations makeLenses ''PhyloFoundations
makeLenses ''PhyloFis makeLenses ''PhyloCUnit
makeLenses ''Phylo makeLenses ''Phylo
makeLenses ''PhyloPeriod makeLenses ''PhyloPeriod
makeLenses ''PhyloLevel makeLenses ''PhyloLevel
......
...@@ -62,8 +62,8 @@ phylo1 = temporalMatching ...@@ -62,8 +62,8 @@ phylo1 = temporalMatching
--------------------------------------------- ---------------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis] phyloFis :: Map (Date,Date) [PhyloCUnit]
phyloFis = toPhyloFis docsByPeriods (getFisSupport $ contextualUnit config) (getFisSize $ contextualUnit config) phyloFis = toPhyloFis docsByPeriods (getContextualUnitSupport $ contextualUnit config) (getContextualUnitSize $ contextualUnit config)
docsByPeriods :: Map (Date,Date) [Document] docsByPeriods :: Map (Date,Date) [Document]
......
...@@ -171,7 +171,7 @@ exportToDot phylo export = ...@@ -171,7 +171,7 @@ exportToDot phylo export =
,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo)) ,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo)) ,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo)) ,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_relevance $ phyloQuality $ getConfig phylo)) ,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
]) ])
......
...@@ -72,23 +72,23 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -72,23 +72,23 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
(\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel) (\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel)
then then
let pId = phyloLvl ^. phylo_levelPeriod let pId = phyloLvl ^. phylo_levelPeriod
phyloFis = m ! pId phyloCUnit = m ! pId
in phyloLvl in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj -> & phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups) groups ++ [ (((pId,lvl),length groups)
, f obj pId lvl (length groups) (getRoots phylo) , f obj pId lvl (length groups) (getRoots phylo)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId])) (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloFis) ] ) [] phyloCUnit)
else else
phyloLvl ) phyloLvl )
phylo phylo
fisToGroup :: PhyloFis -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup fisToGroup :: PhyloCUnit -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
fisToGroup fis pId lvl idx fdt coocs = fisToGroup fis pId lvl idx fdt coocs =
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloFis_clique) fdt let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloCUnit_nodes) fdt
in PhyloGroup pId lvl idx "" in PhyloGroup pId lvl idx ""
(fis ^. phyloFis_support) (fis ^. phyloCUnit_support)
ngrams ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
(1,[0]) (1,[0])
...@@ -98,11 +98,13 @@ fisToGroup fis pId lvl idx fdt coocs = ...@@ -98,11 +98,13 @@ fisToGroup fis pId lvl idx fdt coocs =
toPhylo1 :: [Document] -> Phylo -> Phylo toPhylo1 :: [Document] -> Phylo -> Phylo
toPhylo1 docs phyloBase = temporalMatching toPhylo1 docs phyloBase = temporalMatching
$ appendGroups fisToGroup 1 phyloFis phyloBase $ appendGroups fisToGroup 1 phyloCUnit phyloBase
where where
-------------------------------------- --------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis] phyloCUnit :: Map (Date,Date) [PhyloCUnit]
phyloFis = toPhyloFis docs' (getFisSupport $ contextualUnit $ getConfig phyloBase) (getFisSize $ contextualUnit $ getConfig phyloBase) phyloCUnit = case (contextualUnit $ getConfig phyloBase) of
Fis s s' -> toPhyloFis docs' s s'
MaxClique _ -> undefined
-------------------------------------- --------------------------------------
docs' :: Map (Date,Date) [Document] docs' :: Map (Date,Date) [Document]
docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs
...@@ -115,30 +117,30 @@ toPhylo1 docs phyloBase = temporalMatching ...@@ -115,30 +117,30 @@ toPhylo1 docs phyloBase = temporalMatching
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False) -- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis :: Bool -> Int -> (Int -> [PhyloFis] -> [PhyloFis]) -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] filterFis :: Bool -> Int -> (Int -> [PhyloCUnit] -> [PhyloCUnit]) -> Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit]
filterFis keep thr f m = case keep of filterFis keep thr f m = case keep of
False -> map (\l -> f thr l) m False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m True -> map (\l -> keepFilled (f) thr l) m
-- | To filter Fis with small Support -- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis] filterFisBySupport :: Int -> [PhyloCUnit] -> [PhyloCUnit]
filterFisBySupport thr l = filter (\fis -> (fis ^. phyloFis_support) >= thr) l filterFisBySupport thr l = filter (\fis -> (fis ^. phyloCUnit_support) >= thr) l
-- | To filter Fis with small Clique size -- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis] filterFisByClique :: Int -> [PhyloCUnit] -> [PhyloCUnit]
filterFisByClique thr l = filter (\fis -> (size $ fis ^. phyloFis_clique) >= thr) l filterFisByClique thr l = filter (\fis -> (size $ fis ^. phyloCUnit_nodes) >= thr) l
-- | To filter nested Fis -- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] filterFisByNested :: Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit]
filterFisByNested m = filterFisByNested m =
let fis = map (\l -> let fis = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloFis_clique) (Set.toList $ f ^. phyloFis_clique)) mem) foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloCUnit_nodes) (Set.toList $ f ^. phyloCUnit_nodes)) mem)
then mem then mem
else else
let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloFis_clique) (Set.toList $ f' ^. phyloFis_clique)) mem let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloCUnit_nodes) (Set.toList $ f' ^. phyloCUnit_nodes)) mem
in fMax ++ [f] ) [] l) in fMax ++ [f] ) [] l)
$ elems m $ elems m
fis' = fis `using` parList rdeepseq fis' = fis `using` parList rdeepseq
...@@ -146,7 +148,7 @@ filterFisByNested m = ...@@ -146,7 +148,7 @@ filterFisByNested m =
-- | To transform a time map of docs innto a time map of Fis with some filters -- | To transform a time map of docs innto a time map of Fis with some filters
toPhyloFis :: Map (Date, Date) [Document] -> Int -> Int -> Map (Date,Date) [PhyloFis] toPhyloFis :: Map (Date, Date) [Document] -> Int -> Int -> Map (Date,Date) [PhyloCUnit]
toPhyloFis phyloDocs support clique = traceFis "Filtered Fis" toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
$ filterFisByNested $ filterFisByNested
$ traceFis "Filtered by clique size" $ traceFis "Filtered by clique size"
...@@ -156,10 +158,10 @@ toPhyloFis phyloDocs support clique = traceFis "Filtered Fis" ...@@ -156,10 +158,10 @@ toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
$ traceFis "Unfiltered Fis" phyloFis $ traceFis "Unfiltered Fis" phyloFis
where where
-------------------------------------- --------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis] phyloFis :: Map (Date,Date) [PhyloCUnit]
phyloFis = phyloFis =
let fis = map (\(prd,docs) -> let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs) let fis = map (\(prd,docs) -> let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in (prd, map (\f -> PhyloFis (fst f) (snd f) prd) lst)) in (prd, map (\f -> PhyloCUnit (fst f) (snd f) prd) lst))
$ toList phyloDocs $ toList phyloDocs
fis' = fis `using` parList rdeepseq fis' = fis `using` parList rdeepseq
in fromList fis' in fromList fis'
...@@ -209,9 +211,9 @@ groupDocsByPeriod f pds es = ...@@ -209,9 +211,9 @@ groupDocsByPeriod f pds es =
docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double
docsToTermFreq docs fdt = docsToTermFreq docs fdt =
let nbDocs = fromIntegral $ length docs let nbDocs = fromIntegral $ length docs
freqs = map (/nbDocs) freqs = map (/(log nbDocs))
$ fromList $ fromList
$ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst)) $ map (\lst -> (head' "docsToTermFreq" lst, log $ fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) docs $ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) docs
sumFreqs = sum $ elems freqs sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs in map (/sumFreqs) freqs
......
...@@ -162,28 +162,28 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l) ...@@ -162,28 +162,28 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l)
else f thr l else f thr l
traceClique :: Map (Date, Date) [PhyloFis] -> String traceClique :: Map (Date, Date) [PhyloCUnit] -> String
traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6] traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where where
-------------------------------------- --------------------------------------
cliques :: [Double] cliques :: [Double]
cliques = sort $ map (fromIntegral . size . _phyloFis_clique) $ concat $ elems mFis cliques = sort $ map (fromIntegral . size . _phyloCUnit_nodes) $ concat $ elems mFis
-------------------------------------- --------------------------------------
traceSupport :: Map (Date, Date) [PhyloFis] -> String traceSupport :: Map (Date, Date) [PhyloCUnit] -> String
traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6] traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where where
-------------------------------------- --------------------------------------
supports :: [Double] supports :: [Double]
supports = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems mFis supports = sort $ map (fromIntegral . _phyloCUnit_support) $ concat $ elems mFis
-------------------------------------- --------------------------------------
traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] traceFis :: [Char] -> Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit]
traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n" traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
<> "Support : " <> (traceSupport mFis) <> "\n" <> "Support : " <> (traceSupport mFis) <> "\n"
<> "Clique : " <> (traceClique mFis) <> "\n" ) mFis <> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis
------------------------- -------------------------
...@@ -191,15 +191,15 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l ...@@ -191,15 +191,15 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
------------------------- -------------------------
getFisSupport :: ContextualUnit -> Int getContextualUnitSupport :: ContextualUnit -> Int
getFisSupport unit = case unit of getContextualUnitSupport unit = case unit of
Fis s _ -> s Fis s _ -> s
-- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support") MaxClique _ -> 0
getFisSize :: ContextualUnit -> Int getContextualUnitSize :: ContextualUnit -> Int
getFisSize unit = case unit of getContextualUnitSize unit = case unit of
Fis _ s -> s Fis _ s -> s
-- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size") MaxClique s -> s
-------------- --------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment