Commit 7fef621e authored by qlobbe's avatar qlobbe

add the maxClique (in progress)

parent 4f17f5dd
Pipeline #596 failed with stage
......@@ -88,12 +88,14 @@ data ContextualUnit =
Fis
{ _fis_support :: Int
, _fis_size :: Int }
| MaxClique
{ _clique_size :: Int }
deriving (Show,Generic,Eq)
data Quality =
Quality { _qua_relevance :: Double
, _qua_minBranch :: Int }
Quality { _qua_granularity :: Double
, _qua_minBranch :: Int }
deriving (Show,Generic,Eq)
......@@ -125,7 +127,7 @@ defaultConfig =
, phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 0 0.1
, phyloSynchrony = ByProximityDistribution 0
, phyloQuality = Quality 10 3
, phyloQuality = Quality 0 3
, timeUnit = Year 3 1 5
, contextualUnit = Fis 1 5
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
......@@ -315,17 +317,13 @@ data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
-- | Frequent Item Set | --
---------------------------
-- | Clique : Set of ngrams cooccurring in the same Document
type Clique = Set Ngrams
-- | Support : Number of Documents where a Clique occurs
type Support = Int
-- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
data PhyloFis = PhyloFis
{ _phyloFis_clique :: Clique
, _phyloFis_support :: Support
, _phyloFis_period :: (Date,Date)
data PhyloCUnit = PhyloCUnit
{ _phyloCUnit_nodes :: Set Ngrams
, _phyloCUnit_support :: Support
, _phyloCUnit_period :: (Date,Date)
} deriving (Generic,NFData,Show,Eq)
......@@ -378,7 +376,7 @@ makeLenses ''ContextualUnit
makeLenses ''PhyloLabel
makeLenses ''TimeUnit
makeLenses ''PhyloFoundations
makeLenses ''PhyloFis
makeLenses ''PhyloCUnit
makeLenses ''Phylo
makeLenses ''PhyloPeriod
makeLenses ''PhyloLevel
......
......@@ -62,8 +62,8 @@ phylo1 = temporalMatching
---------------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis]
phyloFis = toPhyloFis docsByPeriods (getFisSupport $ contextualUnit config) (getFisSize $ contextualUnit config)
phyloFis :: Map (Date,Date) [PhyloCUnit]
phyloFis = toPhyloFis docsByPeriods (getContextualUnitSupport $ contextualUnit config) (getContextualUnitSize $ contextualUnit config)
docsByPeriods :: Map (Date,Date) [Document]
......
......@@ -171,7 +171,7 @@ exportToDot phylo export =
,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_relevance $ phyloQuality $ getConfig phylo))
,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
])
......
......@@ -72,23 +72,23 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
(\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel)
then
let pId = phyloLvl ^. phylo_levelPeriod
phyloFis = m ! pId
phyloCUnit = m ! pId
in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups)
, f obj pId lvl (length groups) (getRoots phylo)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloFis)
] ) [] phyloCUnit)
else
phyloLvl )
phylo
fisToGroup :: PhyloFis -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
fisToGroup :: PhyloCUnit -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
fisToGroup fis pId lvl idx fdt coocs =
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloFis_clique) fdt
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloCUnit_nodes) fdt
in PhyloGroup pId lvl idx ""
(fis ^. phyloFis_support)
(fis ^. phyloCUnit_support)
ngrams
(ngramsToCooc ngrams coocs)
(1,[0])
......@@ -98,11 +98,13 @@ fisToGroup fis pId lvl idx fdt coocs =
toPhylo1 :: [Document] -> Phylo -> Phylo
toPhylo1 docs phyloBase = temporalMatching
$ appendGroups fisToGroup 1 phyloFis phyloBase
$ appendGroups fisToGroup 1 phyloCUnit phyloBase
where
--------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis]
phyloFis = toPhyloFis docs' (getFisSupport $ contextualUnit $ getConfig phyloBase) (getFisSize $ contextualUnit $ getConfig phyloBase)
phyloCUnit :: Map (Date,Date) [PhyloCUnit]
phyloCUnit = case (contextualUnit $ getConfig phyloBase) of
Fis s s' -> toPhyloFis docs' s s'
MaxClique _ -> undefined
--------------------------------------
docs' :: Map (Date,Date) [Document]
docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs
......@@ -115,30 +117,30 @@ toPhylo1 docs phyloBase = temporalMatching
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis :: Bool -> Int -> (Int -> [PhyloFis] -> [PhyloFis]) -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFis :: Bool -> Int -> (Int -> [PhyloCUnit] -> [PhyloCUnit]) -> Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit]
filterFis keep thr f m = case keep of
False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m
-- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
filterFisBySupport thr l = filter (\fis -> (fis ^. phyloFis_support) >= thr) l
filterFisBySupport :: Int -> [PhyloCUnit] -> [PhyloCUnit]
filterFisBySupport thr l = filter (\fis -> (fis ^. phyloCUnit_support) >= thr) l
-- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ fis ^. phyloFis_clique) >= thr) l
filterFisByClique :: Int -> [PhyloCUnit] -> [PhyloCUnit]
filterFisByClique thr l = filter (\fis -> (size $ fis ^. phyloCUnit_nodes) >= thr) l
-- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFisByNested :: Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit]
filterFisByNested m =
let fis = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloFis_clique) (Set.toList $ f ^. phyloFis_clique)) mem)
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloCUnit_nodes) (Set.toList $ f ^. phyloCUnit_nodes)) mem)
then mem
else
let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloFis_clique) (Set.toList $ f' ^. phyloFis_clique)) mem
let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloCUnit_nodes) (Set.toList $ f' ^. phyloCUnit_nodes)) mem
in fMax ++ [f] ) [] l)
$ elems m
fis' = fis `using` parList rdeepseq
......@@ -146,7 +148,7 @@ filterFisByNested m =
-- | To transform a time map of docs innto a time map of Fis with some filters
toPhyloFis :: Map (Date, Date) [Document] -> Int -> Int -> Map (Date,Date) [PhyloFis]
toPhyloFis :: Map (Date, Date) [Document] -> Int -> Int -> Map (Date,Date) [PhyloCUnit]
toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
$ filterFisByNested
$ traceFis "Filtered by clique size"
......@@ -156,10 +158,10 @@ toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
$ traceFis "Unfiltered Fis" phyloFis
where
--------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis]
phyloFis :: Map (Date,Date) [PhyloCUnit]
phyloFis =
let fis = map (\(prd,docs) -> let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in (prd, map (\f -> PhyloFis (fst f) (snd f) prd) lst))
in (prd, map (\f -> PhyloCUnit (fst f) (snd f) prd) lst))
$ toList phyloDocs
fis' = fis `using` parList rdeepseq
in fromList fis'
......@@ -209,9 +211,9 @@ groupDocsByPeriod f pds es =
docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double
docsToTermFreq docs fdt =
let nbDocs = fromIntegral $ length docs
freqs = map (/nbDocs)
freqs = map (/(log nbDocs))
$ fromList
$ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst))
$ map (\lst -> (head' "docsToTermFreq" lst, log $ fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) docs
sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs
......
......@@ -162,28 +162,28 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l)
else f thr l
traceClique :: Map (Date, Date) [PhyloFis] -> String
traceClique :: Map (Date, Date) [PhyloCUnit] -> String
traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where
--------------------------------------
cliques :: [Double]
cliques = sort $ map (fromIntegral . size . _phyloFis_clique) $ concat $ elems mFis
cliques = sort $ map (fromIntegral . size . _phyloCUnit_nodes) $ concat $ elems mFis
--------------------------------------
traceSupport :: Map (Date, Date) [PhyloFis] -> String
traceSupport :: Map (Date, Date) [PhyloCUnit] -> String
traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where
--------------------------------------
supports :: [Double]
supports = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems mFis
supports = sort $ map (fromIntegral . _phyloCUnit_support) $ concat $ elems mFis
--------------------------------------
traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
traceFis :: [Char] -> Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit]
traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
<> "Support : " <> (traceSupport mFis) <> "\n"
<> "Clique : " <> (traceClique mFis) <> "\n" ) mFis
<> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis
-------------------------
......@@ -191,15 +191,15 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
-------------------------
getFisSupport :: ContextualUnit -> Int
getFisSupport unit = case unit of
getContextualUnitSupport :: ContextualUnit -> Int
getContextualUnitSupport unit = case unit of
Fis s _ -> s
-- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support")
MaxClique _ -> 0
getFisSize :: ContextualUnit -> Int
getFisSize unit = case unit of
getContextualUnitSize :: ContextualUnit -> Int
getContextualUnitSize unit = case unit of
Fis _ s -> s
-- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size")
MaxClique s -> s
--------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment