Commit 8913a00b authored by Quentin Lobbé's avatar Quentin Lobbé

add the mecanisms for filtering the FIS if needed

parent 1a96a74d
Pipeline #329 failed with stage
...@@ -201,8 +201,11 @@ type Clique = Set Ngrams ...@@ -201,8 +201,11 @@ type Clique = Set Ngrams
-- | Support : Number of Documents where a Clique occurs -- | Support : Number of Documents where a Clique occurs
type Support = Int type Support = Int
-- | Fis : Frequent Items Set (ie: the association between a Clique and a Support) -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
type PhyloFis = (Clique,Support) data PhyloFis = PhyloFis
{ _phyloFis_clique :: Clique
, _phyloFis_support :: Support
, _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
} deriving (Show)
-- | A list of clustered PhyloGroup -- | A list of clustered PhyloGroup
type PhyloCluster = [PhyloGroup] type PhyloCluster = [PhyloGroup]
...@@ -239,8 +242,7 @@ data Cluster = Fis FisParams ...@@ -239,8 +242,7 @@ data Cluster = Fis FisParams
-- | Parameters for Fis clustering -- | Parameters for Fis clustering
data FisParams = FisParams data FisParams = FisParams
{ _fis_filtered :: Bool { _fis_keepMinorFis :: Bool
, _fis_keepMinorFis :: Bool
, _fis_minSupport :: Support , _fis_minSupport :: Support
} deriving (Show) } deriving (Show)
...@@ -334,6 +336,8 @@ data PhyloQuery = PhyloQuery ...@@ -334,6 +336,8 @@ data PhyloQuery = PhyloQuery
-- Clustering method for building the contextual unit of Phylo (ie: level 1) -- Clustering method for building the contextual unit of Phylo (ie: level 1)
, _q_contextualUnit :: Cluster , _q_contextualUnit :: Cluster
, _q_contextualUnitMetrics :: [Metric]
, _q_contextualUnitFilters :: [Filter]
-- Inter-temporal matching method of the Phylo -- Inter-temporal matching method of the Phylo
, _q_interTemporalMatching :: Proximity , _q_interTemporalMatching :: Proximity
...@@ -438,6 +442,7 @@ makeLenses ''PhyloPeaks ...@@ -438,6 +442,7 @@ makeLenses ''PhyloPeaks
makeLenses ''PhyloGroup makeLenses ''PhyloGroup
makeLenses ''PhyloLevel makeLenses ''PhyloLevel
makeLenses ''PhyloPeriod makeLenses ''PhyloPeriod
makeLenses ''PhyloFis
-- --
makeLenses ''Proximity makeLenses ''Proximity
makeLenses ''Cluster makeLenses ''Cluster
...@@ -463,10 +468,13 @@ $(deriveJSON defaultOptions ''Tree ) ...@@ -463,10 +468,13 @@ $(deriveJSON defaultOptions ''Tree )
$(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod ) $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
$(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel ) $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
$(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup ) $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
$(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
-- --
$(deriveJSON (unPrefix "_software_" ) ''Software ) $(deriveJSON (unPrefix "_software_" ) ''Software )
$(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam ) $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
-- --
$(deriveJSON defaultOptions ''Filter )
$(deriveJSON defaultOptions ''Metric )
$(deriveJSON defaultOptions ''Cluster ) $(deriveJSON defaultOptions ''Cluster )
$(deriveJSON defaultOptions ''Proximity ) $(deriveJSON defaultOptions ''Proximity )
-- --
...@@ -475,6 +483,7 @@ $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams ) ...@@ -475,6 +483,7 @@ $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
$(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams ) $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
$(deriveJSON (unPrefix "_rc_" ) ''RCParams ) $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
$(deriveJSON (unPrefix "_wlj_" ) ''WLJParams ) $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
$(deriveJSON (unPrefix "_sb_" ) ''SBParams )
-- --
$(deriveJSON (unPrefix "_q_" ) ''PhyloQuery ) $(deriveJSON (unPrefix "_q_" ) ''PhyloQuery )
......
...@@ -37,15 +37,15 @@ fisToCooc :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double ...@@ -37,15 +37,15 @@ fisToCooc :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double
fisToCooc m p = map (/docs) fisToCooc m p = map (/docs)
$ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc $ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc
$ concat $ concat
$ map (\x -> listToUnDirectedCombiWith (\x -> getIdxInPeaks x p) $ (Set.toList . fst) x) $ map (\x -> listToUnDirectedCombiWith (\x -> getIdxInPeaks x p) $ (Set.toList . getClique) x)
$ (concat . elems) m $ (concat . elems) m
where where
-------------------------------------- --------------------------------------
fisNgrams :: [Ngrams] fisNgrams :: [Ngrams]
fisNgrams = foldl (\mem x -> union mem $ (Set.toList . fst) x) [] $ (concat . elems) m fisNgrams = foldl (\mem x -> union mem $ (Set.toList . getClique) x) [] $ (concat . elems) m
-------------------------------------- --------------------------------------
docs :: Double docs :: Double
docs = fromIntegral $ foldl (\mem x -> mem + (snd x)) 0 $ (concat . elems) m docs = fromIntegral $ foldl (\mem x -> mem + (getSupport x)) 0 $ (concat . elems) m
-------------------------------------- --------------------------------------
cooc :: Map (Int, Int) (Double) cooc :: Map (Int, Int) (Double)
cooc = Map.fromList $ map (\x -> (x,0)) (listToUnDirectedCombiWith (\x -> getIdxInPeaks x p) fisNgrams) cooc = Map.fromList $ map (\x -> (x,0)) (listToUnDirectedCombiWith (\x -> getIdxInPeaks x p) fisNgrams)
......
...@@ -17,8 +17,8 @@ Portability : POSIX ...@@ -17,8 +17,8 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Fis module Gargantext.Viz.Phylo.Aggregates.Fis
where where
import Data.List (last,head) import Data.List (last,head,null)
import Data.Map (Map) import Data.Map (Map, empty)
import Data.Text (Text, unwords, toLower, words) import Data.Text (Text, unwords, toLower, words)
import Data.Tuple (fst, snd) import Data.Tuple (fst, snd)
import Data.Tuple.Extra import Data.Tuple.Extra
...@@ -44,16 +44,39 @@ filterFisBySupport keep min m = case keep of ...@@ -44,16 +44,39 @@ filterFisBySupport keep min m = case keep of
-- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport true -- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport true
filterMinorFis :: Int -> [PhyloFis] -> [PhyloFis] filterMinorFis :: Int -> [PhyloFis] -> [PhyloFis]
filterMinorFis min l = filter (\fis -> snd fis > min) l filterMinorFis min l = filter (\fis -> getSupport fis > min) l
-- | To filter nested Fis -- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head $ map fst l) (map fst l) [] filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head $ map getClique l) (map getClique l) []
in filter (\fis -> elem (fst fis) cliqueMax) l) in filter (\fis -> elem (getClique fis) cliqueMax) l)
-- | To transform a list of Documents into a Frequent Items Set -- | To transform a list of Documents into a Frequent Items Set
docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [PhyloFis] docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [PhyloFis]
docsToFis docs = map (\d -> Map.toList docsToFis docs = map (\d -> let fs = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text d)
$ fisWithSizePolyMap (Segment 1 20) 1 (map text d)) docs in map (\f -> PhyloFis (fst f) (snd f) empty) fs) docs
\ No newline at end of file
-- | To process a list of Filters on top of the PhyloFis
processFilters :: [Filter] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
processFilters filters phyloFis
| null filters = phyloFis
| otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processFilters] please add some filters for the Fis"
-- | To process a list of Metrics on top of the PhyloFis
processMetrics :: [Metric] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
processMetrics metrics phyloFis
| null metrics = phyloFis
| otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processMetrics] please add some metrics for the Fis"
-- | To transform some Documents into PhyloFis and apply a List of Metrics and Filters
toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
toPhyloFis ds k s ms fs = processFilters fs
$ processMetrics ms
$ filterFisByNested
$ filterFisBySupport k s
$ docsToFis ds
\ No newline at end of file
...@@ -123,7 +123,7 @@ queryEx = "title=Cesar et Cleôpatre" ...@@ -123,7 +123,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQuery :: PhyloQuery phyloQuery :: PhyloQuery
phyloQuery = PhyloQuery "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)" phyloQuery = PhyloQuery "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis defaultWeightedLogJaccard 3 defaultRelatedComponents 5 3 defaultFis [] [] defaultWeightedLogJaccard 3 defaultRelatedComponents
...@@ -221,7 +221,7 @@ phylo1 = addPhyloLevel (1) phyloFis phylo ...@@ -221,7 +221,7 @@ phylo1 = addPhyloLevel (1) phyloFis phylo
phyloFis :: Map (Date, Date) [PhyloFis] phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = filterFisBySupport False 1 (filterFisByNested (docsToFis phyloDocs)) phyloFis = filterFisByNested $ filterFisBySupport False 1 (docsToFis phyloDocs)
---------------------------------------- ----------------------------------------
......
...@@ -111,15 +111,15 @@ clusterToGroup prd lvl idx lbl groups m p = ...@@ -111,15 +111,15 @@ clusterToGroup prd lvl idx lbl groups m p =
-- | To transform a Clique into a PhyloGroup -- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> (Clique,Support) -> Map (Date, Date) [PhyloFis] -> Phylo -> PhyloGroup cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map (Date, Date) [PhyloFis] -> Phylo -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis m p = cliqueToGroup prd lvl idx lbl fis m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ snd fis)) cooc Nothing [] [] [] [] PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) cooc Nothing [] [] [] []
where where
-------------------------------------- --------------------------------------
ngrams :: [Int] ngrams :: [Int]
ngrams = sort $ map (\x -> getIdxInPeaks x p) ngrams = sort $ map (\x -> getIdxInPeaks x p)
$ Set.toList $ Set.toList
$ fst fis $ getClique fis
-------------------------------------- --------------------------------------
cooc :: Map (Int, Int) Double cooc :: Map (Int, Int) Double
cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams) cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
...@@ -163,23 +163,21 @@ toNthLevel lvlMax prox clus p ...@@ -163,23 +163,21 @@ toNthLevel lvlMax prox clus p
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Method -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox d p = case clus of toPhylo1 clus prox metrics filters d p = case clus of
Fis (FisParams f k s) -> setPhyloBranches 1 Fis (FisParams k s) -> setPhyloBranches 1
$ interTempoMatching Descendant 1 prox $ interTempoMatching Descendant 1 prox
$ interTempoMatching Ascendant 1 prox $ interTempoMatching Ascendant 1 prox
$ setLevelLinks (0,1) $ setLevelLinks (0,1)
$ setLevelLinks (1,0) $ setLevelLinks (1,0)
$ addPhyloLevel 1 phyloFis p $ addPhyloLevel 1 phyloFis p
where where
-------------------------------------- --------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis] phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = if f phyloFis = toPhyloFis d k s metrics filters
then filterFisBySupport k s (filterFisByNested (docsToFis d))
else docsToFis d
-------------------------------------- --------------------------------------
_ -> panic "[ERR][Viz.Phylo.PhyloMaker.toPhylo1] fst clustering not recognized" _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
-- | To reconstruct the Level 0 of a Phylo -- | To reconstruct the Level 0 of a Phylo
...@@ -210,7 +208,7 @@ toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getN ...@@ -210,7 +208,7 @@ toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getN
where where
-------------------------------------- --------------------------------------
phylo1 :: Phylo phylo1 :: Phylo
phylo1 = toPhylo1 (getFstCluster q) (getInterTemporalMatching q) phyloDocs phylo0 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
-------------------------------------- --------------------------------------
phylo0 :: Phylo phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase phylo0 = toPhylo0 phyloDocs phyloBase
......
...@@ -451,6 +451,24 @@ setPhyloLevelId lvl' (PhyloLevel (id, lvl) groups) ...@@ -451,6 +451,24 @@ setPhyloLevelId lvl' (PhyloLevel (id, lvl) groups)
groups' = over (traverse . phylo_groupId) (\((period, lvl), idx) -> ((period, lvl'), idx)) groups groups' = over (traverse . phylo_groupId) (\((period, lvl), idx) -> ((period, lvl'), idx)) groups
------------------
-- | PhyloFis | --
------------------
-- | To get the clique of a PhyloFis
getClique :: PhyloFis -> Clique
getClique = _phyloFis_clique
-- | To get the metrics of a PhyloFis
getFisMetrics :: PhyloFis -> Map (Int,Int) (Map Text [Double])
getFisMetrics = _phyloFis_metrics
-- | To get the support of a PhyloFis
getSupport :: PhyloFis -> Support
getSupport = _phyloFis_support
---------------------------- ----------------------------
-- | PhyloNodes & Edges | -- -- | PhyloNodes & Edges | --
---------------------------- ----------------------------
...@@ -558,9 +576,19 @@ getViewBranchIds v = map getBranchId $ v ^. phylo_viewBranches ...@@ -558,9 +576,19 @@ getViewBranchIds v = map getBranchId $ v ^. phylo_viewBranches
-- | PhyloQuery & QueryView | -- -- | PhyloQuery & QueryView | --
-------------------------------- --------------------------------
-- | To get the first clustering method to apply to get the level 1 of a Phylo -- | To get the first clustering method to apply to get the contextual units of a Phylo
getFstCluster :: PhyloQuery -> Cluster getContextualUnit :: PhyloQuery -> Cluster
getFstCluster q = q ^. q_contextualUnit getContextualUnit q = q ^. q_contextualUnit
-- | To get the metrics to apply to contextual units
getContextualUnitMetrics :: PhyloQuery -> [Metric]
getContextualUnitMetrics q = q ^. q_contextualUnitMetrics
-- | To get the filters to apply to contextual units
getContextualUnitFilters :: PhyloQuery -> [Filter]
getContextualUnitFilters q = q ^. q_contextualUnitFilters
-- | To get the cluster methods to apply to the Nths levels of a Phylo -- | To get the cluster methods to apply to the Nths levels of a Phylo
...@@ -602,8 +630,8 @@ getProximity cluster = case cluster of ...@@ -602,8 +630,8 @@ getProximity cluster = case cluster of
-- | To initialize all the Cluster / Proximity with their default parameters -- | To initialize all the Cluster / Proximity with their default parameters
initFis :: Maybe Bool -> Maybe Bool -> Maybe Support -> FisParams initFis :: Maybe Bool -> Maybe Support -> FisParams
initFis (def True -> flt) (def True -> kmf) (def 1 -> min) = FisParams flt kmf min initFis (def True -> kmf) (def 1 -> min) = FisParams kmf min
initHamming :: Maybe Double -> HammingParams initHamming :: Maybe Double -> HammingParams
initHamming (def 0.01 -> sens) = HammingParams sens initHamming (def 0.01 -> sens) = HammingParams sens
...@@ -622,10 +650,10 @@ initWeightedLogJaccard (def 0 -> thr) (def 0.01 -> sens) = WLJParams thr sens ...@@ -622,10 +650,10 @@ initWeightedLogJaccard (def 0 -> thr) (def 0.01 -> sens) = WLJParams thr sens
-- | To initialize a PhyloQuery from given and default parameters -- | To initialize a PhyloQuery from given and default parameters
initPhyloQuery :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe Proximity -> Maybe Level -> Maybe Cluster -> PhyloQuery initPhyloQuery :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe [Metric] -> Maybe [Filter] -> Maybe Proximity -> Maybe Level -> Maybe Cluster -> PhyloQuery
initPhyloQuery name desc (def 5 -> grain) (def 3 -> steps) (def defaultFis -> cluster) initPhyloQuery name desc (def 5 -> grain) (def 3 -> steps) (def defaultFis -> cluster) (def [] -> metrics) (def [] -> filters)
(def defaultWeightedLogJaccard -> matching) (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) = (def defaultWeightedLogJaccard -> matching) (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQuery name desc grain steps cluster matching nthLevel nthCluster PhyloQuery name desc grain steps cluster metrics filters matching nthLevel nthCluster
-- | To initialize a PhyloQueryView default parameters -- | To initialize a PhyloQueryView default parameters
...@@ -635,9 +663,6 @@ initPhyloQueryView (def 2 -> lvl) (def Descendant -> f) (def False -> c) (def 1 ...@@ -635,9 +663,6 @@ initPhyloQueryView (def 2 -> lvl) (def Descendant -> f) (def False -> c) (def 1
-- | To define some obvious boolean getters -- | To define some obvious boolean getters
shouldFilterFis :: FisParams -> Bool
shouldFilterFis = _fis_filtered
shouldKeepMinorFis :: FisParams -> Bool shouldKeepMinorFis :: FisParams -> Bool
shouldKeepMinorFis = _fis_keepMinorFis shouldKeepMinorFis = _fis_keepMinorFis
...@@ -648,7 +673,7 @@ shouldKeepMinorFis = _fis_keepMinorFis ...@@ -648,7 +673,7 @@ shouldKeepMinorFis = _fis_keepMinorFis
-- Clusters -- Clusters
defaultFis :: Cluster defaultFis :: Cluster
defaultFis = Fis (initFis Nothing Nothing Nothing) defaultFis = Fis (initFis Nothing Nothing)
defaultLouvain :: Cluster defaultLouvain :: Cluster
defaultLouvain = Louvain (initLouvain Nothing) defaultLouvain = Louvain (initLouvain Nothing)
...@@ -678,7 +703,7 @@ defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing N ...@@ -678,7 +703,7 @@ defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing N
defaultQuery :: PhyloQuery defaultQuery :: PhyloQuery
defaultQuery = initPhyloQuery "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)" defaultQuery = initPhyloQuery "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
defaultQueryView :: PhyloQueryView defaultQueryView :: PhyloQueryView
defaultQueryView = initPhyloQueryView Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing defaultQueryView = initPhyloQueryView Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment