Commit 8913a00b authored by Quentin Lobbé's avatar Quentin Lobbé

add the mecanisms for filtering the FIS if needed

parent 1a96a74d
......@@ -201,8 +201,11 @@ type Clique = Set Ngrams
-- | Support : Number of Documents where a Clique occurs
type Support = Int
-- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
type PhyloFis = (Clique,Support)
data PhyloFis = PhyloFis
{ _phyloFis_clique :: Clique
, _phyloFis_support :: Support
, _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
} deriving (Show)
-- | A list of clustered PhyloGroup
type PhyloCluster = [PhyloGroup]
......@@ -239,8 +242,7 @@ data Cluster = Fis FisParams
-- | Parameters for Fis clustering
data FisParams = FisParams
{ _fis_filtered :: Bool
, _fis_keepMinorFis :: Bool
{ _fis_keepMinorFis :: Bool
, _fis_minSupport :: Support
} deriving (Show)
......@@ -334,6 +336,8 @@ data PhyloQuery = PhyloQuery
-- Clustering method for building the contextual unit of Phylo (ie: level 1)
, _q_contextualUnit :: Cluster
, _q_contextualUnitMetrics :: [Metric]
, _q_contextualUnitFilters :: [Filter]
-- Inter-temporal matching method of the Phylo
, _q_interTemporalMatching :: Proximity
......@@ -438,6 +442,7 @@ makeLenses ''PhyloPeaks
makeLenses ''PhyloGroup
makeLenses ''PhyloLevel
makeLenses ''PhyloPeriod
makeLenses ''PhyloFis
--
makeLenses ''Proximity
makeLenses ''Cluster
......@@ -463,10 +468,13 @@ $(deriveJSON defaultOptions ''Tree )
$(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
$(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
$(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
$(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
--
$(deriveJSON (unPrefix "_software_" ) ''Software )
$(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
--
$(deriveJSON defaultOptions ''Filter )
$(deriveJSON defaultOptions ''Metric )
$(deriveJSON defaultOptions ''Cluster )
$(deriveJSON defaultOptions ''Proximity )
--
......@@ -475,6 +483,7 @@ $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
$(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
$(deriveJSON (unPrefix "_rc_" ) ''RCParams )
$(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
$(deriveJSON (unPrefix "_sb_" ) ''SBParams )
--
$(deriveJSON (unPrefix "_q_" ) ''PhyloQuery )
......
......@@ -37,15 +37,15 @@ fisToCooc :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double
fisToCooc m p = map (/docs)
$ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc
$ concat
$ map (\x -> listToUnDirectedCombiWith (\x -> getIdxInPeaks x p) $ (Set.toList . fst) x)
$ map (\x -> listToUnDirectedCombiWith (\x -> getIdxInPeaks x p) $ (Set.toList . getClique) x)
$ (concat . elems) m
where
--------------------------------------
fisNgrams :: [Ngrams]
fisNgrams = foldl (\mem x -> union mem $ (Set.toList . fst) x) [] $ (concat . elems) m
fisNgrams = foldl (\mem x -> union mem $ (Set.toList . getClique) x) [] $ (concat . elems) m
--------------------------------------
docs :: Double
docs = fromIntegral $ foldl (\mem x -> mem + (snd x)) 0 $ (concat . elems) m
docs = fromIntegral $ foldl (\mem x -> mem + (getSupport x)) 0 $ (concat . elems) m
--------------------------------------
cooc :: Map (Int, Int) (Double)
cooc = Map.fromList $ map (\x -> (x,0)) (listToUnDirectedCombiWith (\x -> getIdxInPeaks x p) fisNgrams)
......
......@@ -17,8 +17,8 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Fis
where
import Data.List (last,head)
import Data.Map (Map)
import Data.List (last,head,null)
import Data.Map (Map, empty)
import Data.Text (Text, unwords, toLower, words)
import Data.Tuple (fst, snd)
import Data.Tuple.Extra
......@@ -44,16 +44,39 @@ filterFisBySupport keep min m = case keep of
-- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport true
filterMinorFis :: Int -> [PhyloFis] -> [PhyloFis]
filterMinorFis min l = filter (\fis -> snd fis > min) l
filterMinorFis min l = filter (\fis -> getSupport fis > min) l
-- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head $ map fst l) (map fst l) []
in filter (\fis -> elem (fst fis) cliqueMax) l)
filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head $ map getClique l) (map getClique l) []
in filter (\fis -> elem (getClique fis) cliqueMax) l)
-- | To transform a list of Documents into a Frequent Items Set
docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [PhyloFis]
docsToFis docs = map (\d -> Map.toList
$ fisWithSizePolyMap (Segment 1 20) 1 (map text d)) docs
\ No newline at end of file
docsToFis docs = map (\d -> let fs = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text d)
in map (\f -> PhyloFis (fst f) (snd f) empty) fs) docs
-- | To process a list of Filters on top of the PhyloFis
processFilters :: [Filter] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
processFilters filters phyloFis
| null filters = phyloFis
| otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processFilters] please add some filters for the Fis"
-- | To process a list of Metrics on top of the PhyloFis
processMetrics :: [Metric] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
processMetrics metrics phyloFis
| null metrics = phyloFis
| otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processMetrics] please add some metrics for the Fis"
-- | To transform some Documents into PhyloFis and apply a List of Metrics and Filters
toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
toPhyloFis ds k s ms fs = processFilters fs
$ processMetrics ms
$ filterFisByNested
$ filterFisBySupport k s
$ docsToFis ds
\ No newline at end of file
......@@ -123,7 +123,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQuery :: PhyloQuery
phyloQuery = PhyloQuery "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis defaultWeightedLogJaccard 3 defaultRelatedComponents
5 3 defaultFis [] [] defaultWeightedLogJaccard 3 defaultRelatedComponents
......@@ -221,7 +221,7 @@ phylo1 = addPhyloLevel (1) phyloFis phylo
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = filterFisBySupport False 1 (filterFisByNested (docsToFis phyloDocs))
phyloFis = filterFisByNested $ filterFisBySupport False 1 (docsToFis phyloDocs)
----------------------------------------
......
......@@ -111,15 +111,15 @@ clusterToGroup prd lvl idx lbl groups m p =
-- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> (Clique,Support) -> Map (Date, Date) [PhyloFis] -> Phylo -> PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map (Date, Date) [PhyloFis] -> Phylo -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ snd fis)) cooc Nothing [] [] [] []
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) cooc Nothing [] [] [] []
where
--------------------------------------
ngrams :: [Int]
ngrams = sort $ map (\x -> getIdxInPeaks x p)
$ Set.toList
$ fst fis
$ getClique fis
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
......@@ -163,23 +163,21 @@ toNthLevel lvlMax prox clus p
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox d p = case clus of
Fis (FisParams f k s) -> setPhyloBranches 1
$ interTempoMatching Descendant 1 prox
$ interTempoMatching Ascendant 1 prox
$ setLevelLinks (0,1)
$ setLevelLinks (1,0)
$ addPhyloLevel 1 phyloFis p
where
toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox metrics filters d p = case clus of
Fis (FisParams k s) -> setPhyloBranches 1
$ interTempoMatching Descendant 1 prox
$ interTempoMatching Ascendant 1 prox
$ setLevelLinks (0,1)
$ setLevelLinks (1,0)
$ addPhyloLevel 1 phyloFis p
where
--------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = if f
then filterFisBySupport k s (filterFisByNested (docsToFis d))
else docsToFis d
phyloFis = toPhyloFis d k s metrics filters
--------------------------------------
_ -> panic "[ERR][Viz.Phylo.PhyloMaker.toPhylo1] fst clustering not recognized"
_ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
-- | To reconstruct the Level 0 of a Phylo
......@@ -210,7 +208,7 @@ toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getN
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getFstCluster q) (getInterTemporalMatching q) phyloDocs phylo0
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
--------------------------------------
phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase
......
......@@ -451,6 +451,24 @@ setPhyloLevelId lvl' (PhyloLevel (id, lvl) groups)
groups' = over (traverse . phylo_groupId) (\((period, lvl), idx) -> ((period, lvl'), idx)) groups
------------------
-- | PhyloFis | --
------------------
-- | To get the clique of a PhyloFis
getClique :: PhyloFis -> Clique
getClique = _phyloFis_clique
-- | To get the metrics of a PhyloFis
getFisMetrics :: PhyloFis -> Map (Int,Int) (Map Text [Double])
getFisMetrics = _phyloFis_metrics
-- | To get the support of a PhyloFis
getSupport :: PhyloFis -> Support
getSupport = _phyloFis_support
----------------------------
-- | PhyloNodes & Edges | --
----------------------------
......@@ -558,9 +576,19 @@ getViewBranchIds v = map getBranchId $ v ^. phylo_viewBranches
-- | PhyloQuery & QueryView | --
--------------------------------
-- | To get the first clustering method to apply to get the level 1 of a Phylo
getFstCluster :: PhyloQuery -> Cluster
getFstCluster q = q ^. q_contextualUnit
-- | To get the first clustering method to apply to get the contextual units of a Phylo
getContextualUnit :: PhyloQuery -> Cluster
getContextualUnit q = q ^. q_contextualUnit
-- | To get the metrics to apply to contextual units
getContextualUnitMetrics :: PhyloQuery -> [Metric]
getContextualUnitMetrics q = q ^. q_contextualUnitMetrics
-- | To get the filters to apply to contextual units
getContextualUnitFilters :: PhyloQuery -> [Filter]
getContextualUnitFilters q = q ^. q_contextualUnitFilters
-- | To get the cluster methods to apply to the Nths levels of a Phylo
......@@ -602,8 +630,8 @@ getProximity cluster = case cluster of
-- | To initialize all the Cluster / Proximity with their default parameters
initFis :: Maybe Bool -> Maybe Bool -> Maybe Support -> FisParams
initFis (def True -> flt) (def True -> kmf) (def 1 -> min) = FisParams flt kmf min
initFis :: Maybe Bool -> Maybe Support -> FisParams
initFis (def True -> kmf) (def 1 -> min) = FisParams kmf min
initHamming :: Maybe Double -> HammingParams
initHamming (def 0.01 -> sens) = HammingParams sens
......@@ -622,10 +650,10 @@ initWeightedLogJaccard (def 0 -> thr) (def 0.01 -> sens) = WLJParams thr sens
-- | To initialize a PhyloQuery from given and default parameters
initPhyloQuery :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe Proximity -> Maybe Level -> Maybe Cluster -> PhyloQuery
initPhyloQuery name desc (def 5 -> grain) (def 3 -> steps) (def defaultFis -> cluster)
initPhyloQuery :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe [Metric] -> Maybe [Filter] -> Maybe Proximity -> Maybe Level -> Maybe Cluster -> PhyloQuery
initPhyloQuery name desc (def 5 -> grain) (def 3 -> steps) (def defaultFis -> cluster) (def [] -> metrics) (def [] -> filters)
(def defaultWeightedLogJaccard -> matching) (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQuery name desc grain steps cluster matching nthLevel nthCluster
PhyloQuery name desc grain steps cluster metrics filters matching nthLevel nthCluster
-- | To initialize a PhyloQueryView default parameters
......@@ -635,9 +663,6 @@ initPhyloQueryView (def 2 -> lvl) (def Descendant -> f) (def False -> c) (def 1
-- | To define some obvious boolean getters
shouldFilterFis :: FisParams -> Bool
shouldFilterFis = _fis_filtered
shouldKeepMinorFis :: FisParams -> Bool
shouldKeepMinorFis = _fis_keepMinorFis
......@@ -648,7 +673,7 @@ shouldKeepMinorFis = _fis_keepMinorFis
-- Clusters
defaultFis :: Cluster
defaultFis = Fis (initFis Nothing Nothing Nothing)
defaultFis = Fis (initFis Nothing Nothing)
defaultLouvain :: Cluster
defaultLouvain = Louvain (initLouvain Nothing)
......@@ -678,7 +703,7 @@ defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing N
defaultQuery :: PhyloQuery
defaultQuery = initPhyloQuery "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
Nothing Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
defaultQueryView :: PhyloQueryView
defaultQueryView = initPhyloQueryView Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment