Commit 81094b10 authored by Quentin Lobbé's avatar Quentin Lobbé

add a filter for fis with too few ngrams

parent eb035a9d
Pipeline #363 failed with stage
...@@ -93,14 +93,14 @@ main = do ...@@ -93,14 +93,14 @@ main = do
let termListPath = "/home/qlobbe/data/epique/corpus/cultural_evolution/termList.csv" let termListPath = "/home/qlobbe/data/epique/corpus/cultural_evolution/termList.csv"
let outputPath = "/home/qlobbe/data/epique/output/cultural_evolution.dot" let outputPath = "/home/qlobbe/data/epique/output/cultural_evolution.dot"
let query = PhyloQueryBuild "cultural_evolution" "" 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0 0) let query = PhyloQueryBuild "cultural_evolution" "" 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.00001 10)
2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.1 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.5 10)
let queryView = PhyloQueryView 2 Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True let queryView = PhyloQueryView 2 Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
putStrLn $ show "-- Start parsing the corpus" putStrLn $ show "-- Start parsing the corpus"
corpus <- parse 500 corpusPath termListPath corpus <- parse 2000 corpusPath termListPath
let foundations = DL.nub $ DL.concat $ map text corpus let foundations = DL.nub $ DL.concat $ map text corpus
......
...@@ -244,6 +244,7 @@ data Cluster = Fis FisParams ...@@ -244,6 +244,7 @@ data Cluster = Fis FisParams
data FisParams = FisParams data FisParams = FisParams
{ _fis_keepMinorFis :: Bool { _fis_keepMinorFis :: Bool
, _fis_minSupport :: Support , _fis_minSupport :: Support
, _fis_minSize :: Int
} deriving (Generic, Show, Eq, Read) } deriving (Generic, Show, Eq, Read)
-- | Parameters for RelatedComponents clustering -- | Parameters for RelatedComponents clustering
......
...@@ -20,6 +20,7 @@ module Gargantext.Viz.Phylo.Aggregates.Fis ...@@ -20,6 +20,7 @@ module Gargantext.Viz.Phylo.Aggregates.Fis
import Data.List (null) import Data.List (null)
import Data.Map (Map, empty) import Data.Map (Map, empty)
import Data.Tuple (fst, snd) import Data.Tuple (fst, snd)
import Data.Set (size)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..)) import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
...@@ -34,6 +35,10 @@ filterFisBySupport keep min' m = case keep of ...@@ -34,6 +35,10 @@ filterFisBySupport keep min' m = case keep of
True -> Map.map (\l -> keepFilled (filterMinorFis) min' l) m True -> Map.map (\l -> keepFilled (filterMinorFis) min' l) m
filterFisByNgrams :: Int -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFisByNgrams thr m = Map.map(\lst -> filter (\fis -> (size $ getClique fis) > thr) lst) m
-- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport true -- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport true
filterMinorFis :: Int -> [PhyloFis] -> [PhyloFis] filterMinorFis :: Int -> [PhyloFis] -> [PhyloFis]
filterMinorFis min' l = filter (\fis -> getSupport fis > min') l filterMinorFis min' l = filter (\fis -> getSupport fis > min') l
...@@ -66,9 +71,10 @@ processMetrics metrics phyloFis ...@@ -66,9 +71,10 @@ processMetrics metrics phyloFis
-- | To transform some Documents into PhyloFis and apply a List of Metrics and Filters -- | To transform some Documents into PhyloFis and apply a List of Metrics and Filters
toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis] toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> Int -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
toPhyloFis ds k s ms fs = processFilters fs toPhyloFis ds k s t ms fs = processFilters fs
$ processMetrics ms $ processMetrics ms
$ filterFisByNested $ filterFisByNgrams t
$ filterFisBySupport k s $ filterFisByNested
$ docsToFis ds $ filterFisBySupport k s
$ docsToFis ds
...@@ -160,7 +160,7 @@ toNthLevel lvlMax prox clus p ...@@ -160,7 +160,7 @@ toNthLevel lvlMax prox clus p
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Method -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox metrics filters d p = case clus of toPhylo1 clus prox metrics filters d p = case clus of
Fis (FisParams k s) -> setPhyloBranches 1 Fis (FisParams k s t) -> setPhyloBranches 1
$ interTempoMatching Descendant 1 prox $ interTempoMatching Descendant 1 prox
$ interTempoMatching Ascendant 1 prox $ interTempoMatching Ascendant 1 prox
$ setLevelLinks (0,1) $ setLevelLinks (0,1)
...@@ -169,7 +169,7 @@ toPhylo1 clus prox metrics filters d p = case clus of ...@@ -169,7 +169,7 @@ toPhylo1 clus prox metrics filters d p = case clus of
where where
-------------------------------------- --------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis] phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = toPhyloFis d k s metrics filters phyloFis = toPhyloFis d k s t metrics filters
-------------------------------------- --------------------------------------
_ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized" _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
......
...@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.LinkMaker ...@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.LinkMaker
where where
import Control.Lens hiding (both, Level) import Control.Lens hiding (both, Level)
import Data.List ((++), nub, sortOn, null, tail, splitAt, elem) import Data.List ((++), nub, sortOn, null, tail, splitAt, elem, concat)
import Data.Tuple.Extra import Data.Tuple.Extra
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
...@@ -26,6 +26,7 @@ import Gargantext.Viz.Phylo.Tools ...@@ -26,6 +26,7 @@ import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity import Gargantext.Viz.Phylo.Metrics.Proximity
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Maybe as Maybe import qualified Data.Maybe as Maybe
-- import Debug.Trace (trace)
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -117,7 +118,7 @@ findBestCandidates to' depth max' prox group p ...@@ -117,7 +118,7 @@ findBestCandidates to' depth max' prox group p
next = getNextPeriods to' (getGroupPeriod group) (getPhyloPeriods p) next = getNextPeriods to' (getGroupPeriod group) (getPhyloPeriods p)
-------------------------------------- --------------------------------------
candidates :: [PhyloGroup] candidates :: [PhyloGroup]
candidates = getGroupsWithFilters (getGroupLevel group) (head' "findBestCandidates" next) p candidates = concat $ map (\prd -> getGroupsWithFilters (getGroupLevel group) prd p) $ (take depth next)
-------------------------------------- --------------------------------------
scores :: [(PhyloGroupId, Double)] scores :: [(PhyloGroupId, Double)]
scores = map (\group' -> applyProximity prox group group') candidates scores = map (\group' -> applyProximity prox group group') candidates
......
...@@ -704,8 +704,8 @@ getProximity cluster = case cluster of ...@@ -704,8 +704,8 @@ getProximity cluster = case cluster of
-- | To initialize all the Cluster / Proximity with their default parameters -- | To initialize all the Cluster / Proximity with their default parameters
initFis :: Maybe Bool -> Maybe Support -> FisParams initFis :: Maybe Bool -> Maybe Support -> Maybe Int -> FisParams
initFis (def True -> kmf) (def 1 -> min') = FisParams kmf min' initFis (def True -> kmf) (def 1 -> min') (def 1 -> thr) = FisParams kmf min' thr
initHamming :: Maybe Double -> HammingParams initHamming :: Maybe Double -> HammingParams
initHamming (def 0.01 -> sens) = HammingParams sens initHamming (def 0.01 -> sens) = HammingParams sens
...@@ -748,7 +748,7 @@ shouldKeepMinorFis = _fis_keepMinorFis ...@@ -748,7 +748,7 @@ shouldKeepMinorFis = _fis_keepMinorFis
-- Clusters -- Clusters
defaultFis :: Cluster defaultFis :: Cluster
defaultFis = Fis (initFis Nothing Nothing) defaultFis = Fis (initFis Nothing Nothing Nothing)
defaultLouvain :: Cluster defaultLouvain :: Cluster
defaultLouvain = Louvain (initLouvain Nothing) defaultLouvain = Louvain (initLouvain Nothing)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment