Commit 81094b10 authored by Quentin Lobbé's avatar Quentin Lobbé

add a filter for fis with too few ngrams

parent eb035a9d
Pipeline #363 failed with stage
......@@ -93,14 +93,14 @@ main = do
let termListPath = "/home/qlobbe/data/epique/corpus/cultural_evolution/termList.csv"
let outputPath = "/home/qlobbe/data/epique/output/cultural_evolution.dot"
let query = PhyloQueryBuild "cultural_evolution" "" 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0 0)
2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.1 10)
let query = PhyloQueryBuild "cultural_evolution" "" 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.00001 10)
2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.5 10)
let queryView = PhyloQueryView 2 Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
putStrLn $ show "-- Start parsing the corpus"
corpus <- parse 500 corpusPath termListPath
corpus <- parse 2000 corpusPath termListPath
let foundations = DL.nub $ DL.concat $ map text corpus
......
......@@ -244,6 +244,7 @@ data Cluster = Fis FisParams
data FisParams = FisParams
{ _fis_keepMinorFis :: Bool
, _fis_minSupport :: Support
, _fis_minSize :: Int
} deriving (Generic, Show, Eq, Read)
-- | Parameters for RelatedComponents clustering
......
......@@ -20,6 +20,7 @@ module Gargantext.Viz.Phylo.Aggregates.Fis
import Data.List (null)
import Data.Map (Map, empty)
import Data.Tuple (fst, snd)
import Data.Set (size)
import Gargantext.Prelude
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Viz.Phylo
......@@ -34,6 +35,10 @@ filterFisBySupport keep min' m = case keep of
True -> Map.map (\l -> keepFilled (filterMinorFis) min' l) m
filterFisByNgrams :: Int -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFisByNgrams thr m = Map.map(\lst -> filter (\fis -> (size $ getClique fis) > thr) lst) m
-- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport true
filterMinorFis :: Int -> [PhyloFis] -> [PhyloFis]
filterMinorFis min' l = filter (\fis -> getSupport fis > min') l
......@@ -66,9 +71,10 @@ processMetrics metrics phyloFis
-- | To transform some Documents into PhyloFis and apply a List of Metrics and Filters
toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
toPhyloFis ds k s ms fs = processFilters fs
$ processMetrics ms
$ filterFisByNested
$ filterFisBySupport k s
$ docsToFis ds
toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> Int -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
toPhyloFis ds k s t ms fs = processFilters fs
$ processMetrics ms
$ filterFisByNgrams t
$ filterFisByNested
$ filterFisBySupport k s
$ docsToFis ds
......@@ -160,7 +160,7 @@ toNthLevel lvlMax prox clus p
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox metrics filters d p = case clus of
Fis (FisParams k s) -> setPhyloBranches 1
Fis (FisParams k s t) -> setPhyloBranches 1
$ interTempoMatching Descendant 1 prox
$ interTempoMatching Ascendant 1 prox
$ setLevelLinks (0,1)
......@@ -169,7 +169,7 @@ toPhylo1 clus prox metrics filters d p = case clus of
where
--------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = toPhyloFis d k s metrics filters
phyloFis = toPhyloFis d k s t metrics filters
--------------------------------------
_ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
......
......@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.LinkMaker
where
import Control.Lens hiding (both, Level)
import Data.List ((++), nub, sortOn, null, tail, splitAt, elem)
import Data.List ((++), nub, sortOn, null, tail, splitAt, elem, concat)
import Data.Tuple.Extra
import Gargantext.Prelude
import Gargantext.Viz.Phylo
......@@ -26,6 +26,7 @@ import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity
import qualified Data.List as List
import qualified Data.Maybe as Maybe
-- import Debug.Trace (trace)
------------------------------------------------------------------------
......@@ -117,7 +118,7 @@ findBestCandidates to' depth max' prox group p
next = getNextPeriods to' (getGroupPeriod group) (getPhyloPeriods p)
--------------------------------------
candidates :: [PhyloGroup]
candidates = getGroupsWithFilters (getGroupLevel group) (head' "findBestCandidates" next) p
candidates = concat $ map (\prd -> getGroupsWithFilters (getGroupLevel group) prd p) $ (take depth next)
--------------------------------------
scores :: [(PhyloGroupId, Double)]
scores = map (\group' -> applyProximity prox group group') candidates
......
......@@ -704,8 +704,8 @@ getProximity cluster = case cluster of
-- | To initialize all the Cluster / Proximity with their default parameters
initFis :: Maybe Bool -> Maybe Support -> FisParams
initFis (def True -> kmf) (def 1 -> min') = FisParams kmf min'
initFis :: Maybe Bool -> Maybe Support -> Maybe Int -> FisParams
initFis (def True -> kmf) (def 1 -> min') (def 1 -> thr) = FisParams kmf min' thr
initHamming :: Maybe Double -> HammingParams
initHamming (def 0.01 -> sens) = HammingParams sens
......@@ -748,7 +748,7 @@ shouldKeepMinorFis = _fis_keepMinorFis
-- Clusters
defaultFis :: Cluster
defaultFis = Fis (initFis Nothing Nothing)
defaultFis = Fis (initFis Nothing Nothing Nothing)
defaultLouvain :: Cluster
defaultLouvain = Louvain (initLouvain Nothing)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment