Commit eb9455f7 authored by Quentin Lobbé's avatar Quentin Lobbé

fix the coocurencies

parent 901125c8
......@@ -81,7 +81,8 @@ data Conf =
, phyloLevel :: Int
, viewLevel :: Int
, fisSupport :: Int
, fisClique :: Int
, fisClique :: Int
, minSizeBranch :: Int
} deriving (Show,Generic)
instance FromJSON Conf
......@@ -172,7 +173,7 @@ main = do
(Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
let phylo = toPhylo query corpus roots termList
......
......@@ -149,7 +149,6 @@ data PhyloGroup =
, _phylo_groupLabel :: Text
, _phylo_groupNgrams :: [Int]
, _phylo_groupMeta :: Map Text Double
, _phylo_groupCooc :: Map (Int, Int) Double
, _phylo_groupBranchId :: Maybe PhyloBranchId
, _phylo_groupPeriodParents :: [Pointer]
......@@ -280,13 +279,19 @@ data HammingParams = HammingParams
-- | Filter constructors
data Filter = SmallBranch SBParams deriving (Generic, Show, Eq)
data Filter = LonelyBranch LBParams
| SizeBranch SBParams
deriving (Generic, Show, Eq)
-- | Parameters for SmallBranch filter
-- | Parameters for LonelyBranch filter
data LBParams = LBParams
{ _lb_periodsInf :: Int
, _lb_periodsSup :: Int
, _lb_minNodes :: Int } deriving (Generic, Show, Eq)
-- | Parameters for SizeBranch filter
data SBParams = SBParams
{ _sb_periodsInf :: Int
, _sb_periodsSup :: Int
, _sb_minNodes :: Int } deriving (Generic, Show, Eq)
{ _sb_minSize :: Int } deriving (Generic, Show, Eq)
----------------
......@@ -483,6 +488,8 @@ $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
$(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
$(deriveJSON (unPrefix "_rc_" ) ''RCParams )
$(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
--
$(deriveJSON (unPrefix "_lb_" ) ''LBParams )
$(deriveJSON (unPrefix "_sb_" ) ''SBParams )
--
$(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
......
......@@ -77,7 +77,7 @@ type GetPhylo = QueryParam "listId" ListId
getPhylo :: PhyloId -> GargServer GetPhylo
getPhylo _phyloId _lId l f b l' ms x y z ts s o e d b' = do
let
fs' = maybe (Just []) (\p -> Just [p]) $ SmallBranch <$> (SBParams <$> x <*> y <*> z)
fs' = maybe (Just []) (\p -> Just [p]) $ LonelyBranch <$> (LBParams <$> x <*> y <*> z)
so = (,) <$> s <*> o
q = initPhyloQueryView l f b l' ms fs' ts so e d b'
-- | TODO remove phylo for real data here
......@@ -149,6 +149,7 @@ instance ToSchema PhyloPeriod
instance ToSchema PhyloQueryBuild
instance ToSchema PhyloView
instance ToSchema RCParams
instance ToSchema LBParams
instance ToSchema SBParams
instance ToSchema Software
instance ToSchema WLJParams
......
......@@ -17,7 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Cluster
where
import Data.List (null,tail,concat)
import Data.List (null,tail,concat,sort,intersect)
import Data.Map (Map)
import Data.Tuple (fst)
import Gargantext.Prelude
......@@ -25,6 +25,7 @@ import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Aggregates.Cooc
import qualified Data.Map as Map
import qualified Data.Vector.Storable as VS
......@@ -32,6 +33,13 @@ import Debug.Trace (trace)
import Numeric.Statistics (percentile)
-- | Optimisation to filter only relevant candidates
getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
$ filter (\(g,g') -> g /= g')
$ listToDirectedCombi gs
-- | To transform a Graph into Clusters
graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
graphToClusters clust (nodes,edges) = case clust of
......@@ -41,12 +49,12 @@ graphToClusters clust (nodes,edges) = case clust of
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph :: Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
groupsToGraph prox gs = case prox of
WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), weightedLogJaccard sens (getGroupCooc x) (getGroupCooc y)))
$ listToDirectedCombi gs)
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y)))
$ listToDirectedCombi gs)
groupsToGraph :: Proximity -> [PhyloGroup] -> Map (Int, Int) Double -> ([GroupNode],[GroupEdge])
groupsToGraph prox gs cooc = case prox of
WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
$ getCandidates gs)
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
$ getCandidates gs)
_ -> undefined
......@@ -73,7 +81,7 @@ phyloToClusters lvl clus p = Map.fromList
--------------------------------------
graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox)
$ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p)) periods
$ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p) (getCooc [prd] p)) periods
--------------------------------------
prox :: Proximity
prox = getProximity clus
......@@ -96,7 +104,7 @@ traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (l
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
where
lst = map snd $ concat $ map snd g
lst = sort $ map snd $ concat $ map snd g
traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
......@@ -107,5 +115,5 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <>
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
where
lst = map snd $ concat $ map snd g
lst = sort $ map snd $ concat $ map snd g
......@@ -17,8 +17,8 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Cooc
where
import Data.List (union,concat)
import Data.Map (Map, elems, adjust)
import Data.List (union,concat,nub)
import Data.Map (Map,elems,adjust,filterWithKey)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
......@@ -26,12 +26,12 @@ import qualified Data.Map as Map
import qualified Data.Set as Set
-- | To transform the Fis into a coocurency Matrix in a Phylo but as a triangle
fisToCooc' :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double
fisToCooc' m p = map (/docs)
$ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc
-- | To transform the Fis into a full coocurency Matrix in a Phylo
fisToCooc :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double
fisToCooc m p = map (/docs)
$ foldl (\mem x -> adjust (+1) x mem) cooc
$ concat
$ map (\x -> listToUnDirectedCombiWith (\y -> getIdxInRoots y p) $ (Set.toList . getClique) x)
$ map (\x -> listToDirectedCombiWith (\y -> getIdxInRoots y p) $ (Set.toList . getClique) x)
$ (concat . elems) m
where
--------------------------------------
......@@ -42,28 +42,45 @@ fisToCooc' m p = map (/docs)
docs = fromIntegral $ foldl (\mem x -> mem + (getSupport x)) 0 $ (concat . elems) m
--------------------------------------
cooc :: Map (Int, Int) (Double)
cooc = Map.fromList $ map (\x -> (x,0)) (listToUnDirectedCombiWith (\y -> getIdxInRoots y p) fisNgrams)
cooc = Map.fromList $ map (\x -> (x,0)) (listToDirectedCombiWith (\y -> getIdxInRoots y p) fisNgrams)
--------------------------------------
-- | To transform the Fis into a full coocurency Matrix in a Phylo
fisToCooc :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double
fisToCooc m p = map (/docs)
$ foldl (\mem x -> adjust (+1) x mem) cooc
$ concat
$ map (\x -> listToDirectedCombiWith (\y -> getIdxInRoots y p) $ (Set.toList . getClique) x)
$ (concat . elems) m
-- | To transform a tuple of group's information into a coocurency Matrix
toCooc :: [([Int],Double)] -> Map (Int, Int) Double
toCooc l = map (/docs)
$ foldl (\mem x -> adjust (+1) x mem) cooc
$ concat
$ map (\x -> listToDirectedCombi $ fst x) l
where
--------------------------------------
fisNgrams :: [Ngrams]
fisNgrams = foldl (\mem x -> union mem $ (Set.toList . getClique) x) [] $ (concat . elems) m
idx :: [Int]
idx = nub $ concat $ map fst l
--------------------------------------
docs :: Double
docs = fromIntegral $ foldl (\mem x -> mem + (getSupport x)) 0 $ (concat . elems) m
docs = sum $ map snd l
--------------------------------------
cooc :: Map (Int, Int) (Double)
cooc = Map.fromList $ map (\x -> (x,0)) (listToDirectedCombiWith (\y -> getIdxInRoots y p) fisNgrams)
cooc = Map.fromList $ map (\x -> (x,0)) $ listToDirectedCombi idx
--------------------------------------
-- | To reduce a coocurency Matrix to some keys
getSubCooc :: [Int] -> Map (Int, Int) Double -> Map (Int, Int) Double
getSubCooc idx cooc = filterWithKey (\k _ -> (elem (fst k) idx)
&& (elem (snd k) idx)) cooc
-- | To get a coocurency Matrix related to a given list of Periods
getCooc :: [PhyloPeriodId] -> Phylo -> Map (Int, Int) Double
getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)) gs
where
--------------------------------------
-- | Here we need to go back to the level 1 (aka : the Fis level)
gs :: [PhyloGroup]
gs = filter (\g -> elem (getGroupPeriod g) prds ) $ getGroupsWithLevel 1 p
--------------------------------------
-- phyloCooc :: Map (Int, Int) Double
......
......@@ -77,7 +77,7 @@ queryViewEx = "level=3"
phyloQueryView :: PhyloQueryView
phyloQueryView = PhyloQueryView 2 Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
phyloQueryView = PhyloQueryView 4 Merge False 1 [BranchAge] [defaultSizeBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
--------------------------------------------------
......@@ -104,7 +104,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.1 10)
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 10) 5 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.1 10)
......
......@@ -21,13 +21,12 @@ module Gargantext.Viz.Phylo.LevelMaker
import Control.Lens hiding (both, Level)
import Data.List ((++), sort, concat, nub, zip, last)
import Data.Map (Map, (!), empty, restrictKeys, filterWithKey, singleton, union)
import Data.Map (Map, (!), empty, singleton)
import Data.Text (Text)
import Data.Tuple.Extra
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Aggregates.Cluster
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.Aggregates.Document
import Gargantext.Viz.Phylo.Aggregates.Fis
import Gargantext.Viz.Phylo.BranchMaker
......@@ -61,7 +60,7 @@ instance PhyloLevelMaker PhyloCluster
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
--------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
toPhyloGroups lvl (d,d') l m _ = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m) $ zip [1..] l
--------------------------------------
......@@ -74,7 +73,7 @@ instance PhyloLevelMaker PhyloFis
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
--------------------------------------
-- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l m p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis m p) $ zip [1..] l
toPhyloGroups lvl (d,d') l _ p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
--------------------------------------
......@@ -95,26 +94,20 @@ instance PhyloLevelMaker Document
-- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster] -> Phylo -> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams empty cooc Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups)
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m =
PhyloGroup ((prd, lvl), idx) lbl ngrams empty Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups)
where
--------------------------------------
ngrams :: [Int]
ngrams = (sort . nub . concat) $ map getGroupNgrams groups
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
$ foldl union empty
$ map getGroupCooc
$ getGroupsWithFilters 1 prd p
--------------------------------------
-- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map (Date, Date) [PhyloFis] -> Phylo -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) cooc Nothing [] [] [] []
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis p =
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing [] [] [] []
where
--------------------------------------
ngrams :: [Int]
......@@ -122,16 +115,12 @@ cliqueToGroup prd lvl idx lbl fis m p =
$ Set.toList
$ getClique fis
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
$ fisToCooc (restrictKeys m $ Set.fromList [prd]) p
--------------------------------------
-- | To transform a list of Ngrams into a PhyloGroup
ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
ngramsToGroup prd lvl idx lbl ngrams p =
PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing [] [] [] []
PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing [] [] [] []
-- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
......
......@@ -25,6 +25,7 @@ import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Aggregates.Cooc
import qualified Data.List as List
import qualified Data.Maybe as Maybe
import qualified Data.Map as Map
......@@ -71,7 +72,8 @@ linkGroupToGroups (lvl,lvl') current targets
-- | To set the LevelLink of all the PhyloGroups of a Phylo
setLevelLinks :: (Level,Level) -> Phylo -> Phylo
setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLevel g == lvl
then linkGroupToGroups (lvl,lvl') g (filter (\g' -> getGroupPeriod g' == getGroupPeriod g) gs')
then linkGroupToGroups (lvl,lvl') g (filterCandidates g
$ filter (\g' -> getGroupPeriod g' == getGroupPeriod g) gs')
else g) gs) p
where
--------------------------------------
......@@ -85,12 +87,10 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLeve
-- | To apply the corresponding proximity function based on a given Proximity
applyProximity :: Proximity -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
applyProximity prox g1 g2 = case prox of
-- WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2),weightedLogJaccard s (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
-- Hamming (HammingParams _) -> ((getGroupId g2),hamming (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getGroupCooc g1) (getGroupCooc g2))
Hamming (HammingParams _) -> ((getGroupId g2), hamming (getGroupCooc g1) (getGroupCooc g2))
applyProximity :: Proximity -> PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> (PhyloGroupId, Double)
applyProximity prox g1 g2 cooc = case prox of
WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
Hamming (HammingParams _) -> ((getGroupId g2), hamming (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
_ -> panic ("[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined")
......@@ -113,21 +113,24 @@ getNextPeriods to' id l = case to' of
-- | To find the best candidates regarding a given proximity
findBestCandidates' :: Filiation -> Int -> Int -> Proximity -> [PhyloPeriodId] -> [PhyloGroup] -> PhyloGroup -> ([Pointer],[Double])
findBestCandidates' fil depth limit prox prds gs g
findBestCandidates' :: Filiation -> Int -> Int -> Proximity -> [PhyloPeriodId] -> [PhyloGroup] -> PhyloGroup -> Phylo -> ([Pointer],[Double])
findBestCandidates' fil depth limit prox prds gs g p
| depth > limit || null next = ([],[])
| (not . null) bestScores = (take 2 bestScores, map snd scores)
| otherwise = findBestCandidates' fil (depth + 1) limit prox prds gs g
| otherwise = findBestCandidates' fil (depth + 1) limit prox prds gs g p
where
--------------------------------------
next :: [PhyloPeriodId]
next = take depth prds
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = getCooc next p
--------------------------------------
candidates :: [PhyloGroup]
candidates = filter (\g' -> elem (getGroupPeriod g') next) gs
--------------------------------------
scores :: [(PhyloGroupId, Double)]
scores = map (\g' -> applyProximity prox g g') candidates
scores = map (\g' -> applyProximity prox g g' cooc) candidates
--------------------------------------
bestScores :: [(PhyloGroupId, Double)]
bestScores = reverse
......@@ -177,7 +180,7 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) sc
scores = sort $ concat $ map (snd . snd) candidates
--------------------------------------
candidates :: [(PhyloGroupId,([Pointer],[Double]))]
candidates = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g)) gs
candidates = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) gs
--------------------------------------
gs :: [PhyloGroup]
gs = getGroupsWithLevel lvl p
......
......@@ -237,11 +237,6 @@ getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId = _phylo_groupId
-- | To get the Cooc Matrix of a PhyloGroup
getGroupCooc :: PhyloGroup -> Map (Int,Int) Double
getGroupCooc = _phylo_groupCooc
-- | To get the level out of the id of a PhyloGroup
getGroupLevel :: PhyloGroup -> Int
getGroupLevel = snd . fst . getGroupId
......@@ -373,7 +368,6 @@ initGroup ngrams lbl idx lvl from' to' p = PhyloGroup
lbl
(sort $ map (\x -> getIdxInRoots x p) ngrams)
(Map.empty)
(Map.empty)
Nothing
[] [] [] []
......@@ -709,11 +703,14 @@ initFis (def True -> kmf) (def 1 -> min') (def 1 -> thr) = FisParams kmf min' th
initHamming :: Maybe Double -> HammingParams
initHamming (def 0.01 -> sens) = HammingParams sens
initSmallBranch' :: Maybe Int -> Maybe Int -> Maybe Int -> SBParams
initSmallBranch' (def 2 -> periodsInf) (def 2 -> periodsSup) (def 1 -> minNodes) = SBParams periodsInf periodsSup minNodes
initLonelyBranch :: Maybe Int -> Maybe Int -> Maybe Int -> LBParams
initLonelyBranch (def 2 -> periodsInf) (def 2 -> periodsSup) (def 1 -> minNodes) = LBParams periodsInf periodsSup minNodes
initSmallBranch :: Maybe Int -> Maybe Int -> Maybe Int -> SBParams
initSmallBranch (def 0 -> periodsInf) (def 0 -> periodsSup) (def 1 -> minNodes) = SBParams periodsInf periodsSup minNodes
initSizeBranch :: Maybe Int -> SBParams
initSizeBranch (def 1 -> minSize) = SBParams minSize
initLonelyBranch' :: Maybe Int -> Maybe Int -> Maybe Int -> LBParams
initLonelyBranch' (def 0 -> periodsInf) (def 0 -> periodsSup) (def 1 -> minNodes) = LBParams periodsInf periodsSup minNodes
initLouvain :: Maybe Proximity -> LouvainParams
initLouvain (def defaultWeightedLogJaccard -> proxi) = LouvainParams proxi
......@@ -760,8 +757,11 @@ defaultRelatedComponents = RelatedComponents (initRelatedComponents Nothing)
-- Filters
defaultSmallBranch :: Filter
defaultSmallBranch = SmallBranch (initSmallBranch Nothing Nothing Nothing)
defaultLonelyBranch :: Filter
defaultLonelyBranch = LonelyBranch (initLonelyBranch Nothing Nothing Nothing)
defaultSizeBranch :: Filter
defaultSizeBranch = SizeBranch (initSizeBranch Nothing)
-- Params
......
......@@ -48,9 +48,9 @@ cleanNodesEdges v v' = v' & pv_nodes %~ (filter (\n -> not $ elem (getNodeId n)
--------------------------------------
-- | To filter all the SmallBranches (ie: isolated one in time & with a small number of nodes) of a PhyloView
filterSmallBranch :: Int -> Int -> Int -> [PhyloPeriodId] -> PhyloView -> PhyloView
filterSmallBranch inf sup min' prds v = cleanNodesEdges v v'
-- | To filter all the LonelyBranches (ie: isolated one in time & with a small number of nodes) of a PhyloView
filterLonelyBranch :: Int -> Int -> Int -> [PhyloPeriodId] -> PhyloView -> PhyloView
filterLonelyBranch inf sup min' prds v = cleanNodesEdges v v'
where
--------------------------------------
v' :: PhyloView
......@@ -61,15 +61,24 @@ filterSmallBranch inf sup min' prds v = cleanNodesEdges v v'
--------------------------------------
isLone :: [PhyloNode] -> [PhyloPeriodId] -> Bool
isLone ns prds' = (length ns <= min')
&& notElem (head' "filterSmallBranch1" prds') (take inf prds)
&& notElem (head' "filterSmallBranch2" prds') (take sup $ reverse prds)
&& notElem (head' "filterLonelyBranch1" prds') (take inf prds)
&& notElem (head' "filterLonelyBranch2" prds') (take sup $ reverse prds)
--------------------------------------
-- | To filter all the branches with a minimal size in a PhyloView
filterSizeBranch :: Int -> PhyloView -> PhyloView
filterSizeBranch min' v = cleanNodesEdges v v'
where
--------------------------------------
v' :: PhyloView
v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) > min'))
--------------------------------------
-- | To process a list of QueryFilter to a PhyloView
processFilters :: [Filter] -> Phylo -> PhyloView -> PhyloView
processFilters fs p v = foldl (\v' f -> case f of
SmallBranch (SBParams inf sup min') -> filterSmallBranch inf sup min'
(getPhyloPeriods p) v'
LonelyBranch (LBParams inf sup min') -> filterLonelyBranch inf sup min' (getPhyloPeriods p) v'
SizeBranch (SBParams min') -> filterSizeBranch min' v'
-- _ -> panic "[ERR][Viz.Phylo.View.Filters.processFilters] filter not found"
) v fs
......@@ -22,9 +22,11 @@ import Data.List (concat,nub,groupBy,sortOn,sort)
import Data.Text (Text)
import Data.Tuple (fst, snd)
import Data.Vector (Vector)
import Data.Map (Map)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Aggregates.Cooc
import qualified Data.Map as Map
......@@ -46,11 +48,14 @@ freqToLabel thr ngs l = ngramsToLabel ngs $ mostFreqNgrams thr l
-- | To get the (nth `div` 2) most cooccuring Ngrams in a PhyloGroup
mostOccNgrams :: Int -> PhyloGroup -> [Int]
mostOccNgrams thr group = (nub . concat )
$ map (\((f,s),_d) -> [f,s])
$ take (thr `div` 2)
$ reverse $ sortOn snd $ Map.toList $ getGroupCooc group
mostOccNgrams :: Int -> Phylo -> PhyloGroup -> [Int]
mostOccNgrams thr p g = (nub . concat )
$ map (\((f,s),_d) -> [f,s])
$ take (thr `div` 2)
$ reverse $ sortOn snd $ Map.toList cooc
where
cooc :: Map (Int, Int) Double
cooc = getSubCooc (getGroupNgrams g) $ getCooc [getGroupPeriod g] p
-- | To alter the peak of a PhyloBranch
......@@ -75,7 +80,7 @@ nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelCooc v thr p = over (pv_nodes
. traverse)
(\n -> let lbl = ngramsToLabel (getFoundationsRoots p)
$ mostOccNgrams thr
$ mostOccNgrams thr p
$ head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p
in n & pn_label .~ lbl) v
......
......@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.View.ViewMaker
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (concat,nub,(++))
import Data.List (concat,nub,(++),sort)
import Data.Text (Text)
import Data.Map (Map, empty, elems, unionWithKey, fromList)
import Data.Tuple (fst, snd)
......@@ -32,6 +32,9 @@ import Gargantext.Viz.Phylo.View.Metrics
import Gargantext.Viz.Phylo.View.Sort
import Gargantext.Viz.Phylo.View.Taggers
import qualified Data.Vector.Storable as VS
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
-- | To init a PhyloBranch
initPhyloBranch :: PhyloBranchId -> Text -> PhyloBranch
......@@ -139,7 +142,8 @@ addChildNodes shouldDo lvl lvlMin vb fl p v =
-- | To transform a PhyloQuery into a PhyloView
toPhyloView :: PhyloQueryView -> Phylo -> PhyloView
toPhyloView q p = processDisplay (q ^. qv_display) (q ^. qv_export)
toPhyloView q p = traceView
$ processDisplay (q ^. qv_display) (q ^. qv_export)
$ processSort (q ^. qv_sort ) p
$ processTaggers (q ^. qv_taggers) p
$ processFilters (q ^. qv_filters) p
......@@ -160,3 +164,20 @@ getPhyloTitle p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
-- | To get the desc of a Phylo
getPhyloDescription :: Phylo -> Text
getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
-----------------
-- | Taggers | --
-----------------
traceView :: PhyloView -> PhyloView
traceView pv = trace ("------------\n--| View |--\n------------\n\n"
<> "view level : " <> show (pv ^. pv_level) <> "\n"
<> show (length $ pv ^. pv_branches) <> " exported branches with " <> show (length $ pv ^. pv_nodes) <> " groups\n"
<> "groups by branches : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") pv
where
lst = sort $ map (fromIntegral . length . snd) $ getNodesByBranches pv
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment