Commit a4ad4249 authored by qlobbe's avatar qlobbe

add sort branch by birth date

parent b7ca113d
Pipeline #513 canceled with stage
......@@ -216,7 +216,7 @@ main = do
(reBranchThr conf) (reBranchNth conf) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [GroupLabelIncDyn,BranchPeakInc] (Just (ByBranchAge,Asc)) Json Flat True
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge,BranchBirth] [SizeBranch $ SBParams (minSizeBranch conf)] [GroupLabelIncDyn,BranchPeakInc] (Just (ByBranchBirth,Asc)) Json Flat True
let phylo = toPhylo query corpus termList fis'
......
......@@ -198,7 +198,7 @@ type Ngrams = Text
data Document = Document
{ date :: Date
, text :: [Ngrams]
} deriving (Show,Generic)
} deriving (Show,Generic,NFData)
-- | Clique : Set of ngrams cooccurring in the same Document
type Clique = Set Ngrams
......@@ -209,7 +209,7 @@ data PhyloFis = PhyloFis
{ _phyloFis_clique :: Clique
, _phyloFis_support :: Support
, _phyloFis_period :: (Date,Date)
} deriving (Generic,Show,Eq)
} deriving (Generic,NFData,Show,Eq)
-- | A list of clustered PhyloGroup
type PhyloCluster = [PhyloGroup]
......@@ -309,7 +309,7 @@ data SBParams = SBParams
-- | Metric constructors
data Metric = BranchAge deriving (Generic, Show, Eq, Read)
data Metric = BranchAge | BranchBirth deriving (Generic, Show, Eq, Read)
----------------
......@@ -328,7 +328,7 @@ data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
-- | Sort constructors
data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
data Order = Asc | Desc deriving (Generic, Show, Read)
......
......@@ -17,8 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates
where
import Control.Lens hiding (makeLenses, both, Level)
import Control.Parallel.Strategies
import Gargantext.Prelude hiding (elem)
import Gargantext.Text.Context (TermList)
......@@ -29,12 +28,15 @@ import Gargantext.Viz.Phylo.Tools
import Debug.Trace (trace)
import Data.List (partition, concat, nub, elem, sort, (++), null)
import Data.Map (Map, fromList, fromListWith, adjust, filterWithKey, toList, elems, keys, unionWith, mapWithKey)
import Data.List (partition, concat, nub, elem, sort, (++), null, union)
import Data.Map (Map, fromList, fromListWith, adjust, filterWithKey, elems, keys, unionWith, mapWithKey)
import Data.Set (size)
import Data.Text (Text, unwords)
import Data.Vector (Vector)
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector as Vector
......@@ -53,9 +55,13 @@ termListToNgrams = map (\(lbl,_) -> unwords lbl)
-------------------
-- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = trace ("----\nGroup docs by periods\n") $ fromList $ zip pds $ map (inPeriode f es) pds
groupDocsByPeriod f pds es =
let periods = map (inPeriode f es) pds
periods' = periods `using` parList rdeepseq
in trace ("----\nGroup docs by periods\n") $ fromList $ zip pds periods'
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
......@@ -161,28 +167,45 @@ filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) >= thr) l
-- | To filter nested Fis
-- | To find if l' is nested in l
isNested :: Eq a => [a] -> [a] -> Bool
isNested l l'
| null l' = True
| length l' > length l = False
| (union l l') == l = True
| otherwise = False
-- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ map getClique l) (map getClique l) []
in filter (\fis -> elem (getClique fis) cliqueMax) l)
filterFisByNested m =
let fis = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ getClique f') (Set.toList $ getClique f)) mem)
then mem
else
let fMax = filter (\f' -> not $ isNested (Set.toList $ getClique f) (Set.toList $ getClique f')) mem
in fMax ++ [f] ) [] l)
$ elems m
fis' = fis `using` parList rdeepseq
in fromList $ zip (keys m) fis'
-- | Choose if we use a set of Fis from a file or if we have to create them
docsToFis :: Map (Date,Date) [Document] -> Phylo -> Phylo
docsToFis :: Map (Date,Date) [Document] -> Phylo -> Map (Date, Date) [PhyloFis]
docsToFis m p = if (null $ getPhyloFis p)
then trace("----\nRebuild the Fis from scratch\n")
$ p & phylo_fis .~ mapWithKey (\k docs -> let fis = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
$ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) k) fis) m
else trace("----\nUse Fis from an existing file\n")
$ p & phylo_fis %~ (unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m))
$ unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m) (getPhyloFis p)
-- | Process some filters on top of a set of Fis
refineFis :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis]
refineFis fis k s t = traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by nested :\n"
refineFis fis k s t = traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested
$ traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n" fis
......
......@@ -30,6 +30,7 @@ module Gargantext.Viz.Phylo.Example where
import Data.GraphViz.Types.Generalised (DotGraph)
import Control.Lens hiding (both, Level)
import Data.Text (Text, toLower)
import Data.List ((++))
import Data.Map (Map,empty)
......@@ -82,7 +83,7 @@ queryViewEx = "level=3"
phyloQueryView :: PhyloQueryView
phyloQueryView = PhyloQueryView 2 Merge False 2 [BranchAge] [] [BranchPeakInc,GroupLabelIncDyn] (Just (ByBranchAge,Asc)) Json Flat True
phyloQueryView = PhyloQueryView 2 Merge False 2 [BranchAge,BranchBirth] [] [BranchPeakInc,GroupLabelIncDyn] (Just (ByBranchBirth,Asc)) Json Flat True
--------------------------------------------------
......@@ -205,12 +206,11 @@ phylo1 = addPhyloLevel (1) phyloFis phylo'
-- | STEP 5 | -- Create lists of Frequent Items Set and filter them
-------------------------------------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = refineFis (getPhyloFis phylo') True 1 1
phylo' :: Phylo
phylo' = docsToFis phyloDocs phylo
phylo' = phylo & phylo_fis .~ phyloFis
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = refineFis (docsToFis phyloDocs phylo) True 1 1
----------------------------------------
-- | STEP 2 | -- Init a Phylo of level 0
......
......@@ -21,7 +21,7 @@ module Gargantext.Viz.Phylo.LevelMaker
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List ((++), sort, concat, nub, zip, last)
import Data.List ((++), sort, concat, nub, zip, last, null)
import Data.Map (Map, (!), empty, singleton)
import Data.Text (Text)
import Data.Tuple.Extra
......@@ -187,11 +187,12 @@ toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phyloBase
-- phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo
--------------------------------------
phylo0 :: Phylo
phylo0 = tracePhyloN 0
$ addPhyloLevel 0 phyloDocs phyloBase
-- phylo0 :: Phylo
-- phylo0 = tracePhyloN 0
-- $ addPhyloLevel 0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
......@@ -236,15 +237,14 @@ toPhylo1 clus prox d p = case clus of
$ traceTempoMatching Ascendant 1
$ interTempoMatching Ascendant 1 prox
$ tracePhyloN 1
$ setLevelLinks (0,1)
$ addPhyloLevel 1 phyloFis phylo'
-- $ setLevelLinks (0,1)
$ addPhyloLevel 1 (getPhyloFis phyloFis) phyloFis
where
--------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = refineFis (getPhyloFis phylo') k s t
--------------------------------------
phylo' :: Phylo
phylo' = docsToFis d p
phyloFis :: Phylo
phyloFis = if (null $ getPhyloFis p)
then p & phylo_fis .~ refineFis (docsToFis d p) k s t
else p & phylo_fis .~ docsToFis d p
--------------------------------------
_ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
......
......@@ -227,10 +227,9 @@ transposePeriodLinks :: Level -> Phylo -> Phylo
transposePeriodLinks lvl p = alterPhyloGroups
(\gs -> if ((not . null) gs) && (elem lvl $ map getGroupLevel gs)
then
let groups = map (\g -> g & phylo_groupPeriodParents .~ (trackPointers (reduceGroups g lvlGroups)
$ g ^. phylo_groupPeriodParents)
& phylo_groupPeriodChilds .~ (trackPointers (reduceGroups g lvlGroups)
$ g ^. phylo_groupPeriodChilds )) gs
let groups = map (\g -> let m = reduceGroups g lvlGroups
in g & phylo_groupPeriodParents .~ (trackPointers m $ g ^. phylo_groupPeriodParents)
& phylo_groupPeriodChilds .~ (trackPointers m $ g ^. phylo_groupPeriodChilds )) gs
groups' = groups `using` parList rdeepseq
in groups'
else gs
......
......@@ -47,10 +47,22 @@ branchAge v = foldl (\v' b -> let bId = (fst . (head' "branchAge")) b
$ getNodesInBranches v
-- | To get the age (in year) of all the branches of a PhyloView
branchBirth :: PhyloView -> PhyloView
branchBirth v = foldl (\v' b -> let bId = (fst . (head' "branchBirth")) b
prds = sortOn fst $ map snd b
in addBranchMetrics bId "birth" (fromIntegral $ fst $ head' "branchAge" prds) v') v
$ groupBy ((==) `on` fst)
$ sortOn fst
$ map (\n -> (getNodeBranchId n, (fst . fst) $ getNodeId n))
$ getNodesInBranches v
-- | To process a list of Metrics to a PhyloView
processMetrics :: [Metric] -> Phylo -> PhyloView -> PhyloView
processMetrics ms _p v = foldl (\v' m -> case m of
BranchAge -> branchAge v'
BranchBirth -> branchBirth v'
-- _ -> panic "[ERR][Viz.Phylo.Example.processMetrics] metric not found"
) v ms
......
......@@ -36,10 +36,22 @@ sortBranchByAge o v = v & pv_branches %~ f
Desc -> reverse $ sortOn (getBranchMeta "age") xs
--------------------------------------
-- | To sort a PhyloView by Birth date of a branch
sortBranchByBirth :: Order -> PhyloView -> PhyloView
sortBranchByBirth o v = v & pv_branches %~ f
where
--------------------------------------
f :: [PhyloBranch] -> [PhyloBranch]
f xs = case o of
Asc -> sortOn (getBranchMeta "birth") xs
Desc -> reverse $ sortOn (getBranchMeta "birth") xs
--------------------------------------
-- | To process a Sort to a PhyloView
processSort :: Maybe (Sort,Order) -> Phylo -> PhyloView -> PhyloView
processSort s _p v = case s of
Nothing -> v
Just s' -> case fst s' of
ByBranchAge -> sortBranchByAge (snd s') v
ByBranchAge -> sortBranchByAge (snd s') v
ByBranchBirth -> sortBranchByBirth (snd s') v
--_ -> panic "[ERR][Viz.Phylo.View.Sort.processSort] sort not found"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment