From 4e4c9b58a890caca2d04a8f369455bbf45762af5 Mon Sep 17 00:00:00 2001 From: qlobbe <quentin.lobbe@gmail.com> Date: Wed, 6 Mar 2019 18:02:27 +0100 Subject: [PATCH] Add the branch detection with a bugg --- src/Gargantext/Viz/Phylo.hs | 29 +++++++--- src/Gargantext/Viz/Phylo/Example.hs | 87 +++++++++++++++++++++++------ src/Gargantext/Viz/Phylo/Tools.hs | 54 ++++++++++++------ 3 files changed, 128 insertions(+), 42 deletions(-) diff --git a/src/Gargantext/Viz/Phylo.hs b/src/Gargantext/Viz/Phylo.hs index 347ce0b8..e8cefb99 100644 --- a/src/Gargantext/Viz/Phylo.hs +++ b/src/Gargantext/Viz/Phylo.hs @@ -72,6 +72,7 @@ data Phylo = Phylo { _phylo_duration :: (Start, End) , _phylo_ngrams :: PhyloNgrams , _phylo_periods :: [PhyloPeriod] + , _phylo_branches :: [PhyloBranch] } deriving (Generic, Show) @@ -94,7 +95,6 @@ data PhyloPeriod = } deriving (Generic, Show) -type PhyloPeriodId = (Start, End) -- | PhyloLevel : levels of phylomemy on level axis -- Levels description: @@ -108,7 +108,6 @@ data PhyloLevel = } deriving (Generic, Show) -type PhyloLevelId = (PhyloPeriodId, Int) -- | PhyloGroup : group of ngrams at each level and step -- Label : maybe has a label as text @@ -122,7 +121,7 @@ data PhyloGroup = , _phylo_groupLabel :: Text , _phylo_groupNgrams :: [Int] , _phylo_groupQuality :: Map Text Double - , _phylo_groupCooc :: Map (Int, Int) Double + , _phylo_groupCooc :: Map (Int, Int) Double , _phylo_groupPeriodParents :: [Pointer] , _phylo_groupPeriodChilds :: [Pointer] @@ -132,11 +131,21 @@ data PhyloGroup = } deriving (Generic, Show, Eq) -type PhyloGroupId = (PhyloLevelId, Int) -type Pointer = (PhyloGroupId, Weight) -type Weight = Double +data PhyloBranch = + PhyloBranch { _phylo_branchId :: (Int,Int) + , _phylo_branchLabel :: Text + , _phylo_branchGroups :: [PhyloGroupId] + } + deriving (Generic, Show) + +type PhyloPeriodId = (Start, End) +type PhyloLevelId = (PhyloPeriodId, Int) +type PhyloGroupId = (PhyloLevelId, Int) +type Pointer = (PhyloGroupId, Weight) +type Weight = Double +type PhyloBranchId = (Int, Int) -- | Ngrams : a contiguous sequence of n terms @@ -162,7 +171,7 @@ data LevelLabel = Level_m1 | Level_0 | Level_1 | Level_mN | Level_N | Level_pN data Level = Level { _levelLabel :: LevelLabel , _levelValue :: Int - } deriving (Show) + } deriving (Show, Eq) data LevelLink = LevelLink { _levelFrom :: Level @@ -195,12 +204,14 @@ makeLenses ''PhyloLevel makeLenses ''PhyloPeriod makeLenses ''Level makeLenses ''LevelLink +makeLenses ''PhyloBranch -- | JSON instances -$(deriveJSON (unPrefix "_phylo_" ) ''Phylo ) -$(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod ) +$(deriveJSON (unPrefix "_phylo_" ) ''Phylo ) +$(deriveJSON (unPrefix "_phylo_period" ) 'PhyloPeriod ) $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel ) $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup ) +$(deriveJSON (unPrefix "_phylo_branch" ) ''PhyloBranch ) -- $(deriveJSON (unPrefix "_software_" ) ''Software ) $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam ) diff --git a/src/Gargantext/Viz/Phylo/Example.hs b/src/Gargantext/Viz/Phylo/Example.hs index dfe5340c..66f5cd4e 100644 --- a/src/Gargantext/Viz/Phylo/Example.hs +++ b/src/Gargantext/Viz/Phylo/Example.hs @@ -30,7 +30,7 @@ module Gargantext.Viz.Phylo.Example where import Control.Lens hiding (makeLenses, both, Level) import Data.Bool (Bool, not) -import Data.List (concat, union, intersect, tails, tail, head, last, null, zip, sort, length, any, (++), (!!), nub, sortOn, reverse, splitAt, take) +import Data.List (concat, union, intersect, tails, tail, head, last, null, zip, sort, length, any, (++), (!!), nub, sortOn, reverse, splitAt, take, delete) import Data.Map (Map, elems, member, adjust, singleton, empty, (!), keys, restrictKeys, mapWithKey, filterWithKey, mapKeys, intersectionWith, unionWith) import Data.Semigroup (Semigroup) import Data.Set (Set) @@ -55,13 +55,63 @@ import qualified Data.Vector as Vector ------------------------------------------------------------------------ --- | STEP 13 | -- Incrementaly cluster the PhyloGroups n times, link them through the Periods and build level n of the Phylo +-- | STEP 14 | -- Incrementaly cluster the PhyloGroups n times, link them through the Periods and build level n of the Phylo ------------------------------------------------------------------------ --- | STEP 12 | -- Cluster the Fis +-- | STEP 13 | -- Cluster the Fis +------------------------------------------------------------------------ +-- | STEP 12 | -- Find the Branches + +initPhyloBranch :: (Int,Int) -> Text -> [PhyloGroupId] -> PhyloBranch +initPhyloBranch (lvl,idx) lbl l = PhyloBranch (lvl,idx) lbl l + +addPhyloBranch :: (Int,Int) -> Text -> [PhyloGroupId] -> [PhyloBranch] -> [PhyloBranch] +addPhyloBranch (lvl,idx) lbl ids b = b ++ [initPhyloBranch (lvl,idx) lbl ids] + + +-- cur : current PhyloGroup +-- rst : rest of the initial list of PhyloGroups +-- nxt : next PhyloGroups to be added in the current Branch +-- nbr : direct neighbours (Childs & Parents) of cur +-- ids : PhyloGroupIds allready added in the current Branch +-- mem : memory of the allready created Branches + +getGroupPairs :: PhyloGroup -> Phylo -> [PhyloGroup] +getGroupPairs g p = (getGroupChilds g p) ++ (getGroupParents g p) + + +groupsToBranchs :: (Int,Int) -> PhyloGroup -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroupId] -> [PhyloBranch] -> Phylo -> [PhyloBranch] +groupsToBranchs (lvl,idx) curr rest next memId mem p + | null rest && null next = addPhyloBranch (lvl,idx) "" (memId ++ [getGroupId curr]) mem + | (not . null) next = groupsToBranchs (lvl,idx) (head next') rest' (tail next') (memId ++ [getGroupId curr]) mem p + | otherwise = groupsToBranchs (lvl,idx + 1) (head rest') (tail rest') [] [] (addPhyloBranch (lvl,idx) "" (memId ++ [getGroupId curr]) mem) p + where + next' = nub $ next ++ (getGroupPairs curr p) + rest' = filter (\x -> not $ elem x next') rest + + + + + +setPhyloBranches :: Level -> Phylo -> Phylo +setPhyloBranches lvl p = alterPhyloBranches (\branches + -> branches ++ (groupsToBranchs + (getLevelValue lvl, 0) + (head groups) + (tail groups) + [] [] [] p)) p + where + -------------------------------------- + groups :: [PhyloGroup] + groups = getGroupsWithLevel (getLevelValue lvl) p + -------------------------------------- + + +phyloWithBranches_1 = setPhyloBranches (initLevel 1 Level_1) phyloWithPair_1_Childs + ------------------------------------------------------------------------ -- | STEP 11 | -- Link the PhyloGroups of level 1 through the Periods @@ -130,7 +180,7 @@ getNextPeriods to id l = case to of -- | To find the best set (max = 2) of Childs/Parents candidates based on a given Proximity mesure until a maximum depth (max = Period + 5 units ) findBestCandidates :: PairTo -> Int -> Int -> Double -> Double -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)] findBestCandidates to depth max thr s group p - | depth > max || (null . head) next = [] + | depth > max || null next = [] | (not . null) best = take 2 best | otherwise = findBestCandidates to (depth + 1) max thr s group p where @@ -166,25 +216,28 @@ makePair to group ids = case to of -- | To pair all the Phylogroups of given PhyloLevel to their best Parents or Childs pairGroupsToGroups :: PairTo -> Level -> Double -> Double -> Phylo -> Phylo -pairGroupsToGroups to lvl thr s p = alterPhyloGroupsWith +pairGroupsToGroups to lvl thr s p = alterPhyloGroups (\groups -> - map (\group -> - let - -------------------------------------- - candidates :: [(PhyloGroupId, Double)] - candidates = findBestCandidates to 1 5 thr s group p - -------------------------------------- - in - makePair to group candidates ) groups) - getGroupLevel (getLevelValue lvl) p + map (\group -> + if (getGroupLevel group) == (getLevelValue lvl) + then + let + -------------------------------------- + candidates :: [(PhyloGroupId, Double)] + candidates = findBestCandidates to 1 5 thr s group p + -------------------------------------- + in + makePair to group candidates + else + group ) groups) p phyloWithPair_1_Childs :: Phylo -phyloWithPair_1_Childs = pairGroupsToGroups Childs (initLevel 1 Level_1) 0.1 0.5 phyloWithPair_1_Parents +phyloWithPair_1_Childs = pairGroupsToGroups Childs (initLevel 1 Level_1) 0.01 0 phyloWithPair_1_Parents phyloWithPair_1_Parents :: Phylo -phyloWithPair_1_Parents = pairGroupsToGroups Parents (initLevel 1 Level_1) 0.1 0.5 phyloLinked_0_1 +phyloWithPair_1_Parents = pairGroupsToGroups Parents (initLevel 1 Level_1) 0.01 0 phyloLinked_0_1 ------------------------------------------------------------------------ @@ -526,7 +579,7 @@ phyloPeriods = groupDocsByPeriod 5 3 phyloDocs phylo -- | To init a Phylomemy initPhylo :: [Document] -> PhyloNgrams -> Phylo -initPhylo docs ngrams = Phylo (both date $ (last &&& head) docs) ngrams [] +initPhylo docs ngrams = Phylo (both date $ (last &&& head) docs) ngrams [] [] -- | To init a PhyloNgrams as a Vector of Ngrams diff --git a/src/Gargantext/Viz/Phylo/Tools.hs b/src/Gargantext/Viz/Phylo/Tools.hs index 4363d668..83ae28bd 100644 --- a/src/Gargantext/Viz/Phylo/Tools.hs +++ b/src/Gargantext/Viz/Phylo/Tools.hs @@ -19,7 +19,7 @@ module Gargantext.Viz.Phylo.Tools import Control.Lens hiding (both, Level) import Data.List (filter, intersect, (++), sort, null, head, tail, last) -import Data.Map (Map) +import Data.Map (Map, mapKeys, member) import Data.Set (Set) import Data.Text (Text) import Data.Tuple.Extra @@ -50,19 +50,6 @@ alterPhyloGroups f p = over ( phylo_periods . phylo_levelGroups ) f p --- | To alter a sub list of PhyloGroups (filtered) following a given function -alterPhyloGroupsWith :: Eq a => ([PhyloGroup] -> [PhyloGroup]) -> (PhyloGroup -> a) -> a -> Phylo -> Phylo -alterPhyloGroupsWith f f' x p = over ( phylo_periods - . traverse - . phylo_periodLevels - . traverse - . phylo_levelGroups - ) (f . subGroups) p - where - -------------------------------------- - subGroups :: [PhyloGroup] -> [PhyloGroup] - subGroups l = filterGroups f' x l - -------------------------------------- -- | To alter each PhyloPeriod of a Phylo following a given function alterPhyloPeriods :: (PhyloPeriod -> PhyloPeriod) -> Phylo -> Phylo @@ -70,6 +57,11 @@ alterPhyloPeriods f p = over ( phylo_periods . traverse) f p +-- | To alter the list of PhyloBranches of a Phylo +alterPhyloBranches :: ([PhyloBranch] -> [PhyloBranch]) -> Phylo -> Phylo +alterPhyloBranches f p = over ( phylo_branches ) f p + + -- | To alter a list of PhyloLevels following a given function alterPhyloLevels :: ([PhyloLevel] -> [PhyloLevel]) -> Phylo -> Phylo alterPhyloLevels f p = over ( phylo_periods @@ -120,6 +112,11 @@ filterNestedSets h l l' | otherwise = filterNestedSets (head l) (tail l) (h : l') +-- | To get the PhyloGroups Childs of a PhyloGroup +getGroupChilds :: PhyloGroup -> Phylo -> [PhyloGroup] +getGroupChilds g p = getGroupsFromIds (map fst $ _phylo_groupPeriodChilds g) p + + -- | To get the id of a PhyloGroup getGroupId :: PhyloGroup -> PhyloGroupId getGroupId = _phylo_groupId @@ -140,6 +137,11 @@ getGroupNgrams :: PhyloGroup -> [Int] getGroupNgrams = _phylo_groupNgrams +-- | To get the PhyloGroups Parents of a PhyloGroup +getGroupParents :: PhyloGroup -> Phylo -> [PhyloGroup] +getGroupParents g p = getGroupsFromIds (map fst $ _phylo_groupPeriodParents g) p + + -- | To get the period out of the id of a PhyloGroup getGroupPeriod :: PhyloGroup -> (Date,Date) getGroupPeriod = fst . fst . getGroupId @@ -155,11 +157,26 @@ getGroups = view ( phylo_periods ) +-- | To all PhyloGroups matching a list of PhyloGroupIds in a Phylo +getGroupsFromIds :: [PhyloGroupId] -> Phylo -> [PhyloGroup] +getGroupsFromIds ids p = filter (\g -> elem (getGroupId g) ids) $ getGroups p + + -- | To get all the PhyloGroup of a Phylo with a given level and period getGroupsWithFilters :: Int -> (Date,Date) -> Phylo -> [PhyloGroup] -getGroupsWithFilters lvl prd p = (filterGroups getGroupLevel lvl (getGroups p)) +getGroupsWithFilters lvl prd p = (getGroupsWithLevel lvl p) `intersect` - (filterGroups getGroupPeriod prd (getGroups p)) + (getGroupsWithPeriod prd p) + + +-- | To get all the PhyloGroup of a Phylo with a given Level +getGroupsWithLevel :: Int -> Phylo -> [PhyloGroup] +getGroupsWithLevel lvl p = filterGroups getGroupLevel lvl (getGroups p) + + +-- | To get all the PhyloGroup of a Phylo with a given Period +getGroupsWithPeriod :: (Date,Date) -> Phylo -> [PhyloGroup] +getGroupsWithPeriod prd p = filterGroups getGroupPeriod prd (getGroups p) -- | To get the index of an element of a Vector @@ -195,6 +212,11 @@ getLevelLinkValue dir link = case dir of _ -> panic "[ERR][Viz.Phylo.Tools.getLevelLinkValue] Wrong direction" +-- | To get the Branches of a Phylo +getPhyloBranches :: Phylo -> [PhyloBranch] +getPhyloBranches = _phylo_branches + + -- | To get all the Phylolevels of a given PhyloPeriod getPhyloLevels :: PhyloPeriod -> [PhyloLevel] getPhyloLevels = view (phylo_periodLevels) -- 2.21.0