diff --git a/src/Gargantext/Viz/AdaptativePhylo.hs b/src/Gargantext/Viz/AdaptativePhylo.hs index c28d313efd2540454548f5bc79229cede425207c..9194af27bf9f4fba3e53dc789d9a03a3d7733a1f 100644 --- a/src/Gargantext/Viz/AdaptativePhylo.hs +++ b/src/Gargantext/Viz/AdaptativePhylo.hs @@ -87,7 +87,6 @@ data Config = , corpusParser :: CorpusParser , phyloName :: Text , phyloLevel :: Int - , phyloQuality :: Double , phyloProximity :: Proximity , timeUnit :: TimeUnit , contextualUnit :: ContextualUnit @@ -103,7 +102,6 @@ defaultConfig = , corpusParser = Csv 1000 , phyloName = pack "Default Phylo" , phyloLevel = 2 - , phyloQuality = 0.5 , phyloProximity = WeightedLogJaccard 10 0 0.05 , timeUnit = Year 3 1 5 , contextualUnit = Fis 2 4 diff --git a/src/Gargantext/Viz/Phylo/PhyloExample.hs b/src/Gargantext/Viz/Phylo/PhyloExample.hs index cb159dd1921485309ddf968efdbdabb75f490c22..78197be1131a42cf8484a47d422dba71cc1cf5fe 100644 --- a/src/Gargantext/Viz/Phylo/PhyloExample.hs +++ b/src/Gargantext/Viz/Phylo/PhyloExample.hs @@ -66,7 +66,7 @@ phyloBase = toPhyloBase docs mapList config phyloCooc :: Map Date Cooc -phyloCooc = docsToTimeScaleCooc docs (foundations ^. foundations_roots) config +phyloCooc = docsToTimeScaleCooc docs (foundations ^. foundations_roots) periods :: [(Date,Date)] diff --git a/src/Gargantext/Viz/Phylo/PhyloMaker.hs b/src/Gargantext/Viz/Phylo/PhyloMaker.hs index c08d2a2502e62aeaa7749404d53cf73d5c90d743..e7c53015d7444b11cc7acb5c9c36e4e196707cfe 100644 --- a/src/Gargantext/Viz/Phylo/PhyloMaker.hs +++ b/src/Gargantext/Viz/Phylo/PhyloMaker.hs @@ -173,8 +173,8 @@ ngramsToCooc ngrams coocs = -- | To transform the docs into a time map of coocurency matrix -docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Config -> Map Date Cooc -docsToTimeScaleCooc docs fdt conf = +docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc +docsToTimeScaleCooc docs fdt = let mCooc = fromListWith sumCooc $ map (\(_d,l) -> (_d, listToMatrix l)) $ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs @@ -229,7 +229,7 @@ toPhyloBase docs lst conf = periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf) in trace ("\n" <> "-- | Create PhyloBase out of " <> show(length docs) <> " docs \n") $ Phylo foundations - (docsToTimeScaleCooc docs (foundations ^. foundations_roots) conf) + (docsToTimeScaleCooc docs (foundations ^. foundations_roots)) (docsToTimeScaleNb docs) params (fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels (phyloLevel conf) prd))) periods) diff --git a/src/Gargantext/Viz/Phylo/PhyloTools.hs b/src/Gargantext/Viz/Phylo/PhyloTools.hs index b64c367bbdaece226e8167b0a97a4e860a505021..47d3baa8665fbf30007bdcbc0ad47d6866b54cb0 100644 --- a/src/Gargantext/Viz/Phylo/PhyloTools.hs +++ b/src/Gargantext/Viz/Phylo/PhyloTools.hs @@ -19,7 +19,7 @@ module Gargantext.Viz.Phylo.PhyloTools where import Data.Vector (Vector, elemIndex) import Data.List (sort, concat, null, union, (++), tails, sortOn) import Data.Set (Set, size) -import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), toList) +import Data.Map (Map, elems, fromList, unionWith, keys, member, (!)) import Data.String (String) import Gargantext.Prelude @@ -156,12 +156,12 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l getFisSupport :: ContextualUnit -> Int getFisSupport unit = case unit of Fis s _ -> s - _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support") + -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support") getFisSize :: ContextualUnit -> Int getFisSize unit = case unit of Fis _ s -> s - _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size") + -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size") -------------- @@ -255,10 +255,10 @@ updatePhyloGroups lvl m phylo = pointersToLinks :: PhyloGroupId -> [Pointer] -> [Link] pointersToLinks id pointers = map (\p -> ((id,fst p),snd p)) pointers -mergeLinks :: [Link] -> [Link] -> [Link] -mergeLinks toChilds toParents = - let toChilds' = fromList $ map (\((from,to),w) -> ((to,from),w)) toChilds - in toList $ unionWith max (fromList toParents) toChilds' +-- mergeLinks :: [Link] -> [Link] -> [Link] +-- mergeLinks toChilds toParents = +-- let toChilds' = fromList $ map (\((from,to),w) -> ((to,from),w)) toChilds +-- in toList $ unionWith max (fromList toParents) toChilds' ------------------- diff --git a/src/Gargantext/Viz/Phylo/SynchronicClustering.hs b/src/Gargantext/Viz/Phylo/SynchronicClustering.hs index 8f62b36fc04d9ef521561ce62b27459b72d14b95..bb5c85015007d06930b805e3af549345e0e37e4d 100644 --- a/src/Gargantext/Viz/Phylo/SynchronicClustering.hs +++ b/src/Gargantext/Viz/Phylo/SynchronicClustering.hs @@ -16,8 +16,8 @@ Portability : POSIX module Gargantext.Viz.Phylo.SynchronicClustering where import Gargantext.Prelude -import Gargantext.Viz.AdaptativePhylo -import Gargantext.Viz.Phylo.PhyloTools +-- import Gargantext.Viz.AdaptativePhylo +-- import Gargantext.Viz.Phylo.PhyloTools import Data.List (foldl', (++), null, intersect, (\\), union, nub, concat) diff --git a/src/Gargantext/Viz/Phylo/TemporalMatching.hs b/src/Gargantext/Viz/Phylo/TemporalMatching.hs index c187263853581bbec9c5a230f118df0125794bd3..b641d913bfb88aeaeba323ac0a8f47c4a4907ad0 100644 --- a/src/Gargantext/Viz/Phylo/TemporalMatching.hs +++ b/src/Gargantext/Viz/Phylo/TemporalMatching.hs @@ -15,8 +15,8 @@ Portability : POSIX module Gargantext.Viz.Phylo.TemporalMatching where -import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, any, nub, union) -import Data.Map (Map, fromList, toList, fromListWith, filterWithKey, elems, restrictKeys, unionWith, intersectionWith, member, (!)) +import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, nub, union) +import Data.Map (Map, fromList, fromListWith, filterWithKey, elems, restrictKeys, unionWith, intersectionWith) import Gargantext.Prelude import Gargantext.Viz.AdaptativePhylo @@ -27,7 +27,6 @@ import Control.Lens hiding (Level) import qualified Data.Set as Set - ------------------- -- | Proximity | -- ------------------- @@ -79,6 +78,13 @@ pickProximity proximity docs cooc cooc' ngrams ngrams' = case proximity of Hamming -> undefined +filterProximity :: Proximity -> Double -> Double -> Bool +filterProximity proximity thr local = + case proximity of + WeightedLogJaccard _ _ _ -> local >= thr + Hamming -> undefined + + -- | To process the proximity between a current group and a pair of targets group toProximity :: Map Date Double -> Proximity -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double toProximity docs proximity group target target' = @@ -98,8 +104,8 @@ toProximity docs proximity group target target' = -- | Find pairs of valuable candidates to be matched -makePairs :: [PhyloGroup] -> [PhyloPeriodId] -> Map Date Double -> PhyloGroup -> [(PhyloGroup,PhyloGroup)] -makePairs candidates periods docs group = case null periods of +makePairs :: [PhyloGroup] -> [PhyloPeriodId] -> [(PhyloGroup,PhyloGroup)] +makePairs candidates periods = case null periods of True -> [] -- | at least on of the pair candidates should be from the last added period False -> filter (\(cdt,cdt') -> (inLastPeriod cdt periods) @@ -110,8 +116,8 @@ makePairs candidates periods docs group = case null periods of inLastPeriod g prds = (g ^. phylo_groupPeriod) == (last' "makePairs" prds) -phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Proximity -> Map Date Double -> PhyloGroup -> PhyloGroup -phyloGroupMatching candidates fil proxi docs group = case pointers of +phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Proximity -> Map Date Double -> Double-> PhyloGroup -> PhyloGroup +phyloGroupMatching candidates fil proxi docs thr group = case pointers of Nothing -> addPointers group fil TemporalPointer [] Just pts -> addPointers group fil TemporalPointer $ head' "phyloGroupMatching" @@ -125,8 +131,9 @@ phyloGroupMatching candidates fil proxi docs group = case pointers of -- | for each time frame, process the proximity on relevant pairs of targeted groups $ scanl (\acc groups -> let periods = nub $ map (\g' -> g' ^. phylo_groupPeriod) $ concat groups - pairs = makePairs (concat groups) periods docs group - in acc ++ ( concat + pairs = makePairs (concat groups) periods + in acc ++ ( filter (\(_,proximity) -> filterProximity proxi thr proximity) + $ concat $ map (\(c,c') -> -- | process the proximity between the current group and a pair of candidates let proximity = toProximity (filterDocs docs periods) proxi group c c' @@ -141,51 +148,16 @@ phyloGroupMatching candidates fil proxi docs group = case pointers of filterDocs d pds = restrictKeys d $ periodsToYears pds - ------------------- --- | Pointers | -- ------------------- - - --- ghostHunter :: [[PhyloGroup]] -> [[PhyloGroup]] --- ghostHunter branches = --- map (\branch -> --- -- | il manque une référence au group source de chaque pointer --- let pointers = elems $ fromList --- $ map (\pt -> (groupIds ! (fst pt),pt)) --- $ filter (\pt -> member (fst pt) groupIds) $ concat $ map (\g -> g ^. phylo_groupGhostPointers) branch - --- in undefined --- ) branches --- where --- groupIds :: Map PhyloGroupId Int --- groupIds = fromList $ map (\g -> (getGroupId g, last' "ghostHunter" $ snd $ g ^. phylo_groupBranchId)) $ concat branches --- -------------------------------------- --- selectBest :: [Pointers] -> [Pointers] --- se - - - -filterPointers :: Double -> [PhyloGroup] -> [PhyloGroup] -filterPointers thr groups = - map (\group -> - let ghosts = filter (\(_,w) -> w < thr) $ group ^. phylo_groupPeriodParents - in group & phylo_groupPeriodParents %~ (filter (\(_,w) -> w >= thr)) - & phylo_groupPeriodChilds %~ (filter (\(_,w) -> w >= thr)) - & phylo_groupGhostPointers %~ (++ ghosts) - ) groups - - ----------------------------- --- | Adaptative Matching | -- +-- | Matching Processing | -- ----------------------------- getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId] -getNextPeriods fil max pId pIds = +getNextPeriods fil max' pId pIds = case fil of - ToChilds -> take max $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds - ToParents -> take max $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds + ToChilds -> take max' $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds + ToParents -> take max' $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds getCandidates :: Filiation -> PhyloGroup -> [PhyloPeriodId] -> [PhyloGroup] -> [[PhyloGroup]] @@ -202,12 +174,25 @@ getCandidates fil g pIds targets = $ map (\g' -> (g' ^. phylo_groupPeriod,[g'])) targets -shouldBreak :: Double -> [(Double,[PhyloGroup])] -> Bool -shouldBreak thr branches = any (\(quality,_) -> quality < thr) branches +processMatching :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> [PhyloGroup] -> [PhyloGroup] +processMatching max' periods proximity thr docs groups = + map (\group -> + let childs = getCandidates ToChilds group + (getNextPeriods ToChilds max' (group ^. phylo_groupPeriod) periods) groups + parents = getCandidates ToParents group + (getNextPeriods ToParents max' (group ^. phylo_groupPeriod) periods) groups + in phyloGroupMatching parents ToParents proximity docs thr + $ phyloGroupMatching childs ToChilds proximity docs thr group + ) groups + + +----------------------------- +-- | Adaptative Matching | -- +----------------------------- -toBranchQuality :: [[PhyloGroup]] -> [(Double,[PhyloGroup])] -toBranchQuality branches = undefined +toPhyloQuality :: [[PhyloGroup]] -> Double +toPhyloQuality _ = undefined groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] @@ -225,64 +210,42 @@ groupsToBranches groups = ) graph - --- findGhostLinks :: [Link] -> [[Link]] -> Map PhyloGroupId - -adaptativeMatching :: Proximity -> Double -> Double -> [PhyloGroup] -> [PhyloGroup] -adaptativeMatching proximity thr thrQua groups = - -- | check if we should break some of the new branches or not - case shouldBreak thrQua branches' of - True -> concat $ map (\(s,b) -> - if s >= thrQua - -- | we keep the branch as it is - then b - -- | we break the branch using an increased temporal matching threshold - else let nextGroups = undefined - in adaptativeMatching proximity (thr + (getThresholdStep proximity)) thrQua nextGroups - ) branches' - -- | the quality of all the new branches is sufficient - False -> concat branches +recursiveMatching :: Proximity -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> Double -> [PhyloGroup] -> [PhyloGroup] +recursiveMatching proximity thr max' periods docs quality groups = + case quality < quality' of + -- | success : we localy improve the quality of the branch, let's go deeper + True -> concat + $ map (\branch -> + recursiveMatching proximity (thr + (getThresholdStep proximity)) max' periods docs quality' branch + ) branches + -- | failure : last step was the local maximum, let's validate it + False -> groups where - -- | 3) process a quality score for each new branch - branches' :: [(Double,[PhyloGroup])] - branches' = toBranchQuality branches + -- | 3) process a quality score on the local set of branches + quality' :: Double + quality' = toPhyloQuality branches -- | 2) group the new groups into branches branches :: [[PhyloGroup]] branches = groupsToBranches $ fromList $ map (\group -> (getGroupId group, group)) groups' - -- | 1) filter the pointers of each groups regarding the current state of the quality threshold + -- | 1) process a temporal matching for each group groups' :: [PhyloGroup] - groups' = filterPointers thr groups + groups' = processMatching max' periods proximity thr docs groups temporalMatching :: Phylo -> Phylo temporalMatching phylo = updatePhyloGroups 1 branches phylo where - -- | 4) find the ghost links and postprocess the branches - branches' :: Map PhyloGroupId PhyloGroup - branches' = undefined - -- | 3) run the adaptative matching to find the best repartition among branches + -- | 2) run the recursive matching to find the best repartition among branches branches :: Map PhyloGroupId PhyloGroup branches = fromList $ map (\g -> (getGroupId g, g)) - $ adaptativeMatching proximity (getThresholdInit proximity) (phyloQuality $ getConfig phylo) groups' - -- | 2) for each group process an initial temporal Matching + $ recursiveMatching (phyloProximity $ getConfig phylo) + (getThresholdInit $ phyloProximity $ getConfig phylo) + (getTimeFrame $ timeUnit $ getConfig phylo) + (getPeriodIds phylo) + (phylo ^. phylo_timeDocs) (toPhyloQuality [groups']) groups' + -- | 1) for each group process an initial temporal Matching groups' :: [PhyloGroup] - groups' = - let maxTime = getTimeFrame $ timeUnit $ getConfig phylo - periods = getPeriodIds phylo - docs = phylo ^. phylo_timeDocs - -------------------------------------- - in map (\group -> - let childs = getCandidates ToChilds group - (getNextPeriods ToChilds maxTime (group ^. phylo_groupPeriod) periods) groups - parents = getCandidates ToParents group - (getNextPeriods ToParents maxTime (group ^. phylo_groupPeriod) periods) groups - in phyloGroupMatching parents ToParents proximity docs - $ phyloGroupMatching childs ToChilds proximity docs group - ) groups - -- | 1) start with all the groups from a given level - groups :: [PhyloGroup] - groups = getGroupsFromLevel 1 phylo - -------------------------------------- - proximity :: Proximity - proximity = phyloProximity $ getConfig phylo \ No newline at end of file + groups' = processMatching (getTimeFrame $ timeUnit $ getConfig phylo) (getPeriodIds phylo) + (phyloProximity $ getConfig phylo) (getThresholdInit $ phyloProximity $ getConfig phylo) + (phylo ^. phylo_timeDocs) (getGroupsFromLevel 1 phylo)