Commit 7550f605 authored by qlobbe's avatar qlobbe

add rebranching to link distante branches

parent e9fa60c6
...@@ -22,7 +22,7 @@ Phylo binaries ...@@ -22,7 +22,7 @@ Phylo binaries
module Main where module Main where
-- import System.Directory (doesFileExist) import System.Directory (doesFileExist)
import Data.Aeson import Data.Aeson
import Data.Text (Text, unwords) import Data.Text (Text, unwords)
...@@ -45,12 +45,9 @@ import Gargantext.Viz.Phylo.LevelMaker ...@@ -45,12 +45,9 @@ import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.View.Export import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.View.ViewMaker import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Database.Types.Node import Gargantext.Database.Types.Node
import Data.Maybe import Data.Maybe
import qualified Data.Map as DM import qualified Data.Map as DM
import qualified Data.Vector as DV import qualified Data.Vector as DV
import qualified Data.List as DL import qualified Data.List as DL
...@@ -80,6 +77,7 @@ data Conf = ...@@ -80,6 +77,7 @@ data Conf =
, limit :: Limit , limit :: Limit
, timeGrain :: Int , timeGrain :: Int
, timeStep :: Int , timeStep :: Int
, timeFrame :: Int
, timeTh :: Double , timeTh :: Double
, timeSens :: Double , timeSens :: Double
, clusterTh :: Double , clusterTh :: Double
...@@ -158,21 +156,24 @@ parse format limit path l = do ...@@ -158,21 +156,24 @@ parse format limit path l = do
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
-- -- | To parse an existing Fis file -- | To parse an existing Fis file
-- parseFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> IO [PhyloFis] parseFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> IO [PhyloFis]
-- parseFis path name grain step support clique = do parseFis path name grain step support clique = do
-- let fisPath = path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json" fisExists <- doesFileExist (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")
-- fisExists <- doesFileExist (path) if fisExists
-- if fisExists then do
-- then do fisJson <- (eitherDecode <$> getJson (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")) :: IO (P.Either P.String [PhyloFis])
-- fis <- L.readFile fisPath case fisJson of
-- pure $ decoder (eitherDecode fis :: P.Either [Char] [PhyloFis]) P.Left err -> do
-- else pure [] putStrLn err
pure []
-- writeFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> DM.Map (Date,Date) [PhyloFis] -> IO () P.Right fis -> pure fis
-- writeFis path name grain step support clique fis = do else pure []
-- let fisPath = path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json"
-- P.writeFile fisPath $ show (encode (DL.concat $ DM.elems fis)) writeFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> DM.Map (Date,Date) [PhyloFis] -> IO ()
writeFis path name grain step support clique fis = do
let fisPath = path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json"
L.writeFile fisPath $ encode (DL.concat $ DM.elems fis)
-------------- --------------
-- | Main | -- -- | Main | --
...@@ -194,23 +195,27 @@ main = do ...@@ -194,23 +195,27 @@ main = do
corpus <- parse (corpusType conf) (limit conf) (corpusPath conf) termList corpus <- parse (corpusType conf) (limit conf) (corpusPath conf) termList
-- fis <- parseFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) putStrLn $ ("\n" <> show (length corpus) <> " parsed docs")
-- let mFis = DM.fromListWith (++) $ DL.sortOn (fst . fst) $ map (\f -> (getFisPeriod f,[f])) fis
let roots = DL.nub $ DL.concat $ map text corpus let roots = DL.nub $ DL.concat $ map text corpus
putStrLn $ ("\n" <> show (length corpus) <> " parsed docs") putStrLn $ ("\n" <> show (length roots) <> " parsed foundation roots")
fis <- parseFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf)
putStrLn $ ("\n" <> show (length fis) <> " parsed fis")
let mFis = DM.fromListWith (++) $ DL.sortOn (fst . fst) $ map (\f -> (getFisPeriod f,[f])) fis
let query = PhyloQueryBuild (phyloName conf) "" (timeGrain conf) (timeStep conf) let query = PhyloQueryBuild (phyloName conf) "" (timeGrain conf) (timeStep conf)
(Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (phyloLevel conf) (Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (timeFrame conf) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf)) (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
let phylo = toPhylo query corpus roots termList DM.empty let phylo = toPhylo query corpus roots termList mFis
-- writeFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) (getPhyloFis phylo) writeFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) (getPhyloFis phylo)
let view = toPhyloView queryView phylo let view = toPhyloView queryView phylo
......
...@@ -347,6 +347,7 @@ data PhyloQueryBuild = PhyloQueryBuild ...@@ -347,6 +347,7 @@ data PhyloQueryBuild = PhyloQueryBuild
-- Inter-temporal matching method of the Phylo -- Inter-temporal matching method of the Phylo
, _q_interTemporalMatching :: Proximity , _q_interTemporalMatching :: Proximity
, _q_interTemporalMatchingFrame :: Int
-- Last level of reconstruction -- Last level of reconstruction
, _q_nthLevel :: Level , _q_nthLevel :: Level
......
...@@ -80,8 +80,7 @@ phyloToClusters lvl clus p = Map.fromList ...@@ -80,8 +80,7 @@ phyloToClusters lvl clus p = Map.fromList
-------------------------------------- --------------------------------------
graphs :: [([GroupNode],[GroupEdge])] graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox) graphs = traceGraph lvl (getThreshold prox)
$ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) $ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
$ trace (show(map (\prd -> (prd,length $ getGroupsWithFilters lvl prd p)) periods)) periods
-------------------------------------- --------------------------------------
prox :: Proximity prox :: Proximity
prox = getProximity clus prox = getProximity clus
......
...@@ -28,6 +28,8 @@ import qualified Data.List as List ...@@ -28,6 +28,8 @@ import qualified Data.List as List
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import Debug.Trace (trace)
-- | To init a list of Periods framed by a starting Date and an ending Date -- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)] initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
...@@ -38,7 +40,7 @@ initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last' "Doc" l)) ...@@ -38,7 +40,7 @@ initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last' "Doc" l))
-- | To group a list of Documents by fixed periods -- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods" groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds groupDocsByPeriod f pds es = trace ("----\nGroup docs by periods\n") $ Map.fromList $ zip pds $ map (inPeriode f es) pds
where where
-------------------------------------- --------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t] inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
......
...@@ -18,8 +18,8 @@ module Gargantext.Viz.Phylo.Aggregates.Fis ...@@ -18,8 +18,8 @@ module Gargantext.Viz.Phylo.Aggregates.Fis
where where
import Control.Lens hiding (makeLenses, both, Level) import Control.Lens hiding (makeLenses, both, Level)
import Data.List (null,concat,sort) import Data.List (null,concat,sort,(++))
import Data.Map (Map,elems,mapWithKey) import Data.Map (Map,elems,mapWithKey,unionWith,fromList,keys)
import Data.Tuple (fst, snd) import Data.Tuple (fst, snd)
import Data.Set (size) import Data.Set (size)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -59,58 +59,23 @@ filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ m ...@@ -59,58 +59,23 @@ filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ m
in filter (\fis -> elem (getClique fis) cliqueMax) l) in filter (\fis -> elem (getClique fis) cliqueMax) l)
-- | To transform a list of Documents into a Frequent Items Set
docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [PhyloFis]
docsToFis m = mapWithKey (\k docs -> let fs = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) k) fs) m
docsToFis' :: Map (Date,Date) [Document] -> Phylo -> Phylo docsToFis' :: Map (Date,Date) [Document] -> Phylo -> Phylo
docsToFis' m p = if (null $ getPhyloFis p) docsToFis' m p = if (null $ getPhyloFis p)
then p & phylo_fis .~ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs) then trace("----\nRebuild the Fis from scratch\n")
$ p & phylo_fis .~ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) k) fis) m in map (\f -> PhyloFis (fst f) (snd f) k) fis) m
else p else trace("----\nUse Fis from an existing file\n")
$ p & phylo_fis %~ (unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m))
-- | To process a list of Filters on top of the PhyloFis
processFilters :: [Filter] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] toPhyloFis' :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis]
processFilters filters phyloFis toPhyloFis' fis k s t = traceFis "----\nFiltered Fis by clique size :\n"
| null filters = phyloFis $ filterFis k t (filterFisByClique)
| otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processFilters] please add some filters for the Fis" $ traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested
$ traceFis "----\nFiltered Fis by support :\n"
-- | To process a list of Metrics on top of the PhyloFis $ filterFis k s (filterFisBySupport)
processMetrics :: [Metric] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] $ traceFis "----\nUnfiltered Fis :\n" fis
processMetrics metrics phyloFis
| null metrics = phyloFis
| otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processMetrics] please add some metrics for the Fis"
-- | To transform some Documents into PhyloFis and apply a List of Metrics and Filters
toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> Int -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
toPhyloFis ds k s t ms fs = processFilters fs
$ processMetrics ms
$ traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n"
$ docsToFis ds
toPhyloFis' :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
toPhyloFis' fis k s t ms fs = processFilters fs
$ processMetrics ms
$ traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n" fis
----------------- -----------------
......
...@@ -18,14 +18,119 @@ module Gargantext.Viz.Phylo.BranchMaker ...@@ -18,14 +18,119 @@ module Gargantext.Viz.Phylo.BranchMaker
where where
import Control.Lens hiding (both, Level) import Control.Lens hiding (both, Level)
import Data.List (concat,nub,(++),tail) import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union)
import Data.Map (Map)
import Data.Tuple (fst, snd) import Data.Tuple (fst, snd)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Metrics.Clustering import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.LinkMaker
import qualified Data.Map as Map
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
---------------------------
-- | Readability links | --
---------------------------
getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
getFramedPeriod :: [PhyloGroup] -> (Date,Date)
getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
getGroupsNgrams :: [PhyloGroup] -> [Int]
getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
getNthMostOcc :: Int -> Map (Int,Int) Double -> [Int]
getNthMostOcc nth cooc = (nub . concat)
$ map (\((idx,idx'),_) -> [idx,idx'])
$ take (nth `div` 2)
$ reverse
$ sortOn snd $ Map.toList cooc
-- | Get the Nth most coocurent Ngrams in a list of Groups
getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
getGroupsPeaks gs nth p = getNthMostOcc nth
$ getSubCooc (getGroupsNgrams gs)
$ getCooc (getGroupsPeriods gs) p
areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
-- | Process a Jaccard on top of two set of Branch Peaks
areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
/ ((fromIntegral . length) $ union ns ns')) >= thr
findSimBranches :: Int -> Double -> Int -> Phylo -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloBranchId,[PhyloGroup])]
findSimBranches frame thr nth p (id,gs) bs
= filter (\(_ ,gs') -> areTwinPeaks thr pks (getGroupsPeaks gs' nth p))
$ filter (\(_ ,gs') -> (not . null) $ intersect ns (getGroupsNgrams gs'))
$ filter (\(_ ,gs') -> areDistant prd (getFramedPeriod gs') frame)
$ filter (\(id',_ ) -> id /= id') bs
where
--------------------------------------
prd :: (Date,Date)
prd = getFramedPeriod gs
--------------------------------------
ns :: [Int]
ns = getGroupsNgrams gs
--------------------------------------
pks :: [Int]
pks = getGroupsPeaks gs nth p
--------------------------------------
findBestPointer :: Phylo -> Proximity -> [PhyloGroup] -> [PhyloGroup] -> [(PhyloGroupId,Pointer)]
findBestPointer p prox gs gs' = take 1
$ reverse
$ sortOn (snd . snd)
$ concat
$ map (\g -> let pts = findBestCandidates' prox gs' g p
in map (\pt -> (getGroupId g,pt)) pts) gs
makeBranchLinks :: Phylo -> Proximity -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloGroupId,Pointer)] -> [(PhyloGroupId,Pointer)]
makeBranchLinks p prox (id,gs) bs pts
| null bs = pts
| otherwise = makeBranchLinks p prox (head' "makeLink" bs) (tail bs) (pts ++ pts')
where
--------------------------------------
pts' :: [(PhyloGroupId,Pointer)]
pts' = concat $ map (\(_id,gs') -> findBestPointer p prox gs gs') candidates
--------------------------------------
candidates :: [(PhyloBranchId,[PhyloGroup])]
candidates = findSimBranches (getPhyloMatchingFrame p) 0.9 4 p (id,gs) bs
linkPhyloBranches :: Level -> Proximity -> Phylo -> Phylo
linkPhyloBranches lvl prox p = setPhyloBranches lvl
$ updateGroups Descendant lvl pointers p
where
--------------------------------------
pointers :: Map PhyloGroupId [Pointer]
pointers = Map.fromList $ map (\(_id,(_id',_w)) -> (_id,[(_id',100)]))
$ makeBranchLinks p prox (head' "makeLink" branches) (tail branches) []
--------------------------------------
branches :: [(PhyloBranchId,[PhyloGroup])]
branches = sortOn (\(_id,gs) -> fst $ getFramedPeriod gs) $ getGroupsByBranches p
--------------------------------------
------------------
-- | Branches | --
------------------
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches :: Level -> GroupGraph -> Phylo -> [(Int,PhyloGroupId)] graphToBranches :: Level -> GroupGraph -> Phylo -> [(Int,PhyloGroupId)]
......
...@@ -105,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre" ...@@ -105,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)" phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 20) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.3 0) 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 20) 5 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.3 0)
...@@ -205,9 +205,12 @@ phylo1 = addPhyloLevel (1) phyloFis phylo ...@@ -205,9 +205,12 @@ phylo1 = addPhyloLevel (1) phyloFis phylo
phyloFis :: Map (Date, Date) [PhyloFis] phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = filterFis True 1 (filterFisByClique) phyloFis = filterFis True 1 (filterFisByClique)
$ filterFisByNested $ filterFisByNested
$ filterFis True 1 (filterFisBySupport) (docsToFis phyloDocs) $ filterFis True 1 (filterFisBySupport) (getPhyloFis phylo')
phylo' :: Phylo
phylo' = docsToFis' phyloDocs phylo
---------------------------------------- ----------------------------------------
-- | STEP 2 | -- Init a Phylo of level 0 -- | STEP 2 | -- Init a Phylo of level 0
---------------------------------------- ----------------------------------------
......
...@@ -21,7 +21,7 @@ module Gargantext.Viz.Phylo.LevelMaker ...@@ -21,7 +21,7 @@ module Gargantext.Viz.Phylo.LevelMaker
import Control.Lens hiding (both, Level) import Control.Lens hiding (both, Level)
import Data.List ((++), sort, concat, nub, zip, last) import Data.List ((++), sort, concat, nub, zip, last)
import Data.Map (Map, (!), empty, singleton,mapWithKey) import Data.Map (Map, (!), empty, singleton)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple.Extra import Data.Tuple.Extra
import Gargantext.Prelude import Gargantext.Prelude
...@@ -100,8 +100,11 @@ clusterToGroup prd lvl idx lbl groups _m p = ...@@ -100,8 +100,11 @@ clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams empty PhyloGroup ((prd, lvl), idx) lbl ngrams empty
Nothing Nothing
(getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)) (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] (map (\g -> (getGroupId g, 1)) groups) [] [] [] childs
where where
--------------------------------------
childs :: [Pointer]
childs = map (\g -> (getGroupId g, 1)) groups
-------------------------------------- --------------------------------------
ngrams :: [Int] ngrams :: [Int]
ngrams = (sort . nub . concat) $ map getGroupNgrams groups ngrams = (sort . nub . concat) $ map getGroupNgrams groups
...@@ -151,11 +154,11 @@ toNthLevel lvlMax prox clus p ...@@ -151,11 +154,11 @@ toNthLevel lvlMax prox clus p
$ setPhyloBranches (lvl + 1) $ setPhyloBranches (lvl + 1)
$ transposePeriodLinks (lvl + 1) $ transposePeriodLinks (lvl + 1)
$ setLevelLinks (lvl, lvl + 1) $ setLevelLinks (lvl, lvl + 1)
$ trace (show (mapWithKey (\k v -> (k,length v)) clusters))
$ addPhyloLevel (lvl + 1) $ addPhyloLevel (lvl + 1)
(clusters) p (clusters) p
where where
-------------------------------------- --------------------------------------
clusters :: Map (Date,Date) [PhyloCluster]
clusters = phyloToClusters lvl clus p clusters = phyloToClusters lvl clus p
-------------------------------------- --------------------------------------
lvl :: Level lvl :: Level
...@@ -164,9 +167,11 @@ toNthLevel lvlMax prox clus p ...@@ -164,9 +167,11 @@ toNthLevel lvlMax prox clus p
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Method -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox metrics filters d p = case clus of toPhylo1 clus prox d p = case clus of
Fis (FisParams k s t) -> traceBranches 1 Fis (FisParams k s t) -> traceReBranches 1
$ linkPhyloBranches 1 prox
$ traceBranches 1
$ setPhyloBranches 1 $ setPhyloBranches 1
$ traceTempoMatching Descendant 1 $ traceTempoMatching Descendant 1
$ interTempoMatching Descendant 1 prox $ interTempoMatching Descendant 1 prox
...@@ -178,7 +183,7 @@ toPhylo1 clus prox metrics filters d p = case clus of ...@@ -178,7 +183,7 @@ toPhylo1 clus prox metrics filters d p = case clus of
where where
-------------------------------------- --------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis] phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = toPhyloFis' (getPhyloFis phylo') k s t metrics filters phyloFis = toPhyloFis' (getPhyloFis phylo') k s t
-------------------------------------- --------------------------------------
phylo' :: Phylo phylo' :: Phylo
phylo' = docsToFis' d p phylo' = docsToFis' d p
...@@ -205,7 +210,7 @@ instance PhyloMaker [(Date, Text)] ...@@ -205,7 +210,7 @@ instance PhyloMaker [(Date, Text)]
where where
-------------------------------------- --------------------------------------
phylo1 :: Phylo phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
-------------------------------------- --------------------------------------
phylo0 :: Phylo phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase phylo0 = toPhylo0 phyloDocs phyloBase
...@@ -244,7 +249,7 @@ instance PhyloMaker [Document] ...@@ -244,7 +249,7 @@ instance PhyloMaker [Document]
where where
-------------------------------------- --------------------------------------
phylo1 :: Phylo phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
-------------------------------------- --------------------------------------
phylo0 :: Phylo phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase phylo0 = toPhylo0 phyloDocs phyloBase
...@@ -281,6 +286,16 @@ instance PhyloMaker [Document] ...@@ -281,6 +286,16 @@ instance PhyloMaker [Document]
----------------- -----------------
tracePhylo0 :: Phylo -> Phylo
tracePhylo0 p = trace ("\n---------------\n--| Phylo 0 |--\n---------------\n\n") p
tracePhylo1 :: Phylo -> Phylo
tracePhylo1 p = trace ("\n---------------\n--| Phylo 1 |--\n---------------\n\n") p
tracePhyloN :: Level -> Phylo -> Phylo
tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n") p
tracePhyloBase :: Phylo -> Phylo tracePhyloBase :: Phylo -> Phylo
tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n" tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
<> show (length $ _phylo_periods p) <> " periods from " <> show (length $ _phylo_periods p) <> " periods from "
...@@ -308,6 +323,23 @@ traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temp ...@@ -308,6 +323,23 @@ traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temp
-------------------------------------- --------------------------------------
traceReBranches :: Level -> Phylo -> Phylo
traceReBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " after relinking :\n"
<> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
<> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
<> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
<> show (percentile 50 (VS.fromList brs)) <> " (50%) "
<> show (percentile 75 (VS.fromList brs)) <> " (75%) "
<> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
where
--------------------------------------
brs :: [Double]
brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
$ filter (\(id,_) -> (fst id) == lvl)
$ getGroupsByBranches p
--------------------------------------
traceBranches :: Level -> Phylo -> Phylo traceBranches :: Level -> Phylo -> Phylo
traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n" traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
<> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n" <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
......
...@@ -20,7 +20,7 @@ module Gargantext.Viz.Phylo.LinkMaker ...@@ -20,7 +20,7 @@ module Gargantext.Viz.Phylo.LinkMaker
import Control.Lens hiding (both, Level) import Control.Lens hiding (both, Level)
import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, sort, delete, intersect, nub, groupBy) import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, sort, delete, intersect, nub, groupBy)
import Data.Tuple.Extra import Data.Tuple.Extra
import Data.Map (Map,(!),fromListWith,elems,restrictKeys,unionWith) import Data.Map (Map,(!),fromListWith,elems,restrictKeys,unionWith,member)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
...@@ -40,10 +40,9 @@ import Numeric.Statistics (percentile) ...@@ -40,10 +40,9 @@ import Numeric.Statistics (percentile)
-- | To choose a LevelLink strategy based an a given Level -- | To choose a LevelLink strategy based an a given Level
shouldLink :: (Level,Level) -> PhyloGroup -> PhyloGroup -> Bool shouldLink :: (Level,Level) -> PhyloGroup -> PhyloGroup -> Bool
shouldLink (lvl,_lvl) g g' shouldLink (lvl,lvl') g g'
| lvl <= 1 = doesContainsOrd (getGroupNgrams g) (getGroupNgrams g') | (lvl <= 1) && (lvl' <= 1) = doesContainsOrd (getGroupNgrams g) (getGroupNgrams g')
| lvl > 1 = elem (getGroupId g) (getGroupLevelChildsId g') | otherwise = elem (getGroupId g) (getGroupLevelChildsId g')
| otherwise = panic ("[ERR][Viz.Phylo.LinkMaker.shouldLink] Level not defined")
-- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups -- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
...@@ -70,15 +69,12 @@ linkGroupToGroups (lvl,lvl') current targets ...@@ -70,15 +69,12 @@ linkGroupToGroups (lvl,lvl') current targets
-- | To set the LevelLink of all the PhyloGroups of a Phylo -- | To set the LevelLink of all the PhyloGroups of a Phylo
setLevelLinks :: (Level,Level) -> Phylo -> Phylo setLevelLinks :: (Level,Level) -> Phylo -> Phylo
setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLevel g == lvl setLevelLinks (lvl,lvl') p = alterPhyloGroups (\groups ->
then linkGroupToGroups (lvl,lvl') g (filterCandidates g map (\group -> if getGroupLevel group == lvl
$ filter (\g' -> getGroupPeriod g' == getGroupPeriod g) gs') then linkGroupToGroups (lvl,lvl') group
else g) gs) p $ filterCandidates group
where $ getGroupsWithFilters lvl' (getGroupPeriod group) p
-------------------------------------- else group) groups) p
gs' :: [PhyloGroup]
gs' = getGroupsWithLevel lvl' p
--------------------------------------
------------------------------- -------------------------------
...@@ -154,6 +150,32 @@ findBestCandidates filiation depth limit proximity periods candidates g1 phylo ...@@ -154,6 +150,32 @@ findBestCandidates filiation depth limit proximity periods candidates g1 phylo
-------------------------------------- --------------------------------------
nextPeriods :: [(Date,Date)] nextPeriods :: [(Date,Date)]
nextPeriods = take depth periods nextPeriods = take depth periods
--------------------------------------
findBestCandidates' :: Proximity -> [PhyloGroup] -> PhyloGroup -> Phylo -> [Pointer]
findBestCandidates' proximity candidates g1 phylo = pointers
where
--------------------------------------
pointers :: [(PhyloGroupId, Double)]
pointers = reverse $ sortOn snd $ filter (\(_,score) -> case proximity of
WeightedLogJaccard (WLJParams thr _) -> score >= thr
Hamming (HammingParams thr) -> score <= thr
_ -> panic "[ERR][Viz.Phylo.LinkMaker.findBestCandidates'] Unknown proximity"
) similarities
--------------------------------------
similarities :: [(PhyloGroupId, Double)]
similarities = concat $ map (\(g2,g3) -> let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] phylo
cooc2 = getGroupCooc g2
cooc3 = getGroupCooc g3
score = processProximity proximity cooc1 (unionWith (+) cooc2 cooc3) nbDocs
in nub $ [(getGroupId g2,score),(getGroupId g3,score)]) pairsOfCandidates
--------------------------------------
pairsOfCandidates :: [(PhyloGroup,PhyloGroup)]
pairsOfCandidates = listToFullCombi candidates
--------------------------------------
cooc1 :: Map (Int,Int) Double
cooc1 = getGroupCooc g1
-------------------------------------- --------------------------------------
...@@ -168,7 +190,7 @@ addPointers' fil pts g = g & case fil of ...@@ -168,7 +190,7 @@ addPointers' fil pts g = g & case fil of
-- | To update a list of phyloGroups with some Pointers -- | To update a list of phyloGroups with some Pointers
updateGroups :: Filiation -> Level -> Map PhyloGroupId [Pointer] -> Phylo -> Phylo updateGroups :: Filiation -> Level -> Map PhyloGroupId [Pointer] -> Phylo -> Phylo
updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if (getGroupLevel g) == lvl updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if ((getGroupLevel g) == lvl) && (member (getGroupId g) m)
then addPointers' fil (m ! (getGroupId g)) g then addPointers' fil (m ! (getGroupId g)) g
else g ) gs) p else g ) gs) p
...@@ -220,7 +242,7 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) de ...@@ -220,7 +242,7 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) de
pointers = concat pointers = concat
$ map (\branche -> $ map (\branche ->
map (\g -> ( getGroupId g map (\g -> ( getGroupId g
, findBestCandidates fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) (getPhyloPeriods p)) (filterCandidates g branche) g p ) , findBestCandidates fil 1 (getPhyloMatchingFrame p) prox (getNextPeriods fil (getGroupPeriod g) (getPhyloPeriods p)) (filterCandidates g branche) g p )
) branche ) branches ) branche ) branches
-------------------------------------- --------------------------------------
branches :: [[PhyloGroup]] branches :: [[PhyloGroup]]
...@@ -240,7 +262,7 @@ toLevelUp lst p = Map.toList ...@@ -240,7 +262,7 @@ toLevelUp lst p = Map.toList
where where
-------------------------------------- --------------------------------------
pointers :: [Pointer] pointers :: [Pointer]
pointers = trace(show(map (\(id,_) -> length $ getGroupLevelParentId $ getGroupFromId id p) lst)) $ map (\(id,v) -> (getGroupLevelParentId $ getGroupFromId id p, v)) lst pointers = map (\(id,v) -> (getGroupLevelParentId $ getGroupFromId id p, v)) lst
-------------------------------------- --------------------------------------
...@@ -250,7 +272,7 @@ transposePeriodLinks lvl p = alterGroupWithLevel ...@@ -250,7 +272,7 @@ transposePeriodLinks lvl p = alterGroupWithLevel
(\g -> (\g ->
-------------------------------------- --------------------------------------
let childs = getGroupsFromIds (map fst $ getGroupLevelChilds g) p let childs = getGroupsFromIds (map fst $ getGroupLevelChilds g) p
ascLink = trace (show(length childs)) $ toLevelUp (concat $ map getGroupPeriodParents childs) p ascLink = toLevelUp (concat $ map getGroupPeriodParents childs) p
desLink = toLevelUp (concat $ map getGroupPeriodChilds childs) p desLink = toLevelUp (concat $ map getGroupPeriodChilds childs) p
-------------------------------------- --------------------------------------
in g & phylo_groupPeriodParents %~ (++ ascLink) in g & phylo_groupPeriodParents %~ (++ ascLink)
......
...@@ -192,6 +192,8 @@ getPhyloTitle p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p ...@@ -192,6 +192,8 @@ getPhyloTitle p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
getPhyloDescription :: Phylo -> Text getPhyloDescription :: Phylo -> Text
getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrame :: Phylo -> Int
getPhyloMatchingFrame p = _q_interTemporalMatchingFrame $ _phyloParam_query $ getPhyloParams p
getPhyloFis :: Phylo -> Map (Date,Date) [PhyloFis] getPhyloFis :: Phylo -> Map (Date,Date) [PhyloFis]
getPhyloFis = _phylo_fis getPhyloFis = _phylo_fis
...@@ -791,10 +793,10 @@ initWeightedLogJaccard (def 0 -> thr) (def 0.01 -> sens) = WLJParams thr sens ...@@ -791,10 +793,10 @@ initWeightedLogJaccard (def 0 -> thr) (def 0.01 -> sens) = WLJParams thr sens
-- | To initialize a PhyloQueryBuild from given and default parameters -- | To initialize a PhyloQueryBuild from given and default parameters
initPhyloQueryBuild :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe [Metric] -> Maybe [Filter] -> Maybe Proximity -> Maybe Level -> Maybe Cluster -> PhyloQueryBuild initPhyloQueryBuild :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe [Metric] -> Maybe [Filter] -> Maybe Proximity -> Maybe Int -> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild name desc (def 5 -> grain) (def 3 -> steps) (def defaultFis -> cluster) (def [] -> metrics) (def [] -> filters) initPhyloQueryBuild name desc (def 5 -> grain) (def 3 -> steps) (def defaultFis -> cluster) (def [] -> metrics) (def [] -> filters)
(def defaultWeightedLogJaccard -> matching') (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) = (def defaultWeightedLogJaccard -> matching') (def 5 -> frame) (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQueryBuild name desc grain steps cluster metrics filters matching' nthLevel nthCluster PhyloQueryBuild name desc grain steps cluster metrics filters matching' frame nthLevel nthCluster
...@@ -848,7 +850,7 @@ defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing N ...@@ -848,7 +850,7 @@ defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing N
defaultQueryBuild :: PhyloQueryBuild defaultQueryBuild :: PhyloQueryBuild
defaultQueryBuild = initPhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)" defaultQueryBuild = initPhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
defaultQueryView :: PhyloQueryView defaultQueryView :: PhyloQueryView
defaultQueryView = initPhyloQueryView Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing defaultQueryView = initPhyloQueryView Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment