Commit 7550f605 authored by qlobbe's avatar qlobbe

add rebranching to link distante branches

parent e9fa60c6
Pipeline #420 failed with stage
......@@ -22,7 +22,7 @@ Phylo binaries
module Main where
-- import System.Directory (doesFileExist)
import System.Directory (doesFileExist)
import Data.Aeson
import Data.Text (Text, unwords)
......@@ -45,12 +45,9 @@ import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Database.Types.Node
import Data.Maybe
import qualified Data.Map as DM
import qualified Data.Vector as DV
import qualified Data.List as DL
......@@ -80,6 +77,7 @@ data Conf =
, limit :: Limit
, timeGrain :: Int
, timeStep :: Int
, timeFrame :: Int
, timeTh :: Double
, timeSens :: Double
, clusterTh :: Double
......@@ -158,21 +156,24 @@ parse format limit path l = do
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
-- -- | To parse an existing Fis file
-- parseFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> IO [PhyloFis]
-- parseFis path name grain step support clique = do
-- let fisPath = path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json"
-- fisExists <- doesFileExist (path)
-- if fisExists
-- then do
-- fis <- L.readFile fisPath
-- pure $ decoder (eitherDecode fis :: P.Either [Char] [PhyloFis])
-- else pure []
-- writeFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> DM.Map (Date,Date) [PhyloFis] -> IO ()
-- writeFis path name grain step support clique fis = do
-- let fisPath = path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json"
-- P.writeFile fisPath $ show (encode (DL.concat $ DM.elems fis))
-- | To parse an existing Fis file
parseFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> IO [PhyloFis]
parseFis path name grain step support clique = do
fisExists <- doesFileExist (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")
if fisExists
then do
fisJson <- (eitherDecode <$> getJson (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")) :: IO (P.Either P.String [PhyloFis])
case fisJson of
P.Left err -> do
putStrLn err
pure []
P.Right fis -> pure fis
else pure []
writeFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> DM.Map (Date,Date) [PhyloFis] -> IO ()
writeFis path name grain step support clique fis = do
let fisPath = path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json"
L.writeFile fisPath $ encode (DL.concat $ DM.elems fis)
--------------
-- | Main | --
......@@ -194,23 +195,27 @@ main = do
corpus <- parse (corpusType conf) (limit conf) (corpusPath conf) termList
-- fis <- parseFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf)
-- let mFis = DM.fromListWith (++) $ DL.sortOn (fst . fst) $ map (\f -> (getFisPeriod f,[f])) fis
putStrLn $ ("\n" <> show (length corpus) <> " parsed docs")
let roots = DL.nub $ DL.concat $ map text corpus
putStrLn $ ("\n" <> show (length corpus) <> " parsed docs")
putStrLn $ ("\n" <> show (length roots) <> " parsed foundation roots")
fis <- parseFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf)
putStrLn $ ("\n" <> show (length fis) <> " parsed fis")
let mFis = DM.fromListWith (++) $ DL.sortOn (fst . fst) $ map (\f -> (getFisPeriod f,[f])) fis
let query = PhyloQueryBuild (phyloName conf) "" (timeGrain conf) (timeStep conf)
(Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (phyloLevel conf)
(Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (timeFrame conf) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
let phylo = toPhylo query corpus roots termList DM.empty
let phylo = toPhylo query corpus roots termList mFis
-- writeFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) (getPhyloFis phylo)
writeFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) (getPhyloFis phylo)
let view = toPhyloView queryView phylo
......
......@@ -347,6 +347,7 @@ data PhyloQueryBuild = PhyloQueryBuild
-- Inter-temporal matching method of the Phylo
, _q_interTemporalMatching :: Proximity
, _q_interTemporalMatchingFrame :: Int
-- Last level of reconstruction
, _q_nthLevel :: Level
......
......@@ -80,8 +80,7 @@ phyloToClusters lvl clus p = Map.fromList
--------------------------------------
graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox)
$ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p))
$ trace (show(map (\prd -> (prd,length $ getGroupsWithFilters lvl prd p)) periods)) periods
$ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
--------------------------------------
prox :: Proximity
prox = getProximity clus
......
......@@ -28,6 +28,8 @@ import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Vector as Vector
import Debug.Trace (trace)
-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
......@@ -38,7 +40,7 @@ initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last' "Doc" l))
-- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds
groupDocsByPeriod f pds es = trace ("----\nGroup docs by periods\n") $ Map.fromList $ zip pds $ map (inPeriode f es) pds
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
......
......@@ -18,8 +18,8 @@ module Gargantext.Viz.Phylo.Aggregates.Fis
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (null,concat,sort)
import Data.Map (Map,elems,mapWithKey)
import Data.List (null,concat,sort,(++))
import Data.Map (Map,elems,mapWithKey,unionWith,fromList,keys)
import Data.Tuple (fst, snd)
import Data.Set (size)
import Gargantext.Prelude
......@@ -59,58 +59,23 @@ filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ m
in filter (\fis -> elem (getClique fis) cliqueMax) l)
-- | To transform a list of Documents into a Frequent Items Set
docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [PhyloFis]
docsToFis m = mapWithKey (\k docs -> let fs = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) k) fs) m
docsToFis' :: Map (Date,Date) [Document] -> Phylo -> Phylo
docsToFis' m p = if (null $ getPhyloFis p)
then p & phylo_fis .~ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
then trace("----\nRebuild the Fis from scratch\n")
$ p & phylo_fis .~ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) k) fis) m
else p
-- | To process a list of Filters on top of the PhyloFis
processFilters :: [Filter] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
processFilters filters phyloFis
| null filters = phyloFis
| otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processFilters] please add some filters for the Fis"
-- | To process a list of Metrics on top of the PhyloFis
processMetrics :: [Metric] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
processMetrics metrics phyloFis
| null metrics = phyloFis
| otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processMetrics] please add some metrics for the Fis"
-- | To transform some Documents into PhyloFis and apply a List of Metrics and Filters
toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> Int -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
toPhyloFis ds k s t ms fs = processFilters fs
$ processMetrics ms
$ traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n"
$ docsToFis ds
toPhyloFis' :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
toPhyloFis' fis k s t ms fs = processFilters fs
$ processMetrics ms
$ traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n" fis
else trace("----\nUse Fis from an existing file\n")
$ p & phylo_fis %~ (unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m))
toPhyloFis' :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis]
toPhyloFis' fis k s t = traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n" fis
-----------------
......
......@@ -18,14 +18,119 @@ module Gargantext.Viz.Phylo.BranchMaker
where
import Control.Lens hiding (both, Level)
import Data.List (concat,nub,(++),tail)
import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union)
import Data.Map (Map)
import Data.Tuple (fst, snd)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.LinkMaker
import qualified Data.Map as Map
-- import Debug.Trace (trace)
---------------------------
-- | Readability links | --
---------------------------
getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
getFramedPeriod :: [PhyloGroup] -> (Date,Date)
getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
getGroupsNgrams :: [PhyloGroup] -> [Int]
getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
getNthMostOcc :: Int -> Map (Int,Int) Double -> [Int]
getNthMostOcc nth cooc = (nub . concat)
$ map (\((idx,idx'),_) -> [idx,idx'])
$ take (nth `div` 2)
$ reverse
$ sortOn snd $ Map.toList cooc
-- | Get the Nth most coocurent Ngrams in a list of Groups
getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
getGroupsPeaks gs nth p = getNthMostOcc nth
$ getSubCooc (getGroupsNgrams gs)
$ getCooc (getGroupsPeriods gs) p
areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
-- | Process a Jaccard on top of two set of Branch Peaks
areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
/ ((fromIntegral . length) $ union ns ns')) >= thr
findSimBranches :: Int -> Double -> Int -> Phylo -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloBranchId,[PhyloGroup])]
findSimBranches frame thr nth p (id,gs) bs
= filter (\(_ ,gs') -> areTwinPeaks thr pks (getGroupsPeaks gs' nth p))
$ filter (\(_ ,gs') -> (not . null) $ intersect ns (getGroupsNgrams gs'))
$ filter (\(_ ,gs') -> areDistant prd (getFramedPeriod gs') frame)
$ filter (\(id',_ ) -> id /= id') bs
where
--------------------------------------
prd :: (Date,Date)
prd = getFramedPeriod gs
--------------------------------------
ns :: [Int]
ns = getGroupsNgrams gs
--------------------------------------
pks :: [Int]
pks = getGroupsPeaks gs nth p
--------------------------------------
findBestPointer :: Phylo -> Proximity -> [PhyloGroup] -> [PhyloGroup] -> [(PhyloGroupId,Pointer)]
findBestPointer p prox gs gs' = take 1
$ reverse
$ sortOn (snd . snd)
$ concat
$ map (\g -> let pts = findBestCandidates' prox gs' g p
in map (\pt -> (getGroupId g,pt)) pts) gs
makeBranchLinks :: Phylo -> Proximity -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloGroupId,Pointer)] -> [(PhyloGroupId,Pointer)]
makeBranchLinks p prox (id,gs) bs pts
| null bs = pts
| otherwise = makeBranchLinks p prox (head' "makeLink" bs) (tail bs) (pts ++ pts')
where
--------------------------------------
pts' :: [(PhyloGroupId,Pointer)]
pts' = concat $ map (\(_id,gs') -> findBestPointer p prox gs gs') candidates
--------------------------------------
candidates :: [(PhyloBranchId,[PhyloGroup])]
candidates = findSimBranches (getPhyloMatchingFrame p) 0.9 4 p (id,gs) bs
linkPhyloBranches :: Level -> Proximity -> Phylo -> Phylo
linkPhyloBranches lvl prox p = setPhyloBranches lvl
$ updateGroups Descendant lvl pointers p
where
--------------------------------------
pointers :: Map PhyloGroupId [Pointer]
pointers = Map.fromList $ map (\(_id,(_id',_w)) -> (_id,[(_id',100)]))
$ makeBranchLinks p prox (head' "makeLink" branches) (tail branches) []
--------------------------------------
branches :: [(PhyloBranchId,[PhyloGroup])]
branches = sortOn (\(_id,gs) -> fst $ getFramedPeriod gs) $ getGroupsByBranches p
--------------------------------------
------------------
-- | Branches | --
------------------
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches :: Level -> GroupGraph -> Phylo -> [(Int,PhyloGroupId)]
......
......@@ -105,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 20) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.3 0)
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 20) 5 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.3 0)
......@@ -205,9 +205,12 @@ phylo1 = addPhyloLevel (1) phyloFis phylo
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = filterFis True 1 (filterFisByClique)
$ filterFisByNested
$ filterFis True 1 (filterFisBySupport) (docsToFis phyloDocs)
$ filterFis True 1 (filterFisBySupport) (getPhyloFis phylo')
phylo' :: Phylo
phylo' = docsToFis' phyloDocs phylo
----------------------------------------
-- | STEP 2 | -- Init a Phylo of level 0
----------------------------------------
......
......@@ -21,7 +21,7 @@ module Gargantext.Viz.Phylo.LevelMaker
import Control.Lens hiding (both, Level)
import Data.List ((++), sort, concat, nub, zip, last)
import Data.Map (Map, (!), empty, singleton,mapWithKey)
import Data.Map (Map, (!), empty, singleton)
import Data.Text (Text)
import Data.Tuple.Extra
import Gargantext.Prelude
......@@ -100,8 +100,11 @@ clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams empty
Nothing
(getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] (map (\g -> (getGroupId g, 1)) groups)
[] [] [] childs
where
--------------------------------------
childs :: [Pointer]
childs = map (\g -> (getGroupId g, 1)) groups
--------------------------------------
ngrams :: [Int]
ngrams = (sort . nub . concat) $ map getGroupNgrams groups
......@@ -151,11 +154,11 @@ toNthLevel lvlMax prox clus p
$ setPhyloBranches (lvl + 1)
$ transposePeriodLinks (lvl + 1)
$ setLevelLinks (lvl, lvl + 1)
$ trace (show (mapWithKey (\k v -> (k,length v)) clusters))
$ addPhyloLevel (lvl + 1)
(clusters) p
where
--------------------------------------
clusters :: Map (Date,Date) [PhyloCluster]
clusters = phyloToClusters lvl clus p
--------------------------------------
lvl :: Level
......@@ -164,9 +167,11 @@ toNthLevel lvlMax prox clus p
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox metrics filters d p = case clus of
Fis (FisParams k s t) -> traceBranches 1
toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox d p = case clus of
Fis (FisParams k s t) -> traceReBranches 1
$ linkPhyloBranches 1 prox
$ traceBranches 1
$ setPhyloBranches 1
$ traceTempoMatching Descendant 1
$ interTempoMatching Descendant 1 prox
......@@ -178,7 +183,7 @@ toPhylo1 clus prox metrics filters d p = case clus of
where
--------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = toPhyloFis' (getPhyloFis phylo') k s t metrics filters
phyloFis = toPhyloFis' (getPhyloFis phylo') k s t
--------------------------------------
phylo' :: Phylo
phylo' = docsToFis' d p
......@@ -205,7 +210,7 @@ instance PhyloMaker [(Date, Text)]
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
--------------------------------------
phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase
......@@ -244,7 +249,7 @@ instance PhyloMaker [Document]
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
--------------------------------------
phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase
......@@ -281,6 +286,16 @@ instance PhyloMaker [Document]
-----------------
tracePhylo0 :: Phylo -> Phylo
tracePhylo0 p = trace ("\n---------------\n--| Phylo 0 |--\n---------------\n\n") p
tracePhylo1 :: Phylo -> Phylo
tracePhylo1 p = trace ("\n---------------\n--| Phylo 1 |--\n---------------\n\n") p
tracePhyloN :: Level -> Phylo -> Phylo
tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n") p
tracePhyloBase :: Phylo -> Phylo
tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
<> show (length $ _phylo_periods p) <> " periods from "
......@@ -308,6 +323,23 @@ traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temp
--------------------------------------
traceReBranches :: Level -> Phylo -> Phylo
traceReBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " after relinking :\n"
<> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
<> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
<> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
<> show (percentile 50 (VS.fromList brs)) <> " (50%) "
<> show (percentile 75 (VS.fromList brs)) <> " (75%) "
<> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
where
--------------------------------------
brs :: [Double]
brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
$ filter (\(id,_) -> (fst id) == lvl)
$ getGroupsByBranches p
--------------------------------------
traceBranches :: Level -> Phylo -> Phylo
traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
<> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
......
......@@ -20,7 +20,7 @@ module Gargantext.Viz.Phylo.LinkMaker
import Control.Lens hiding (both, Level)
import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, sort, delete, intersect, nub, groupBy)
import Data.Tuple.Extra
import Data.Map (Map,(!),fromListWith,elems,restrictKeys,unionWith)
import Data.Map (Map,(!),fromListWith,elems,restrictKeys,unionWith,member)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
......@@ -40,10 +40,9 @@ import Numeric.Statistics (percentile)
-- | To choose a LevelLink strategy based an a given Level
shouldLink :: (Level,Level) -> PhyloGroup -> PhyloGroup -> Bool
shouldLink (lvl,_lvl) g g'
| lvl <= 1 = doesContainsOrd (getGroupNgrams g) (getGroupNgrams g')
| lvl > 1 = elem (getGroupId g) (getGroupLevelChildsId g')
| otherwise = panic ("[ERR][Viz.Phylo.LinkMaker.shouldLink] Level not defined")
shouldLink (lvl,lvl') g g'
| (lvl <= 1) && (lvl' <= 1) = doesContainsOrd (getGroupNgrams g) (getGroupNgrams g')
| otherwise = elem (getGroupId g) (getGroupLevelChildsId g')
-- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
......@@ -70,15 +69,12 @@ linkGroupToGroups (lvl,lvl') current targets
-- | To set the LevelLink of all the PhyloGroups of a Phylo
setLevelLinks :: (Level,Level) -> Phylo -> Phylo
setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLevel g == lvl
then linkGroupToGroups (lvl,lvl') g (filterCandidates g
$ filter (\g' -> getGroupPeriod g' == getGroupPeriod g) gs')
else g) gs) p
where
--------------------------------------
gs' :: [PhyloGroup]
gs' = getGroupsWithLevel lvl' p
--------------------------------------
setLevelLinks (lvl,lvl') p = alterPhyloGroups (\groups ->
map (\group -> if getGroupLevel group == lvl
then linkGroupToGroups (lvl,lvl') group
$ filterCandidates group
$ getGroupsWithFilters lvl' (getGroupPeriod group) p
else group) groups) p
-------------------------------
......@@ -154,6 +150,32 @@ findBestCandidates filiation depth limit proximity periods candidates g1 phylo
--------------------------------------
nextPeriods :: [(Date,Date)]
nextPeriods = take depth periods
--------------------------------------
findBestCandidates' :: Proximity -> [PhyloGroup] -> PhyloGroup -> Phylo -> [Pointer]
findBestCandidates' proximity candidates g1 phylo = pointers
where
--------------------------------------
pointers :: [(PhyloGroupId, Double)]
pointers = reverse $ sortOn snd $ filter (\(_,score) -> case proximity of
WeightedLogJaccard (WLJParams thr _) -> score >= thr
Hamming (HammingParams thr) -> score <= thr
_ -> panic "[ERR][Viz.Phylo.LinkMaker.findBestCandidates'] Unknown proximity"
) similarities
--------------------------------------
similarities :: [(PhyloGroupId, Double)]
similarities = concat $ map (\(g2,g3) -> let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] phylo
cooc2 = getGroupCooc g2
cooc3 = getGroupCooc g3
score = processProximity proximity cooc1 (unionWith (+) cooc2 cooc3) nbDocs
in nub $ [(getGroupId g2,score),(getGroupId g3,score)]) pairsOfCandidates
--------------------------------------
pairsOfCandidates :: [(PhyloGroup,PhyloGroup)]
pairsOfCandidates = listToFullCombi candidates
--------------------------------------
cooc1 :: Map (Int,Int) Double
cooc1 = getGroupCooc g1
--------------------------------------
......@@ -168,7 +190,7 @@ addPointers' fil pts g = g & case fil of
-- | To update a list of phyloGroups with some Pointers
updateGroups :: Filiation -> Level -> Map PhyloGroupId [Pointer] -> Phylo -> Phylo
updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if (getGroupLevel g) == lvl
updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if ((getGroupLevel g) == lvl) && (member (getGroupId g) m)
then addPointers' fil (m ! (getGroupId g)) g
else g ) gs) p
......@@ -220,7 +242,7 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) de
pointers = concat
$ map (\branche ->
map (\g -> ( getGroupId g
, findBestCandidates fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) (getPhyloPeriods p)) (filterCandidates g branche) g p )
, findBestCandidates fil 1 (getPhyloMatchingFrame p) prox (getNextPeriods fil (getGroupPeriod g) (getPhyloPeriods p)) (filterCandidates g branche) g p )
) branche ) branches
--------------------------------------
branches :: [[PhyloGroup]]
......@@ -240,7 +262,7 @@ toLevelUp lst p = Map.toList
where
--------------------------------------
pointers :: [Pointer]
pointers = trace(show(map (\(id,_) -> length $ getGroupLevelParentId $ getGroupFromId id p) lst)) $ map (\(id,v) -> (getGroupLevelParentId $ getGroupFromId id p, v)) lst
pointers = map (\(id,v) -> (getGroupLevelParentId $ getGroupFromId id p, v)) lst
--------------------------------------
......@@ -250,7 +272,7 @@ transposePeriodLinks lvl p = alterGroupWithLevel
(\g ->
--------------------------------------
let childs = getGroupsFromIds (map fst $ getGroupLevelChilds g) p
ascLink = trace (show(length childs)) $ toLevelUp (concat $ map getGroupPeriodParents childs) p
ascLink = toLevelUp (concat $ map getGroupPeriodParents childs) p
desLink = toLevelUp (concat $ map getGroupPeriodChilds childs) p
--------------------------------------
in g & phylo_groupPeriodParents %~ (++ ascLink)
......
......@@ -192,6 +192,8 @@ getPhyloTitle p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
getPhyloDescription :: Phylo -> Text
getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrame :: Phylo -> Int
getPhyloMatchingFrame p = _q_interTemporalMatchingFrame $ _phyloParam_query $ getPhyloParams p
getPhyloFis :: Phylo -> Map (Date,Date) [PhyloFis]
getPhyloFis = _phylo_fis
......@@ -791,10 +793,10 @@ initWeightedLogJaccard (def 0 -> thr) (def 0.01 -> sens) = WLJParams thr sens
-- | To initialize a PhyloQueryBuild from given and default parameters
initPhyloQueryBuild :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe [Metric] -> Maybe [Filter] -> Maybe Proximity -> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe [Metric] -> Maybe [Filter] -> Maybe Proximity -> Maybe Int -> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild name desc (def 5 -> grain) (def 3 -> steps) (def defaultFis -> cluster) (def [] -> metrics) (def [] -> filters)
(def defaultWeightedLogJaccard -> matching') (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQueryBuild name desc grain steps cluster metrics filters matching' nthLevel nthCluster
(def defaultWeightedLogJaccard -> matching') (def 5 -> frame) (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQueryBuild name desc grain steps cluster metrics filters matching' frame nthLevel nthCluster
......@@ -848,7 +850,7 @@ defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing N
defaultQueryBuild :: PhyloQueryBuild
defaultQueryBuild = initPhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
defaultQueryView :: PhyloQueryView
defaultQueryView = initPhyloQueryView Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment