Commit f5393047 authored by qlobbe's avatar qlobbe

fix the weightedlogjaccard

parent f49465e4
...@@ -133,8 +133,8 @@ defaultConfig = ...@@ -133,8 +133,8 @@ defaultConfig =
, phyloName = pack "Default Phylo" , phyloName = pack "Default Phylo"
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 0 0.1 , phyloProximity = WeightedLogJaccard 10 0 0.1
, phyloSynchrony = ByProximityThreshold 0.1 0 SiblingBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold 0.2 0 SiblingBranches MergeAllGroups
, phyloQuality = Quality 0.1 1 , phyloQuality = Quality 10 1
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, clique = Fis 1 5 , clique = Fis 1 5
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2] , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
......
...@@ -171,11 +171,15 @@ exportToDot phylo export = ...@@ -171,11 +171,15 @@ exportToDot phylo export =
, Ratio FillRatio , Ratio FillRatio
, Style [SItem Filled []],Color [toWColor White]] , Style [SItem Filled []],Color [toWColor White]]
-- | home made attributes -- | home made attributes
<> [(toAttr (fromStrict "nbDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs)) <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo)) ,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo)) ,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo)) ,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo)) ,(toAttr (fromStrict "quaGranularity") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
]) ])
......
...@@ -91,7 +91,7 @@ cliqueToGroup fis thr pId lvl idx fdt coocs = ...@@ -91,7 +91,7 @@ cliqueToGroup fis thr pId lvl idx fdt coocs =
(fis ^. phyloClique_support) (fis ^. phyloClique_support)
ngrams ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
(1,[0]) (1,[0]) -- | branchid (lvl,[path in the branching tree])
(singleton "thr" [thr]) (singleton "thr" [thr])
[] [] [] [] [] [] [] []
......
...@@ -227,6 +227,8 @@ sumCooc cooc cooc' = unionWith (+) cooc cooc' ...@@ -227,6 +227,8 @@ sumCooc cooc cooc' = unionWith (+) cooc cooc'
getTrace :: Cooc -> Double getTrace :: Cooc -> Double
getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
coocToDiago :: Cooc -> Cooc
coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc
-- | To build the local cooc matrix of each phylogroup -- | To build the local cooc matrix of each phylogroup
ngramsToCooc :: [Int] -> [Cooc] -> Cooc ngramsToCooc :: [Int] -> [Cooc] -> Cooc
......
...@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.SynchronicClustering where ...@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.SynchronicClustering where
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard) import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Viz.Phylo.PhyloExport (processDynamics) import Gargantext.Viz.Phylo.PhyloExport (processDynamics)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, init, all, group, maximum, groupBy) import Data.List ((++), null, intersect, nub, concat, sort, sortOn, init, all, group, maximum, groupBy)
...@@ -56,26 +56,16 @@ mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids ...@@ -56,26 +56,16 @@ mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches' groups = groupsToBranches' groups =
-- | run the related component algorithm -- | run the related component algorithm
let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs')) let egos = map (\g -> [getGroupId g]
$ sortOn (\gs -> fst $ fst $ head' "egos" gs) ++ (map fst $ g ^. phylo_groupPeriodParents)
$ map (\g -> [getGroupId g] ++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups
++ (map fst $ g ^. phylo_groupPeriodParents) graph = relatedComponents egos
++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups
-- | first find the related components by inside each ego's period
graph' = map relatedComponents egos
-- | then run it for the all the periods
graph = relatedComponents $ concat (graph' `using` parList rdeepseq)
-- | update each group's branch id -- | update each group's branch id
in map (\ids -> in map (\ids ->
-- intervenir ici
let groups' = elems $ restrictKeys groups (Set.fromList ids) let groups' = elems $ restrictKeys groups (Set.fromList ids)
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups' bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl + 1,bId))) groups') graph in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl + 1,bId))) groups') graph
-- toBranchId :: PhyloGroup -> PhyloBranchId
-- toBranchId child = ((child ^. phylo_groupLevel) + 1, snd (child ^. phylo_groupBranchId))
getLastThr :: [PhyloGroup] -> Double getLastThr :: [PhyloGroup] -> Double
getLastThr childs = maximum $ concat $ map (\g -> (g ^. phylo_groupMeta) ! "thr") childs getLastThr childs = maximum $ concat $ map (\g -> (g ^. phylo_groupMeta) ! "thr") childs
...@@ -157,8 +147,8 @@ toDiamonds groups = foldl' (\acc groups' -> ...@@ -157,8 +147,8 @@ toDiamonds groups = foldl' (\acc groups' ->
$ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups $ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups
groupsToEdges :: Proximity -> Synchrony -> Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] groupsToEdges :: Proximity -> Synchrony -> Double -> Map Int Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox sync docs groups = groupsToEdges prox sync nbDocs diago groups =
case sync of case sync of
ByProximityThreshold thr sens _ strat -> ByProximityThreshold thr sens _ strat ->
filter (\(_,w) -> w >= thr) filter (\(_,w) -> w >= thr)
...@@ -174,8 +164,7 @@ groupsToEdges prox sync docs groups = ...@@ -174,8 +164,7 @@ groupsToEdges prox sync docs groups =
toEdges sens edges = toEdges sens edges =
case prox of case prox of
WeightedLogJaccard _ _ _ -> map (\(g,g') -> WeightedLogJaccard _ _ _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard sens docs ((g,g'), weightedLogJaccard' sens nbDocs diago
(g ^. phylo_groupCooc) (g' ^. phylo_groupCooc)
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
_ -> undefined _ -> undefined
...@@ -191,15 +180,16 @@ toParentId :: PhyloGroup -> PhyloGroupId ...@@ -191,15 +180,16 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex) toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex)
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> [PhyloGroup] -> [PhyloGroup] reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reduceGroups prox sync docs branch = reduceGroups prox sync docs diagos branch =
-- | 1) reduce a branch as a set of periods & groups -- | 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++) let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems) in (concat . concat . elems)
$ mapWithKey (\prd groups -> $ mapWithKey (\prd groups ->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold -- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) groups let diago = reduceDiagos $ filterDiago diagos [prd]
edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups
in map (\comp -> in map (\comp ->
-- | 4) add to each groups their futur level parent group -- | 4) add to each groups their futur level parent group
let parentId = toParentId (head' "parentId" comp) let parentId = toParentId (head' "parentId" comp)
...@@ -233,7 +223,8 @@ synchronicClustering phylo = ...@@ -233,7 +223,8 @@ synchronicClustering phylo =
let prox = phyloProximity $ getConfig phylo let prox = phyloProximity $ getConfig phylo
sync = phyloSynchrony $ getConfig phylo sync = phyloSynchrony $ getConfig phylo
docs = phylo ^. phylo_timeDocs docs = phylo ^. phylo_timeDocs
newBranches = map (\branch -> reduceGroups prox sync docs branch) diagos = map coocToDiago $ phylo ^. phylo_timeCooc
newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics $ map processDynamics
$ adjustClustering sync (getPhyloThresholdStep phylo) $ adjustClustering sync (getPhyloThresholdStep phylo)
$ phyloToLastBranches $ phyloToLastBranches
......
...@@ -16,7 +16,7 @@ Portability : POSIX ...@@ -16,7 +16,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or) import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or)
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, intersectionWith, findWithDefault, keys, (!), filterWithKey, singleton) import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), singleton, empty, mapKeys)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
...@@ -35,30 +35,29 @@ import qualified Data.Set as Set ...@@ -35,30 +35,29 @@ import qualified Data.Set as Set
------------------- -------------------
-- | Process the inverse sumLog -- | To compute a jaccard similarity between two lists
sumInvLog :: Double -> [Double] -> Double jaccard :: [Int] -> [Int] -> Double
sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
-- | Process the sumLog -- | Process the inverse sumLog
sumLog :: Double -> [Double] -> Double sumInvLog' :: Double -> Double -> [Double] -> Double
sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + s) / log (nb + s)))) 0 diago
-- | To compute a jaccard similarity between two lists -- | Process the sumLog
jaccard :: [Int] -> [Int] -> Double sumLog' :: Double -> Double -> [Double] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union') sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + s) / log (nb + s))) 0 diago
-- | To process a WeighedLogJaccard distance between to coocurency matrix weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
weightedLogJaccard :: Double -> Double -> Cooc -> Cooc -> [Int] -> [Int] -> Double weightedLogJaccard' sens nbDocs diago ngrams ngrams'
weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
| null ngramsInter = 0 | null ngramsInter = 0
| ngramsInter == ngramsUnion = 1 | ngramsInter == ngramsUnion = 1
| sens == 0 = jaccard ngramsInter ngramsUnion | sens == 0 = jaccard ngramsInter ngramsUnion
| sens > 0 = (sumInvLog sens coocInter) / (sumInvLog sens coocUnion) | sens > 0 = (sumInvLog' sens nbDocs diagoInter) / (sumInvLog' sens nbDocs diagoUnion)
| otherwise = (sumLog sens coocInter) / (sumLog sens coocUnion) | otherwise = (sumLog' sens nbDocs diagoInter) / (sumLog' sens nbDocs diagoUnion)
where where
-------------------------------------- --------------------------------------
ngramsInter :: [Int] ngramsInter :: [Int]
ngramsInter = intersect ngrams ngrams' ngramsInter = intersect ngrams ngrams'
...@@ -66,85 +65,83 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams' ...@@ -66,85 +65,83 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
ngramsUnion :: [Int] ngramsUnion :: [Int]
ngramsUnion = union ngrams ngrams' ngramsUnion = union ngrams ngrams'
-------------------------------------- --------------------------------------
coocInter :: [Double] diagoInter :: [Double]
coocInter = elems $ map (/docs) $ filterWithKey (\(k,k') _ -> k == k') $ intersectionWith (+) cooc cooc' diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
-- coocInter = elems $ map (/docs) $ intersectionWith (+) cooc cooc' --------------------------------------
-------------------------------------- diagoUnion :: [Double]
coocUnion :: [Double] diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
coocUnion = elems $ map (/docs) $ filterWithKey (\(k,k') _ -> k == k') $ unionWith (+) cooc cooc' --------------------------------------
--------------------------------------
-- | To choose a proximity function
pickProximity :: Proximity -> Double -> Cooc -> Cooc -> [Int] -> [Int] -> Double
pickProximity proximity docs cooc cooc' ngrams ngrams' = case proximity of
WeightedLogJaccard sens _ _ -> weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
Hamming -> undefined
-- | To process the proximity between a current group and a pair of targets group -- | To process the proximity between a current group and a pair of targets group
toProximity :: Map Date Double -> Proximity -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double toProximity :: Double -> Map Int Double -> Proximity -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double
toProximity docs proximity ego target target' = toProximity nbDocs diago proximity ego target target' =
let docs' = sum $ elems docs case proximity of
cooc = if target == target' WeightedLogJaccard sens _ _ ->
then (target ^. phylo_groupCooc) let targetsNgrams = if target == target'
else sumCooc (target ^. phylo_groupCooc) (target' ^. phylo_groupCooc) then (target ^. phylo_groupNgrams)
ngrams = if target == target' else union (target ^. phylo_groupNgrams) (target' ^. phylo_groupNgrams)
then (target ^. phylo_groupNgrams) in weightedLogJaccard' sens nbDocs diago (ego ^. phylo_groupNgrams) targetsNgrams
else union (target ^. phylo_groupNgrams) (target' ^. phylo_groupNgrams) Hamming -> undefined
in pickProximity proximity docs' (ego ^. phylo_groupCooc) cooc (ego ^. phylo_groupNgrams) ngrams
------------------------ ------------------------
-- | Local Matching | -- -- | Local Matching | --
------------------------ ------------------------
toLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId findLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId
toLastPeriod fil periods = case fil of findLastPeriod fil periods = case fil of
ToParents -> head' "toLastPeriod" (sortOn fst periods) ToParents -> head' "findLastPeriod" (sortOn fst periods)
ToChilds -> last' "toLastPeriod" (sortOn fst periods) ToChilds -> last' "findLastPeriod" (sortOn fst periods)
toLazyPairs :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId -> [(PhyloGroup,PhyloGroup)] -> [(PhyloGroup,PhyloGroup)] -- | To filter pairs of candidates related to old pointers periods
toLazyPairs pointers fil thr prox prd pairs = removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId -> [(PhyloGroup,PhyloGroup)] -> [(PhyloGroup,PhyloGroup)]
if null pointers then pairs removeOldPointers oldPointers fil thr prox prd pairs
else let rest = filterPointers prox thr pointers | null oldPointers = pairs
in if null rest | null (filterPointers prox thr oldPointers) =
then let prd' = toLastPeriod fil (map (fst . fst . fst) pointers) let lastMatchedPrd = findLastPeriod fil (map (fst . fst . fst) oldPointers)
in if prd' == prd in if lastMatchedPrd == prd
then [] then []
else filter (\(g,g') -> else filter (\(g,g') ->
case fil of case fil of
ToParents -> ((fst $ g ^. phylo_groupPeriod) < (fst prd')) ToParents -> ((fst $ g ^. phylo_groupPeriod) < (fst lastMatchedPrd))
|| ((fst $ g' ^. phylo_groupPeriod) < (fst prd')) || ((fst $ g' ^. phylo_groupPeriod) < (fst lastMatchedPrd))
ToChilds -> ((fst $ g ^. phylo_groupPeriod) > (fst prd')) ToChilds -> ((fst $ g ^. phylo_groupPeriod) > (fst lastMatchedPrd))
|| ((fst $ g' ^. phylo_groupPeriod) > (fst prd'))) pairs || ((fst $ g' ^. phylo_groupPeriod) > (fst lastMatchedPrd))) pairs
else [] | otherwise = []
-- | Find pairs of valuable candidates to be matched -- | Find pairs of valuable candidates to be matched
makePairs' :: PhyloGroup -> [PhyloGroup] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity -> Map Date Double -> [(PhyloGroup,PhyloGroup)] makePairs' :: PhyloGroup -> [PhyloGroup] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity -> Map Date Double -> Map Date Cooc -> [(PhyloGroup,PhyloGroup)]
makePairs' ego candidates periods pointers fil thr prox docs = makePairs' ego candidates periods oldPointers fil thr prox docs diagos =
case null periods of case null periods of
True -> [] True -> []
False -> toLazyPairs pointers fil thr prox lastPrd False -> removeOldPointers oldPointers fil thr prox lastPrd
-- | at least on of the pair candidates should be from the last added period -- | at least on of the pair candidates should be from the last added period
$ filter (\(g,g') -> ((g ^. phylo_groupPeriod) == lastPrd) $ filter (\(g,g') -> ((g ^. phylo_groupPeriod) == lastPrd)
|| ((g' ^. phylo_groupPeriod) == lastPrd)) || ((g' ^. phylo_groupPeriod) == lastPrd))
$ listToKeys $ listToKeys
$ filter (\g -> (g ^. phylo_groupPeriod == lastPrd) $ filter (\g -> let nbDocs = sum $ elems $ (filterDocs docs ([ego ^. phylo_groupPeriod, g ^. phylo_groupPeriod]))
|| ((toProximity docs prox ego ego g) >= thr)) candidates diago = reduceDiagos $ filterDiago diagos ([ego ^. phylo_groupPeriod, g ^. phylo_groupPeriod])
in (g ^. phylo_groupPeriod == lastPrd)
|| ((toProximity nbDocs diago prox ego ego g) >= thr)) candidates
where where
lastPrd :: PhyloPeriodId lastPrd :: PhyloPeriodId
lastPrd = toLastPeriod fil periods lastPrd = findLastPeriod fil periods
filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer] filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Proximity -> Map Date Double -> Double -> PhyloGroup -> PhyloGroup reduceDiagos :: Map Date Cooc -> Map Int Double
phyloGroupMatching candidates fil proxi docs thr ego = reduceDiagos diagos = mapKeys (\(k,_) -> k)
$ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc -> Double -> PhyloGroup -> PhyloGroup
phyloGroupMatching candidates fil proxi docs diagos thr ego =
if (null $ filterPointers proxi thr $ getPeriodPointers fil ego) if (null $ filterPointers proxi thr $ getPeriodPointers fil ego)
-- | let's find new pointers -- | let's find new pointers
then if null nextPointers then if null nextPointers
...@@ -155,8 +152,7 @@ phyloGroupMatching candidates fil proxi docs thr ego = ...@@ -155,8 +152,7 @@ phyloGroupMatching candidates fil proxi docs thr ego =
$ groupBy (\pt pt' -> snd pt == snd pt') $ groupBy (\pt pt' -> snd pt == snd pt')
$ reverse $ sortOn snd $ head' "pointers" nextPointers $ reverse $ sortOn snd $ head' "pointers" nextPointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold -- | Find the first time frame where at leats one pointer satisfies the proximity threshold
else addPointers ego fil TemporalPointer else ego
$ filterPointers proxi thr $ getPeriodPointers fil ego
where where
nextPointers :: [[Pointer]] nextPointers :: [[Pointer]]
nextPointers = take 1 nextPointers = take 1
...@@ -164,13 +160,16 @@ phyloGroupMatching candidates fil proxi docs thr ego = ...@@ -164,13 +160,16 @@ phyloGroupMatching candidates fil proxi docs thr ego =
-- | for each time frame, process the proximity on relevant pairs of targeted groups -- | for each time frame, process the proximity on relevant pairs of targeted groups
$ scanl (\acc groups -> $ scanl (\acc groups ->
let periods = nub $ map _phylo_groupPeriod $ concat groups let periods = nub $ map _phylo_groupPeriod $ concat groups
docs' = (filterDocs docs ([ego ^. phylo_groupPeriod] ++ periods)) nbdocs = sum $ elems $ (filterDocs docs ([ego ^. phylo_groupPeriod] ++ periods))
pairs = makePairs' ego (concat groups) periods (getPeriodPointers fil ego) fil thr proxi docs diago = reduceDiagos
$ filterDiago diagos ([ego ^. phylo_groupPeriod] ++ periods)
-- | important resize nbdocs et diago dans le make pairs
pairs = makePairs' ego (concat groups) periods (getPeriodPointers fil ego) fil thr proxi docs diagos
in acc ++ ( filterPointers proxi thr in acc ++ ( filterPointers proxi thr
$ concat $ concat
$ map (\(c,c') -> $ map (\(c,c') ->
-- | process the proximity between the current group and a pair of candidates -- | process the proximity between the current group and a pair of candidates
let proximity = toProximity docs' proxi ego c c' let proximity = toProximity nbdocs diago proxi ego c c'
in if (c == c') in if (c == c')
then [(getGroupId c,proximity)] then [(getGroupId c,proximity)]
else [(getGroupId c,proximity),(getGroupId c',proximity)] ) pairs )) [] else [(getGroupId c,proximity),(getGroupId c',proximity)] ) pairs )) []
...@@ -180,6 +179,9 @@ phyloGroupMatching candidates fil proxi docs thr ego = ...@@ -180,6 +179,9 @@ phyloGroupMatching candidates fil proxi docs thr ego =
filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
filterDocs d pds = restrictKeys d $ periodsToYears pds filterDocs d pds = restrictKeys d $ periodsToYears pds
filterDiago :: Map Date Cooc -> [PhyloPeriodId] -> Map Date Cooc
filterDiago diago pds = restrictKeys diago $ periodsToYears pds
----------------------------- -----------------------------
-- | Matching Processing | -- -- | Matching Processing | --
...@@ -200,26 +202,29 @@ getCandidates ego targets = ...@@ -200,26 +202,29 @@ getCandidates ego targets =
) groups') targets ) groups') targets
phyloBranchMatching :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> [PhyloGroup] -> [PhyloGroup] matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
phyloBranchMatching frame periods proximity thr docs branch = matchGroupsToGroups frame periods proximity thr docs coocs groups =
-- traceBranchMatching proximity thr let groups' = groupByField _phylo_groupPeriod groups
matchByPeriods in foldl' (\acc prd ->
$ groupByField _phylo_groupPeriod branch let -- | 1) find the parents/childs matching periods
where periodsPar = getNextPeriods ToParents frame prd periods
-------------------------------------- periodsChi = getNextPeriods ToChilds frame prd periods
matchByPeriods :: Map PhyloPeriodId [PhyloGroup] -> [PhyloGroup] -- | 2) find the parents/childs matching candidates
matchByPeriods branch' = foldl' (\acc prd -> candidatesPar = map (\prd' -> findWithDefault [] prd' groups') periodsPar
let periodsPar = getNextPeriods ToParents frame prd periods candidatesChi = map (\prd' -> findWithDefault [] prd' groups') periodsChi
periodsChi = getNextPeriods ToChilds frame prd periods -- | 3) find the parents/child number of docs by years
candidatesPar = map (\prd' -> findWithDefault [] prd' branch') periodsPar docsPar = filterDocs docs ([prd] ++ periodsPar)
candidatesChi = map (\prd' -> findWithDefault [] prd' branch') periodsChi docsChi = filterDocs docs ([prd] ++ periodsChi)
docsPar = filterDocs docs ([prd] ++ periodsPar) -- | 4) find the parents/child diago by years
docsChi = filterDocs docs ([prd] ++ periodsChi) diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
egos = map (\ego -> phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar thr diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
$ phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi thr ego) -- | 5) match in parallel all the groups (egos) to their possible candidates
$ findWithDefault [] prd branch' egos = map (\ego -> phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar thr
egos' = egos `using` parList rdeepseq $ phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi diagoChi thr ego)
in acc ++ egos' ) [] periods $ findWithDefault [] prd groups'
egos' = egos `using` parList rdeepseq
in acc ++ egos'
) [] periods
----------------------- -----------------------
...@@ -270,6 +275,7 @@ groupsToBranches groups = ...@@ -270,6 +275,7 @@ groupsToBranches groups =
++ (map fst $ group ^. phylo_groupPeriodParents) ++ (map fst $ group ^. phylo_groupPeriodParents)
++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups ++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
-- | first find the related components by inside each ego's period -- | first find the related components by inside each ego's period
-- | a supprimer
graph' = map relatedComponents egos graph' = map relatedComponents egos
-- | then run it for the all the periods -- | then run it for the all the periods
graph = zip [1..] graph = zip [1..]
...@@ -288,8 +294,13 @@ reduceFrequency frequency branches = ...@@ -288,8 +294,13 @@ reduceFrequency frequency branches =
updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]] updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
updateThr thr branches = map (\b -> map (\g -> g & phylo_groupMeta .~ (singleton "thr" [thr])) b) branches updateThr thr branches = map (\b -> map (\g -> g & phylo_groupMeta .~ (singleton "thr" [thr])) b) branches
seqMatching :: Proximity -> Double -> Map Int Double -> Int -> Double -> Int -> Map Date Double -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
seqMatching proximity beta frequency minBranch egoThr frame docs periods done ego rest = -- | Sequentially break each branch of a phylo where
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
breakBranches proximity beta frequency minBranch thr frame docs coocs periods done ego rest =
-- | 1) keep or not the new division of ego -- | 1) keep or not the new division of ego
let done' = done ++ (if snd ego let done' = done ++ (if snd ego
then (if ((null (fst ego')) || (quality > quality')) then (if ((null (fst ego')) || (quality > quality'))
...@@ -310,8 +321,8 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg ...@@ -310,8 +321,8 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg
-- | 2) if there is no more branches in rest then return else continue -- | 2) if there is no more branches in rest then return else continue
if null rest if null rest
then done' then done'
else seqMatching proximity beta frequency minBranch egoThr frame docs periods else breakBranches proximity beta frequency minBranch thr frame docs coocs periods
done' (head' "seqMatching" rest) (tail' "seqMatching" rest) done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where where
-------------------------------------- --------------------------------------
quality :: Double quality :: Double
...@@ -320,11 +331,11 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg ...@@ -320,11 +331,11 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg
ego' :: ([[PhyloGroup]],[[PhyloGroup]]) ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' = ego' =
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ phyloBranchMatching frame periods proximity egoThr docs (fst ego) $ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
branches' = branches `using` parList rdeepseq branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch) in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
$ if (length branches' > 1) $ if (length branches' > 1)
then updateThr egoThr branches' then updateThr thr branches'
else branches' else branches'
-------------------------------------- --------------------------------------
quality' :: Double quality' :: Double
...@@ -332,15 +343,17 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg ...@@ -332,15 +343,17 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest)) ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
recursiveMatching' :: Proximity -> Double -> Int -> Map Int Double -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)] seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
recursiveMatching' proximity beta minBranch frequency egoThr frame periods docs branches = seaLevelMatching proximity beta minBranch frequency thr frame periods docs coocs branches =
if (egoThr >= 1) || ((not . or) $ map snd branches) -- | if there is no branch to break or if sea level > 1 then end
if (thr >= 1) || ((not . or) $ map snd branches)
then branches then branches
else else
let branches' = seqMatching proximity beta frequency minBranch egoThr frame docs periods -- | break all the possible branches at the current sea level
[] (head' "recursiveMatching" branches) (tail' "recursiveMatching" branches) let branches' = breakBranches proximity beta frequency minBranch thr frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches') frequency' = reduceFrequency frequency (map fst branches')
in recursiveMatching' proximity beta minBranch frequency' (egoThr + (getThresholdStep proximity)) frame periods docs branches' in seaLevelMatching proximity beta minBranch frequency' (thr + (getThresholdStep proximity)) frame periods docs coocs branches'
temporalMatching :: Phylo -> Phylo temporalMatching :: Phylo -> Phylo
...@@ -348,23 +361,27 @@ temporalMatching phylo = updatePhyloGroups 1 ...@@ -348,23 +361,27 @@ temporalMatching phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches) (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
phylo phylo
where where
-- | 2) init the recursiveMatching -- | 2) process the temporal matching by elevating sea level
branches :: [[PhyloGroup]] branches :: [[PhyloGroup]]
branches = map fst branches = map fst
$ recursiveMatching' (phyloProximity $ getConfig phylo) $ seaLevelMatching (phyloProximity $ getConfig phylo)
(_qua_granularity $ phyloQuality $ getConfig phylo) (_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo) (_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq) (phylo ^. phylo_termFreq)
(getThresholdInit $ phyloProximity $ getConfig phylo) (getThresholdInit $ phyloProximity $ getConfig phylo)
(getTimeFrame $ timeUnit $ getConfig phylo) (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (getPeriodIds phylo)
(phylo ^. phylo_timeDocs) (phylo ^. phylo_timeDocs)
groups (phylo ^. phylo_timeCooc)
groups
-- | 1) for each group process an initial temporal Matching -- | 1) for each group process an initial temporal Matching
-- | here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],Bool)] groups :: [([PhyloGroup],Bool)]
groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo))) groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ phyloBranchMatching (getTimeFrame $ timeUnit $ getConfig phylo) (getPeriodIds phylo) $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
(phyloProximity $ getConfig phylo) (getThresholdInit $ phyloProximity $ getConfig phylo) (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
(phylo ^. phylo_timeDocs) (getThresholdInit $ phyloProximity $ getConfig phylo)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo) (phylo ^. phylo_timeDocs)
\ No newline at end of file (phylo ^. phylo_timeCooc)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment