Commit f5393047 authored by qlobbe's avatar qlobbe

fix the weightedlogjaccard

parent f49465e4
......@@ -133,8 +133,8 @@ defaultConfig =
, phyloName = pack "Default Phylo"
, phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 0 0.1
, phyloSynchrony = ByProximityThreshold 0.1 0 SiblingBranches MergeAllGroups
, phyloQuality = Quality 0.1 1
, phyloSynchrony = ByProximityThreshold 0.2 0 SiblingBranches MergeAllGroups
, phyloQuality = Quality 10 1
, timeUnit = Year 3 1 5
, clique = Fis 1 5
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
......
......@@ -171,11 +171,15 @@ exportToDot phylo export =
, Ratio FillRatio
, Style [SItem Filled []],Color [toWColor White]]
-- | home made attributes
<> [(toAttr (fromStrict "nbDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
<> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
,(toAttr (fromStrict "quaGranularity") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
])
......
......@@ -91,7 +91,7 @@ cliqueToGroup fis thr pId lvl idx fdt coocs =
(fis ^. phyloClique_support)
ngrams
(ngramsToCooc ngrams coocs)
(1,[0])
(1,[0]) -- | branchid (lvl,[path in the branching tree])
(singleton "thr" [thr])
[] [] [] []
......
......@@ -227,6 +227,8 @@ sumCooc cooc cooc' = unionWith (+) cooc cooc'
getTrace :: Cooc -> Double
getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
coocToDiago :: Cooc -> Cooc
coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc :: [Int] -> [Cooc] -> Cooc
......
......@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.SynchronicClustering where
import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard)
import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Viz.Phylo.PhyloExport (processDynamics)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, init, all, group, maximum, groupBy)
......@@ -56,26 +56,16 @@ mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches' groups =
-- | run the related component algorithm
let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
$ sortOn (\gs -> fst $ fst $ head' "egos" gs)
$ map (\g -> [getGroupId g]
++ (map fst $ g ^. phylo_groupPeriodParents)
++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups
-- | first find the related components by inside each ego's period
graph' = map relatedComponents egos
-- | then run it for the all the periods
graph = relatedComponents $ concat (graph' `using` parList rdeepseq)
let egos = map (\g -> [getGroupId g]
++ (map fst $ g ^. phylo_groupPeriodParents)
++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups
graph = relatedComponents egos
-- | update each group's branch id
in map (\ids ->
-- intervenir ici
let groups' = elems $ restrictKeys groups (Set.fromList ids)
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl + 1,bId))) groups') graph
-- toBranchId :: PhyloGroup -> PhyloBranchId
-- toBranchId child = ((child ^. phylo_groupLevel) + 1, snd (child ^. phylo_groupBranchId))
getLastThr :: [PhyloGroup] -> Double
getLastThr childs = maximum $ concat $ map (\g -> (g ^. phylo_groupMeta) ! "thr") childs
......@@ -157,8 +147,8 @@ toDiamonds groups = foldl' (\acc groups' ->
$ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups
groupsToEdges :: Proximity -> Synchrony -> Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox sync docs groups =
groupsToEdges :: Proximity -> Synchrony -> Double -> Map Int Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox sync nbDocs diago groups =
case sync of
ByProximityThreshold thr sens _ strat ->
filter (\(_,w) -> w >= thr)
......@@ -174,8 +164,7 @@ groupsToEdges prox sync docs groups =
toEdges sens edges =
case prox of
WeightedLogJaccard _ _ _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard sens docs
(g ^. phylo_groupCooc) (g' ^. phylo_groupCooc)
((g,g'), weightedLogJaccard' sens nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
_ -> undefined
......@@ -191,15 +180,16 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex)
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> [PhyloGroup] -> [PhyloGroup]
reduceGroups prox sync docs branch =
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reduceGroups prox sync docs diagos branch =
-- | 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems)
$ mapWithKey (\prd groups ->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) groups
let diago = reduceDiagos $ filterDiago diagos [prd]
edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups
in map (\comp ->
-- | 4) add to each groups their futur level parent group
let parentId = toParentId (head' "parentId" comp)
......@@ -233,7 +223,8 @@ synchronicClustering phylo =
let prox = phyloProximity $ getConfig phylo
sync = phyloSynchrony $ getConfig phylo
docs = phylo ^. phylo_timeDocs
newBranches = map (\branch -> reduceGroups prox sync docs branch)
diagos = map coocToDiago $ phylo ^. phylo_timeCooc
newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics
$ adjustClustering sync (getPhyloThresholdStep phylo)
$ phyloToLastBranches
......
......@@ -16,7 +16,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or)
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, intersectionWith, findWithDefault, keys, (!), filterWithKey, singleton)
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), singleton, empty, mapKeys)
import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
......@@ -35,30 +35,29 @@ import qualified Data.Set as Set
-------------------
-- | Process the inverse sumLog
sumInvLog :: Double -> [Double] -> Double
sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
-- | To compute a jaccard similarity between two lists
jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
-- | Process the sumLog
sumLog :: Double -> [Double] -> Double
sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l
-- | Process the inverse sumLog
sumInvLog' :: Double -> Double -> [Double] -> Double
sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + s) / log (nb + s)))) 0 diago
-- | To compute a jaccard similarity between two lists
jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
-- | Process the sumLog
sumLog' :: Double -> Double -> [Double] -> Double
sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + s) / log (nb + s))) 0 diago
-- | To process a WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard :: Double -> Double -> Cooc -> Cooc -> [Int] -> [Int] -> Double
weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
weightedLogJaccard' sens nbDocs diago ngrams ngrams'
| null ngramsInter = 0
| ngramsInter == ngramsUnion = 1
| sens == 0 = jaccard ngramsInter ngramsUnion
| sens > 0 = (sumInvLog sens coocInter) / (sumInvLog sens coocUnion)
| otherwise = (sumLog sens coocInter) / (sumLog sens coocUnion)
where
| sens > 0 = (sumInvLog' sens nbDocs diagoInter) / (sumInvLog' sens nbDocs diagoUnion)
| otherwise = (sumLog' sens nbDocs diagoInter) / (sumLog' sens nbDocs diagoUnion)
where
--------------------------------------
ngramsInter :: [Int]
ngramsInter = intersect ngrams ngrams'
......@@ -66,85 +65,83 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
ngramsUnion :: [Int]
ngramsUnion = union ngrams ngrams'
--------------------------------------
coocInter :: [Double]
coocInter = elems $ map (/docs) $ filterWithKey (\(k,k') _ -> k == k') $ intersectionWith (+) cooc cooc'
-- coocInter = elems $ map (/docs) $ intersectionWith (+) cooc cooc'
--------------------------------------
coocUnion :: [Double]
coocUnion = elems $ map (/docs) $ filterWithKey (\(k,k') _ -> k == k') $ unionWith (+) cooc cooc'
--------------------------------------
-- | To choose a proximity function
pickProximity :: Proximity -> Double -> Cooc -> Cooc -> [Int] -> [Int] -> Double
pickProximity proximity docs cooc cooc' ngrams ngrams' = case proximity of
WeightedLogJaccard sens _ _ -> weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
Hamming -> undefined
diagoInter :: [Double]
diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
--------------------------------------
diagoUnion :: [Double]
diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
--------------------------------------
-- | To process the proximity between a current group and a pair of targets group
toProximity :: Map Date Double -> Proximity -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double
toProximity docs proximity ego target target' =
let docs' = sum $ elems docs
cooc = if target == target'
then (target ^. phylo_groupCooc)
else sumCooc (target ^. phylo_groupCooc) (target' ^. phylo_groupCooc)
ngrams = if target == target'
then (target ^. phylo_groupNgrams)
else union (target ^. phylo_groupNgrams) (target' ^. phylo_groupNgrams)
in pickProximity proximity docs' (ego ^. phylo_groupCooc) cooc (ego ^. phylo_groupNgrams) ngrams
toProximity :: Double -> Map Int Double -> Proximity -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double
toProximity nbDocs diago proximity ego target target' =
case proximity of
WeightedLogJaccard sens _ _ ->
let targetsNgrams = if target == target'
then (target ^. phylo_groupNgrams)
else union (target ^. phylo_groupNgrams) (target' ^. phylo_groupNgrams)
in weightedLogJaccard' sens nbDocs diago (ego ^. phylo_groupNgrams) targetsNgrams
Hamming -> undefined
------------------------
-- | Local Matching | --
------------------------
toLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId
toLastPeriod fil periods = case fil of
ToParents -> head' "toLastPeriod" (sortOn fst periods)
ToChilds -> last' "toLastPeriod" (sortOn fst periods)
toLazyPairs :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId -> [(PhyloGroup,PhyloGroup)] -> [(PhyloGroup,PhyloGroup)]
toLazyPairs pointers fil thr prox prd pairs =
if null pointers then pairs
else let rest = filterPointers prox thr pointers
in if null rest
then let prd' = toLastPeriod fil (map (fst . fst . fst) pointers)
in if prd' == prd
then []
else filter (\(g,g') ->
case fil of
ToParents -> ((fst $ g ^. phylo_groupPeriod) < (fst prd'))
|| ((fst $ g' ^. phylo_groupPeriod) < (fst prd'))
ToChilds -> ((fst $ g ^. phylo_groupPeriod) > (fst prd'))
|| ((fst $ g' ^. phylo_groupPeriod) > (fst prd'))) pairs
else []
findLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId
findLastPeriod fil periods = case fil of
ToParents -> head' "findLastPeriod" (sortOn fst periods)
ToChilds -> last' "findLastPeriod" (sortOn fst periods)
-- | To filter pairs of candidates related to old pointers periods
removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId -> [(PhyloGroup,PhyloGroup)] -> [(PhyloGroup,PhyloGroup)]
removeOldPointers oldPointers fil thr prox prd pairs
| null oldPointers = pairs
| null (filterPointers prox thr oldPointers) =
let lastMatchedPrd = findLastPeriod fil (map (fst . fst . fst) oldPointers)
in if lastMatchedPrd == prd
then []
else filter (\(g,g') ->
case fil of
ToParents -> ((fst $ g ^. phylo_groupPeriod) < (fst lastMatchedPrd))
|| ((fst $ g' ^. phylo_groupPeriod) < (fst lastMatchedPrd))
ToChilds -> ((fst $ g ^. phylo_groupPeriod) > (fst lastMatchedPrd))
|| ((fst $ g' ^. phylo_groupPeriod) > (fst lastMatchedPrd))) pairs
| otherwise = []
-- | Find pairs of valuable candidates to be matched
makePairs' :: PhyloGroup -> [PhyloGroup] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity -> Map Date Double -> [(PhyloGroup,PhyloGroup)]
makePairs' ego candidates periods pointers fil thr prox docs =
makePairs' :: PhyloGroup -> [PhyloGroup] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity -> Map Date Double -> Map Date Cooc -> [(PhyloGroup,PhyloGroup)]
makePairs' ego candidates periods oldPointers fil thr prox docs diagos =
case null periods of
True -> []
False -> toLazyPairs pointers fil thr prox lastPrd
-- | at least on of the pair candidates should be from the last added period
False -> removeOldPointers oldPointers fil thr prox lastPrd
-- | at least on of the pair candidates should be from the last added period
$ filter (\(g,g') -> ((g ^. phylo_groupPeriod) == lastPrd)
|| ((g' ^. phylo_groupPeriod) == lastPrd))
$ listToKeys
$ filter (\g -> (g ^. phylo_groupPeriod == lastPrd)
|| ((toProximity docs prox ego ego g) >= thr)) candidates
$ filter (\g -> let nbDocs = sum $ elems $ (filterDocs docs ([ego ^. phylo_groupPeriod, g ^. phylo_groupPeriod]))
diago = reduceDiagos $ filterDiago diagos ([ego ^. phylo_groupPeriod, g ^. phylo_groupPeriod])
in (g ^. phylo_groupPeriod == lastPrd)
|| ((toProximity nbDocs diago prox ego ego g) >= thr)) candidates
where
lastPrd :: PhyloPeriodId
lastPrd = toLastPeriod fil periods
lastPrd = findLastPeriod fil periods
filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Proximity -> Map Date Double -> Double -> PhyloGroup -> PhyloGroup
phyloGroupMatching candidates fil proxi docs thr ego =
reduceDiagos :: Map Date Cooc -> Map Int Double
reduceDiagos diagos = mapKeys (\(k,_) -> k)
$ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc -> Double -> PhyloGroup -> PhyloGroup
phyloGroupMatching candidates fil proxi docs diagos thr ego =
if (null $ filterPointers proxi thr $ getPeriodPointers fil ego)
-- | let's find new pointers
then if null nextPointers
......@@ -155,8 +152,7 @@ phyloGroupMatching candidates fil proxi docs thr ego =
$ groupBy (\pt pt' -> snd pt == snd pt')
$ reverse $ sortOn snd $ head' "pointers" nextPointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
else addPointers ego fil TemporalPointer
$ filterPointers proxi thr $ getPeriodPointers fil ego
else ego
where
nextPointers :: [[Pointer]]
nextPointers = take 1
......@@ -164,13 +160,16 @@ phyloGroupMatching candidates fil proxi docs thr ego =
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$ scanl (\acc groups ->
let periods = nub $ map _phylo_groupPeriod $ concat groups
docs' = (filterDocs docs ([ego ^. phylo_groupPeriod] ++ periods))
pairs = makePairs' ego (concat groups) periods (getPeriodPointers fil ego) fil thr proxi docs
nbdocs = sum $ elems $ (filterDocs docs ([ego ^. phylo_groupPeriod] ++ periods))
diago = reduceDiagos
$ filterDiago diagos ([ego ^. phylo_groupPeriod] ++ periods)
-- | important resize nbdocs et diago dans le make pairs
pairs = makePairs' ego (concat groups) periods (getPeriodPointers fil ego) fil thr proxi docs diagos
in acc ++ ( filterPointers proxi thr
$ concat
$ map (\(c,c') ->
-- | process the proximity between the current group and a pair of candidates
let proximity = toProximity docs' proxi ego c c'
let proximity = toProximity nbdocs diago proxi ego c c'
in if (c == c')
then [(getGroupId c,proximity)]
else [(getGroupId c,proximity),(getGroupId c',proximity)] ) pairs )) []
......@@ -180,6 +179,9 @@ phyloGroupMatching candidates fil proxi docs thr ego =
filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
filterDocs d pds = restrictKeys d $ periodsToYears pds
filterDiago :: Map Date Cooc -> [PhyloPeriodId] -> Map Date Cooc
filterDiago diago pds = restrictKeys diago $ periodsToYears pds
-----------------------------
-- | Matching Processing | --
......@@ -200,26 +202,29 @@ getCandidates ego targets =
) groups') targets
phyloBranchMatching :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> [PhyloGroup] -> [PhyloGroup]
phyloBranchMatching frame periods proximity thr docs branch =
-- traceBranchMatching proximity thr
matchByPeriods
$ groupByField _phylo_groupPeriod branch
where
--------------------------------------
matchByPeriods :: Map PhyloPeriodId [PhyloGroup] -> [PhyloGroup]
matchByPeriods branch' = foldl' (\acc prd ->
let periodsPar = getNextPeriods ToParents frame prd periods
periodsChi = getNextPeriods ToChilds frame prd periods
candidatesPar = map (\prd' -> findWithDefault [] prd' branch') periodsPar
candidatesChi = map (\prd' -> findWithDefault [] prd' branch') periodsChi
docsPar = filterDocs docs ([prd] ++ periodsPar)
docsChi = filterDocs docs ([prd] ++ periodsChi)
egos = map (\ego -> phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar thr
$ phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi thr ego)
$ findWithDefault [] prd branch'
egos' = egos `using` parList rdeepseq
in acc ++ egos' ) [] periods
matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
matchGroupsToGroups frame periods proximity thr docs coocs groups =
let groups' = groupByField _phylo_groupPeriod groups
in foldl' (\acc prd ->
let -- | 1) find the parents/childs matching periods
periodsPar = getNextPeriods ToParents frame prd periods
periodsChi = getNextPeriods ToChilds frame prd periods
-- | 2) find the parents/childs matching candidates
candidatesPar = map (\prd' -> findWithDefault [] prd' groups') periodsPar
candidatesChi = map (\prd' -> findWithDefault [] prd' groups') periodsChi
-- | 3) find the parents/child number of docs by years
docsPar = filterDocs docs ([prd] ++ periodsPar)
docsChi = filterDocs docs ([prd] ++ periodsChi)
-- | 4) find the parents/child diago by years
diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
-- | 5) match in parallel all the groups (egos) to their possible candidates
egos = map (\ego -> phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar thr
$ phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi diagoChi thr ego)
$ findWithDefault [] prd groups'
egos' = egos `using` parList rdeepseq
in acc ++ egos'
) [] periods
-----------------------
......@@ -270,6 +275,7 @@ groupsToBranches groups =
++ (map fst $ group ^. phylo_groupPeriodParents)
++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
-- | first find the related components by inside each ego's period
-- | a supprimer
graph' = map relatedComponents egos
-- | then run it for the all the periods
graph = zip [1..]
......@@ -288,8 +294,13 @@ reduceFrequency frequency branches =
updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
updateThr thr branches = map (\b -> map (\g -> g & phylo_groupMeta .~ (singleton "thr" [thr])) b) branches
seqMatching :: Proximity -> Double -> Map Int Double -> Int -> Double -> Int -> Map Date Double -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
seqMatching proximity beta frequency minBranch egoThr frame docs periods done ego rest =
-- | Sequentially break each branch of a phylo where
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
breakBranches proximity beta frequency minBranch thr frame docs coocs periods done ego rest =
-- | 1) keep or not the new division of ego
let done' = done ++ (if snd ego
then (if ((null (fst ego')) || (quality > quality'))
......@@ -310,8 +321,8 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg
-- | 2) if there is no more branches in rest then return else continue
if null rest
then done'
else seqMatching proximity beta frequency minBranch egoThr frame docs periods
done' (head' "seqMatching" rest) (tail' "seqMatching" rest)
else breakBranches proximity beta frequency minBranch thr frame docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where
--------------------------------------
quality :: Double
......@@ -320,11 +331,11 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg
ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' =
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ phyloBranchMatching frame periods proximity egoThr docs (fst ego)
$ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
$ if (length branches' > 1)
then updateThr egoThr branches'
then updateThr thr branches'
else branches'
--------------------------------------
quality' :: Double
......@@ -332,15 +343,17 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
recursiveMatching' :: Proximity -> Double -> Int -> Map Int Double -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
recursiveMatching' proximity beta minBranch frequency egoThr frame periods docs branches =
if (egoThr >= 1) || ((not . or) $ map snd branches)
seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
seaLevelMatching proximity beta minBranch frequency thr frame periods docs coocs branches =
-- | if there is no branch to break or if sea level > 1 then end
if (thr >= 1) || ((not . or) $ map snd branches)
then branches
else
let branches' = seqMatching proximity beta frequency minBranch egoThr frame docs periods
[] (head' "recursiveMatching" branches) (tail' "recursiveMatching" branches)
-- | break all the possible branches at the current sea level
let branches' = breakBranches proximity beta frequency minBranch thr frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches')
in recursiveMatching' proximity beta minBranch frequency' (egoThr + (getThresholdStep proximity)) frame periods docs branches'
in seaLevelMatching proximity beta minBranch frequency' (thr + (getThresholdStep proximity)) frame periods docs coocs branches'
temporalMatching :: Phylo -> Phylo
......@@ -348,23 +361,27 @@ temporalMatching phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
phylo
where
-- | 2) init the recursiveMatching
-- | 2) process the temporal matching by elevating sea level
branches :: [[PhyloGroup]]
branches = map fst
$ recursiveMatching' (phyloProximity $ getConfig phylo)
(_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq)
(getThresholdInit $ phyloProximity $ getConfig phylo)
(getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo)
(phylo ^. phylo_timeDocs)
groups
$ seaLevelMatching (phyloProximity $ getConfig phylo)
(_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq)
(getThresholdInit $ phyloProximity $ getConfig phylo)
(getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo)
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
groups
-- | 1) for each group process an initial temporal Matching
-- | here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],Bool)]
groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ phyloBranchMatching (getTimeFrame $ timeUnit $ getConfig phylo) (getPeriodIds phylo)
(phyloProximity $ getConfig phylo) (getThresholdInit $ phyloProximity $ getConfig phylo)
(phylo ^. phylo_timeDocs)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo)
\ No newline at end of file
$ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (phyloProximity $ getConfig phylo)
(getThresholdInit $ phyloProximity $ getConfig phylo)
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment