Commit adf09142 authored by qlobbe's avatar qlobbe

fix accuracy

parent 09db0b41
Pipeline #589 failed with stage
...@@ -125,7 +125,7 @@ defaultConfig = ...@@ -125,7 +125,7 @@ defaultConfig =
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 0 0.1 , phyloProximity = WeightedLogJaccard 10 0 0.1
, phyloSynchrony = ByProximityDistribution 0 , phyloSynchrony = ByProximityDistribution 0
, phyloQuality = Quality 0.1 1 , phyloQuality = Quality 0.5 3
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, contextualUnit = Fis 1 5 , contextualUnit = Fis 1 5
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2] , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
......
...@@ -182,13 +182,15 @@ exportToDot phylo export = ...@@ -182,13 +182,15 @@ exportToDot phylo export =
graphAttrs [Rank SameRank] graphAttrs [Rank SameRank]
-- | 3) group the branches by hierarchy -- | 3) group the branches by hierarchy
mapM (\branches -> -- mapM (\branches ->
subgraph (Str "Branches clade") $ do -- subgraph (Str "Branches clade") $ do
graphAttrs [Rank SameRank] -- graphAttrs [Rank SameRank]
-- | 4) create a node for each branch -- -- | 4) create a node for each branch
mapM branchToDotNode branches -- mapM branchToDotNode branches
) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
mapM branchToDotNode $ export ^. export_branches
-- | 5) create a layer for each period -- | 5) create a layer for each period
_ <- mapM (\period -> _ <- mapM (\period ->
......
...@@ -15,7 +15,7 @@ Portability : POSIX ...@@ -15,7 +15,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile) import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, delete)
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, intersectionWith, findWithDefault, keys, (!), filterWithKey) import Data.Map (Map, fromList, elems, restrictKeys, unionWith, intersectionWith, findWithDefault, keys, (!), filterWithKey)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -242,13 +242,12 @@ count x = length . filter (== x) ...@@ -242,13 +242,12 @@ count x = length . filter (== x)
termFreq' :: Int -> [PhyloGroup] -> Double termFreq' :: Int -> [PhyloGroup] -> Double
termFreq' term groups = termFreq' term groups =
let ngrams = concat $ map _phylo_groupNgrams groups let ngrams = concat $ map _phylo_groupNgrams groups
in log((fromIntegral $ count term ngrams) in log ((fromIntegral $ count term ngrams)
/ (fromIntegral $ length ngrams)) / (fromIntegral $ length ngrams))
relevantBranches :: Int -> Int -> [[PhyloGroup]] -> [[PhyloGroup]] relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
relevantBranches term thr branches = relevantBranches term branches =
filter (\groups -> (length groups >= thr) filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
&& (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
branchCov' :: [PhyloGroup] -> [[PhyloGroup]] -> Double branchCov' :: [PhyloGroup] -> [[PhyloGroup]] -> Double
branchCov' branch branches = branchCov' branch branches =
...@@ -256,7 +255,7 @@ branchCov' branch branches = ...@@ -256,7 +255,7 @@ branchCov' branch branches =
toRecall :: Double -> Int -> Int -> [[PhyloGroup]] -> Double toRecall :: Double -> Int -> Int -> [[PhyloGroup]] -> Double
toRecall freq term thr branches = toRecall freq term border branches =
-- | given a random term in a phylo -- | given a random term in a phylo
freq freq
-- | for each relevant branches -- | for each relevant branches
...@@ -265,51 +264,52 @@ toRecall freq term thr branches = ...@@ -265,51 +264,52 @@ toRecall freq term thr branches =
((branchCov' branch branches') / (sum $ map (\b -> branchCov' b branches') branches')) ((branchCov' branch branches') / (sum $ map (\b -> branchCov' b branches') branches'))
-- | compute the local recall -- | compute the local recall
* ( (fromIntegral $ length $ filter (\group -> elem term $ group ^. phylo_groupNgrams) branch) * ( (fromIntegral $ length $ filter (\group -> elem term $ group ^. phylo_groupNgrams) branch)
/ (fromIntegral $ length $ filter (\group -> elem term $ group ^. phylo_groupNgrams) $ concat branches'))) branches') / ( (fromIntegral $ length $ filter (\group -> elem term $ group ^. phylo_groupNgrams) $ concat branches')
-- | with a ponderation from border branches
+ (fromIntegral border)) )) branches')
where where
branches' :: [[PhyloGroup]] branches' :: [[PhyloGroup]]
branches' = relevantBranches term thr branches branches' = relevantBranches term branches
toAccuracy :: Double -> Int -> Int -> [[PhyloGroup]] -> Double toAccuracy :: Double -> Int -> [[PhyloGroup]] -> Double
toAccuracy freq term thr branches = toAccuracy freq term branches =
-- | given a random term in a phylo if (null branches)
freq then 0
-- | for each relevant branches else
* (sum $ map (\branch -> -- | given a random term in a phylo
-- | given its local coverage freq
((branchCov' branch branches') / (sum $ map (\b -> branchCov' b branches') branches')) -- | for each relevant branches
-- | compute the local accuracy * (sum $ map (\branch ->
* ( (fromIntegral $ length $ filter (\group -> elem term $ group ^. phylo_groupNgrams) branch) -- | given its local coverage
/ (fromIntegral $ length branch))) branches') ((branchCov' branch branches') / (sum $ map (\b -> branchCov' b branches') branches'))
-- | compute the local accuracy
* ( (fromIntegral $ length $ filter (\group -> elem term $ group ^. phylo_groupNgrams) branch)
/ (fromIntegral $ length branch))) branches')
where where
branches' :: [[PhyloGroup]] branches' :: [[PhyloGroup]]
branches' = relevantBranches term thr branches branches' = relevantBranches term branches
toRecallWeighted :: Double -> Double -> Double
toRecallWeighted old curr = curr / (old + curr)
toRecall' :: Int -> Map Int Double -> [[PhyloGroup]] -> Double
toRecall' minBranch frequency branches =
let terms = keys frequency
in sum $ map (\term -> toRecall (frequency ! term) term minBranch branches) terms
toPhyloQuality :: Double -> Map Int Double -> Int -> Double -> [[PhyloGroup]] -> Double
toPhyloQuality :: Double -> Int -> Map Int Double -> Double -> [[PhyloGroup]] -> Double toPhyloQuality beta frequency border oldAcc branches =
toPhyloQuality beta minBranch frequency recall branches = -- trace (" rec : " <> show(recall)) $
if (foldl' (\acc b -> acc && (length b < minBranch)) True branches) -- trace (" acc : " <> show(accuracy)) $
-- | the local phylo is composed of small branches if (null branches)
then 0 then 0
else ((1 + beta ** 2) * accuracy * recall) else ((1 + beta ** 2) * accuracy * recall)
/ (((beta ** 2) * accuracy + recall)) / (((beta ** 2) * accuracy + recall))
where where
terms :: [Int]
terms = keys frequency
-- | for each term compute the global accuracy -- | for each term compute the global accuracy
accuracy :: Double accuracy :: Double
accuracy = sum $ map (\term -> toAccuracy (frequency ! term) term minBranch branches) terms accuracy = oldAcc + (sum $ map (\term -> toAccuracy (frequency ! term) term branches) $ keys frequency)
-- | for each term compute the global recall
recall :: Double
recall = sum $ map (\term -> toRecall (frequency ! term) term border branches) $ keys frequency
toBorderAccuracy :: Map Int Double -> [[PhyloGroup]] -> Double
toBorderAccuracy freq branches = sum $ map (\t -> toAccuracy (freq ! t) t branches) $ keys freq
----------------------------- -----------------------------
...@@ -337,74 +337,82 @@ groupsToBranches groups = ...@@ -337,74 +337,82 @@ groupsToBranches groups =
in groups' `using` parList rdeepseq ) graph in groups' `using` parList rdeepseq ) graph
recursiveMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> Double -> Double -> [PhyloGroup] -> [PhyloGroup] reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
recursiveMatching proximity beta minBranch frequency egoThr frame periods docs quality recall groups = reduceFrequency frequency branches =
if (length groups == 1) restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
then trace ("stop : just one group")
$ groups
else if (egoThr >= 1) alterBorder :: Int -> [[PhyloGroup]] -> [PhyloGroup] -> Int
then trace ("stop : thr >= 1") alterBorder border branches branch = border + (length $ concat branches) - (length branch)
$ groups
else if (quality > quality')
then trace ("stop : " <> show(quality) <> " > " <> show(quality')) recursiveMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> Double -> Int -> Double -> [PhyloGroup] -> [PhyloGroup]
-- $ trace (show(length groups) <> " groups " <> show(length branches')) recursiveMatching proximity beta minBranch frequency egoThr frame periods docs quality border oldAcc groups =
-- $ trace (show(recall) <> " recall " <> show(recall')) if ((egoThr >= 1) || (quality > quality') || ((length $ concat $ snd branches') == (length groups)))
$ groups then
else trace ("go : " <> show(quality) <> " <= " <> show(quality')) trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality') <> "\n"
$ concat <> " |✓ " <> show(length $ fst branches') <> show(map length $ fst branches')
$ map (\branch -> recursiveMatching proximity beta minBranch frequency (egoThr + (getThresholdStep proximity)) <> " |✗ " <> show(length $ snd branches') <> "[" <> show(length $ concat $ snd branches') <> "]") $
frame periods docs quality' recall' branch) groups
$ branches' else
let next = map (\b -> recursiveMatching proximity beta minBranch
(reduceFrequency frequency (fst branches'))
(egoThr + (getThresholdStep proximity))
frame periods docs quality'
(alterBorder border (fst branches') b)
(oldAcc + (toBorderAccuracy frequency (delete b ((fst branches') ++ (snd branches')))))
b ) (fst branches')
in trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality') <> "\n"
<> " |✓ " <> show(length $ fst branches') <> show(map length $ fst branches')
<> " |✗ " <> show(length $ snd branches') <> "[" <> show(length $ concat $ snd branches') <> "]") $
concat (next ++ (snd branches'))
where where
-- | 2) for each of the possible next branches process the phyloQuality score -- | 2) for each of the possible next branches process the phyloQuality score
quality' :: Double quality' :: Double
quality' = toPhyloQuality beta minBranch frequency recall' branches' quality' = toPhyloQuality beta frequency border oldAcc ((fst branches') ++ (snd branches'))
-- | 3) process a new recall weigted by the last one
recall' :: Double
recall' = toRecallWeighted recall
$ toRecall' minBranch frequency branches'
-- | 1) for each local branch process a temporal matching then find the resulting branches -- | 1) for each local branch process a temporal matching then find the resulting branches
branches' :: [[PhyloGroup]] branches' :: ([[PhyloGroup]],[[PhyloGroup]])
branches' = branches' =
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ phyloBranchMatching frame periods proximity egoThr docs groups $ phyloBranchMatching frame periods proximity egoThr docs groups
in branches `using` parList rdeepseq in partition (\b -> length b >= minBranch) (branches `using` parList rdeepseq)
temporalMatching :: Phylo -> Phylo temporalMatching :: Phylo -> Phylo
temporalMatching phylo = updatePhyloGroups 1 branches' phylo temporalMatching phylo = updatePhyloGroups 1 branches' phylo
where where
-- | 6) apply the recursive matching -- | 5) apply the recursive matching
branches' :: Map PhyloGroupId PhyloGroup branches' :: Map PhyloGroupId PhyloGroup
branches' = fromList branches' =
$ map (\g -> (getGroupId g, g)) let next = trace (" ✓ F(β) = " <> show(quality)
$ traceMatchEnd <> " |✓ " <> show(length $ fst branches) <> show(map length $ fst branches)
$ concat <> " |✗ " <> show(length $ snd branches) <> "[" <> show(length $ concat $ snd branches) <> "]")
$ map (\branch -> $ map (\branch -> recursiveMatching (phyloProximity $ getConfig phylo)
recursiveMatching (phyloProximity $ getConfig phylo)
(_qua_relevance $ phyloQuality $ getConfig phylo) (_qua_relevance $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo) (_qua_minBranch $ phyloQuality $ getConfig phylo)
frequency (reduceFrequency frequency (fst branches))
( (getThresholdInit $ phyloProximity $ getConfig phylo) ( (getThresholdInit $ phyloProximity $ getConfig phylo)
+ (getThresholdStep $ phyloProximity $ getConfig phylo)) + (getThresholdStep $ phyloProximity $ getConfig phylo))
(getTimeFrame $ timeUnit $ getConfig phylo) (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (getPeriodIds phylo)
(phylo ^. phylo_timeDocs) quality recall branch (phylo ^. phylo_timeDocs) quality (alterBorder 0 (fst branches) branch)
) branches (toBorderAccuracy frequency (delete branch ((fst branches) ++ (snd branches))))
-- | 5) process the quality score branch
) (fst branches)
in fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd (concat (next ++ (snd branches)))
-- | 4) process the quality score
quality :: Double quality :: Double
quality = toPhyloQuality (_qua_relevance $ phyloQuality $ getConfig phylo) (_qua_minBranch $ phyloQuality $ getConfig phylo) frequency recall branches quality = toPhyloQuality (_qua_relevance $ phyloQuality $ getConfig phylo) frequency 0 0 ((fst branches) ++ (snd branches))
-- | 4) find the recall
recall :: Double
recall = toRecall' (_qua_minBranch $ phyloQuality $ getConfig phylo) frequency branches
-- | 3) process the constants of the quality score -- | 3) process the constants of the quality score
frequency :: Map Int Double frequency :: Map Int Double
frequency = frequency =
let terms = ngramsInBranches branches let terms = ngramsInBranches ((fst branches) ++ (snd branches))
in fromList $ map (\t -> (t, ((termFreq' t $ concat branches) / (sum $ map (\t' -> termFreq' t' $ concat branches) terms)))) terms freqs = map (\t -> termFreq' t $ concat ((fst branches) ++ (snd branches))) terms
in fromList $ map (\(t,freq) -> (t,freq/(sum freqs))) $ zip terms freqs
-- | 2) group into branches -- | 2) group into branches
branches :: [[PhyloGroup]] branches :: ([[PhyloGroup]],[[PhyloGroup]])
branches = groupsToBranches $ fromList $ map (\group -> (getGroupId group, group)) groups' branches = partition (\b -> length b >= (_qua_minBranch $ phyloQuality $ getConfig phylo))
$ groupsToBranches $ fromList $ map (\group -> (getGroupId group, group)) groups'
-- | 1) for each group process an initial temporal Matching -- | 1) for each group process an initial temporal Matching
groups' :: [PhyloGroup] groups' :: [PhyloGroup]
groups' = phyloBranchMatching (getTimeFrame $ timeUnit $ getConfig phylo) (getPeriodIds phylo) groups' = phyloBranchMatching (getTimeFrame $ timeUnit $ getConfig phylo) (getPeriodIds phylo)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment