Commit 55d8b0ad authored by qlobbe's avatar qlobbe

change the ponderation of the quality function

parent 8bb9ff6c
...@@ -27,7 +27,7 @@ import Text.Printf ...@@ -27,7 +27,7 @@ import Text.Printf
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Vector as Vector -- import qualified Data.Vector as Vector
...@@ -313,16 +313,16 @@ wk :: [PhyloGroup] -> Double ...@@ -313,16 +313,16 @@ wk :: [PhyloGroup] -> Double
wk bk = fromIntegral $ length bk wk bk = fromIntegral $ length bk
toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double -- toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
toPhyloQuality' lambda freq branches = -- toPhyloQuality' lambda freq branches =
if (null branches) -- if (null branches)
then 0 -- then 0
else sum -- else sum
$ map (\i -> -- $ map (\i ->
let bks = relevantBranches i branches -- let bks = relevantBranches i branches
periods = nub $ map _phylo_groupPeriod $ filter (\g -> elem i $ g ^. phylo_groupNgrams) $ concat bks -- periods = nub $ map _phylo_groupPeriod $ filter (\g -> elem i $ g ^. phylo_groupNgrams) $ concat bks
in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore lambda i periods bk bks)) bks)) -- in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore lambda i periods bk bks)) bks))
$ keys freq -- $ keys freq
toRecall :: Map Int Double -> [[PhyloGroup]] -> Double toRecall :: Map Int Double -> [[PhyloGroup]] -> Double
toRecall freq branches = toRecall freq branches =
...@@ -359,24 +359,25 @@ toAccuracy freq branches = ...@@ -359,24 +359,25 @@ toAccuracy freq branches =
-- | here we do the average of all the local f_scores -- | here we do the average of all the local f_scores
toPhyloQuality :: Double -> Double -> Map Int Double -> [[PhyloGroup]] -> Double toPhyloQuality :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
toPhyloQuality fdt lambda freq branches = toPhyloQuality lambda freq branches =
if (null branches) if (null branches)
then 0 then 0
else sum else sum
$ map (\x -> $ map (\x ->
-- let px = freq ! x let px = freq ! x
let bx = relevantBranches x branches bx = relevantBranches x branches
-- | periods containing x -- | periods containing x
periods = nub $ map _phylo_groupPeriod $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx periods = nub $ map _phylo_groupPeriod $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx
wks = sum $ map wk bx wks = sum $ map wk bx
-- in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x bk bx)) bx)) -- in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x bk bx)) bx))
-- in (1 / fdt) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x periods bk bx)) bx)) -- in (1 / fdt) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x periods bk bx)) bx))
in (1 / fdt) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore (tan (lambda * pi / 2)) x periods bk bx)) bx)) -- in (1 / fdt) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore (tan (lambda * pi / 2)) x periods bk bx)) bx))
in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore (tan (lambda * pi / 2)) x periods bk bx)) bx))
$ keys freq $ keys freq
-- where where
-- pys :: Double pys :: Double
-- pys = sum (elems freq) pys = sum (elems freq)
-- 1 / nb de foundation -- 1 / nb de foundation
...@@ -419,9 +420,9 @@ updateThr thr branches = map (\b -> map (\g -> ...@@ -419,9 +420,9 @@ updateThr thr branches = map (\b -> map (\g ->
-- done = all the allready broken branches -- done = all the allready broken branches
-- ego = the current branch we want to break -- ego = the current branch we want to break
-- rest = the branches we still have to break -- rest = the branches we still have to break
breakBranches :: Double -> Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
-> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)] -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods done ego rest = breakBranches proximity lambda frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
-- 1) keep or not the new division of ego -- 1) keep or not the new division of ego
let done' = done ++ (if snd ego let done' = done ++ (if snd ego
then then
...@@ -444,12 +445,12 @@ breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame ...@@ -444,12 +445,12 @@ breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame
-- 2) if there is no more branches in rest then return else continue -- 2) if there is no more branches in rest then return else continue
if null rest if null rest
then done' then done'
else breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods else breakBranches proximity lambda frequency minBranch thr depth elevation frame docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest) done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where where
-------------------------------------- --------------------------------------
quality :: Double quality :: Double
quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest)) quality = toPhyloQuality lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
-------------------------------------- --------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]]) ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' = ego' =
...@@ -461,29 +462,29 @@ breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame ...@@ -461,29 +462,29 @@ breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame
$ depthToMeta (elevation - depth) branches' $ depthToMeta (elevation - depth) branches'
-------------------------------------- --------------------------------------
quality' :: Double quality' :: Double
quality' = toPhyloQuality fdt lambda frequency quality' = toPhyloQuality lambda frequency
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest)) ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
seaLevelMatching :: Double -> Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
-> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)] -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
seaLevelMatching fdt proximity lambda minBranch frequency thr step depth elevation frame periods docs coocs branches = seaLevelMatching proximity lambda minBranch frequency thr step depth elevation frame periods docs coocs branches =
-- if there is no branch to break or if seaLvl level > 1 then end -- if there is no branch to break or if seaLvl level > 1 then end
if (thr >= 1) || ((not . or) $ map snd branches) if (thr >= 1) || ((not . or) $ map snd branches)
then branches then branches
else else
-- break all the possible branches at the current seaLvl level -- break all the possible branches at the current seaLvl level
let quality = toPhyloQuality fdt lambda frequency (map fst branches) let quality = toPhyloQuality lambda frequency (map fst branches)
acc = toAccuracy frequency (map fst branches) acc = toAccuracy frequency (map fst branches)
rec = toRecall frequency (map fst branches) rec = toRecall frequency (map fst branches)
branches' = trace ("↑ level = " <> printf "%.3f" thr <> " F(λ) = " <> printf "%.5f" quality branches' = trace ("↑ level = " <> printf "%.3f" thr <> " F(λ) = " <> printf "%.5f" quality
<> " ξ = " <> printf "%.5f" acc <> " ξ = " <> printf "%.5f" acc
<> " ρ = " <> printf "%.5f" rec <> " ρ = " <> printf "%.5f" rec
<> " branches = " <> show(length branches) <> " ↴") <> " branches = " <> show(length branches) <> " ↴")
$ breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods $ breakBranches proximity lambda frequency minBranch thr depth elevation frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches) [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches') frequency' = reduceFrequency frequency (map fst branches')
in seaLevelMatching fdt proximity lambda minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches' in seaLevelMatching proximity lambda minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
...@@ -494,8 +495,7 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1 ...@@ -494,8 +495,7 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1
-- 2) process the temporal matching by elevating seaLvl level -- 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]] branches :: [[PhyloGroup]]
branches = map fst branches = map fst
$ seaLevelMatching (fromIntegral $ Vector.length $ getRoots phylo) $ seaLevelMatching (phyloProximity $ getConfig phylo)
(phyloProximity $ getConfig phylo)
(_qua_granularity $ phyloQuality $ getConfig phylo) (_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo) (_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq) (phylo ^. phylo_termFreq)
...@@ -574,11 +574,11 @@ toThreshold lvl proxiGroups = ...@@ -574,11 +574,11 @@ toThreshold lvl proxiGroups =
-- done = all the allready broken branches -- done = all the allready broken branches
-- ego = the current branch we want to break -- ego = the current branch we want to break
-- rest = the branches we still have to break -- rest = the branches we still have to break
adaptativeBreakBranches :: Double -> Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
-> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc -> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
-> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))] -> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
-> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods done ego rest = adaptativeBreakBranches proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods done ego rest =
-- 1) keep or not the new division of ego -- 1) keep or not the new division of ego
let done' = done ++ (if (fst . snd) ego let done' = done ++ (if (fst . snd) ego
then (if ((null (fst ego')) || (quality > quality')) then (if ((null (fst ego')) || (quality > quality'))
...@@ -598,7 +598,7 @@ adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda freque ...@@ -598,7 +598,7 @@ adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda freque
-- 2) if there is no more branches in rest then return else continue -- 2) if there is no more branches in rest then return else continue
if null rest if null rest
then done' then done'
else adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods else adaptativeBreakBranches proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest) done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where where
-------------------------------------- --------------------------------------
...@@ -606,7 +606,7 @@ adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda freque ...@@ -606,7 +606,7 @@ adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda freque
thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
-------------------------------------- --------------------------------------
quality :: Double quality :: Double
quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest)) quality = toPhyloQuality lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
-------------------------------------- --------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]]) ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' = ego' =
...@@ -618,21 +618,21 @@ adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda freque ...@@ -618,21 +618,21 @@ adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda freque
$ depthToMeta (elevation - depth) branches' $ depthToMeta (elevation - depth) branches'
-------------------------------------- --------------------------------------
quality' :: Double quality' :: Double
quality' = toPhyloQuality fdt lambda frequency quality' = toPhyloQuality lambda frequency
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest)) ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
adaptativeSeaLevelMatching :: Double -> Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
-> Double -> Int -> Map Int Double -> Double -> Int -> Map Int Double
-> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
-> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
adaptativeSeaLevelMatching fdt proxiConf depth elevation groupsProxi lambda minBranch frequency frame periods docs coocs branches = adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi lambda minBranch frequency frame periods docs coocs branches =
-- if there is no branch to break or if seaLvl level >= depth then end -- if there is no branch to break or if seaLvl level >= depth then end
if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches) if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
then branches then branches
else else
-- break all the possible branches at the current seaLvl level -- break all the possible branches at the current seaLvl level
let branches' = adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches) [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches') frequency' = reduceFrequency frequency (map fst branches')
groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
...@@ -641,7 +641,7 @@ adaptativeSeaLevelMatching fdt proxiConf depth elevation groupsProxi lambda minB ...@@ -641,7 +641,7 @@ adaptativeSeaLevelMatching fdt proxiConf depth elevation groupsProxi lambda minB
<> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches') <> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
<> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]" <> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
<> " thr = ") <> " thr = ")
$ adaptativeSeaLevelMatching fdt proxiConf (depth - 1) elevation groupsProxi' lambda minBranch frequency' frame periods docs coocs branches' $ adaptativeSeaLevelMatching proxiConf (depth - 1) elevation groupsProxi' lambda minBranch frequency' frame periods docs coocs branches'
adaptativeTemporalMatching :: Double -> Phylo -> Phylo adaptativeTemporalMatching :: Double -> Phylo -> Phylo
...@@ -652,8 +652,7 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1 ...@@ -652,8 +652,7 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
-- 2) process the temporal matching by elevating seaLvl level -- 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]] branches :: [[PhyloGroup]]
branches = map fst branches = map fst
$ adaptativeSeaLevelMatching (fromIntegral $ Vector.length $ getRoots phylo) $ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)
(phyloProximity $ getConfig phylo)
(elevation - 1) (elevation - 1)
elevation elevation
(phylo ^. phylo_groupsProxi) (phylo ^. phylo_groupsProxi)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment