Commit 3517130e authored by qlobbe's avatar qlobbe

looking at the recall

parent 406ae431
Pipeline #586 failed with stage
......@@ -125,7 +125,7 @@ defaultConfig =
, phyloLevel = 1
, phyloProximity = WeightedLogJaccard 10 0 0.1
, phyloSynchrony = ByProximityDistribution 0
, phyloQuality = Quality 1 1
, phyloQuality = Quality 0.5 1
, timeUnit = Year 3 1 5
, contextualUnit = Fis 1 4
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
......
......@@ -114,7 +114,7 @@ branchToDotNode b =
([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
<> (metaToAttr $ b ^. branch_meta)
<> [ toAttr "nodeType" "branch"
, toAttr "branchId" (pack $ show (snd $ b ^. branch_id)) ])
, toAttr "branchId" (pack $ unwords (init $ map show $ snd $ b ^. branch_id)) ])
periodToDotNode :: (Date,Date) -> Dot DotId
periodToDotNode prd =
......@@ -132,7 +132,7 @@ groupToDotNode fdt g =
<> [ toAttr "nodeType" "group"
, toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
, toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
, toAttr "branchId" (pack $ show (snd $ g ^. phylo_groupBranchId))])
, toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))])
toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
......@@ -164,8 +164,14 @@ exportToDot phylo export =
graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
<> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
, Ratio FillRatio
, Style [SItem Filled []],Color [toWColor White]
, (toAttr (fromStrict "nbDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))])
, Style [SItem Filled []],Color [toWColor White]]
-- | home made attributes
<> [(toAttr (fromStrict "nbDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_relevance $ phyloQuality $ getConfig phylo))
])
-- toAttr (fromStrict k) $ (pack . unwords) $ map show v
......
......@@ -66,6 +66,9 @@ roundToStr = printf "%0.*f"
countSup :: Double -> [Double] -> Int
countSup s l = length $ filter (>s) l
dropByIdx :: Int -> [a] -> [a]
dropByIdx k l = take k l ++ drop (k+1) l
elemIndex' :: Eq a => a -> [a] -> Int
elemIndex' e l = case (List.elemIndex e l) of
......@@ -253,8 +256,26 @@ filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity proximity thr local =
case proximity of
WeightedLogJaccard _ _ _ -> local >= thr
Hamming -> undefined
Hamming -> undefined
getProximityName :: Proximity -> String
getProximityName proximity =
case proximity of
WeightedLogJaccard _ _ _ -> "WLJaccard"
Hamming -> "Hamming"
getProximityInit :: Proximity -> Double
getProximityInit proximity =
case proximity of
WeightedLogJaccard _ i _ -> i
Hamming -> undefined
getProximityStep :: Proximity -> Double
getProximityStep proximity =
case proximity of
WeightedLogJaccard _ _ s -> s
Hamming -> undefined
---------------
-- | Phylo | --
......
......@@ -285,6 +285,36 @@ toAccuracy freq term thr branches =
branches' :: [[PhyloGroup]]
branches' = relevantBranches term thr branches
toRecallWeighted :: Double -> [Double] -> [Double]
toRecallWeighted old curr =
let old' = old + sum curr
in map (\r -> (r / old') * r) curr
toRecall' :: Quality -> Map Int Double -> [[PhyloGroup]] -> Double
toRecall' quality frequency branches =
let terms = keys frequency
in sum $ map (\term -> toRecall (frequency ! term) term (quality ^. qua_minBranch) branches) terms
toPhyloQuality :: Quality -> Map Int Double -> Double -> [[PhyloGroup]] -> Double
toPhyloQuality quality frequency recall branches =
if (foldl' (\acc b -> acc && (length b < (quality ^. qua_minBranch))) True branches)
-- | the local phylo is composed of small branches
then 0
else
let relevance = quality ^. qua_relevance
-- | compute the F score for a given relevance
in ((1 + relevance ** 2) * accuracy * recall)
/ (((relevance ** 2) * accuracy + recall))
where
terms :: [Int]
terms = keys frequency
-- | for each term compute the global accuracy
accuracy :: Double
accuracy = sum $ map (\term -> toAccuracy (frequency ! term) term (quality ^. qua_minBranch) branches) terms
toPhyloQuality' :: Quality -> Map Int Double -> [[PhyloGroup]] -> Double
toPhyloQuality' quality frequency branches =
......@@ -295,7 +325,7 @@ toPhyloQuality' quality frequency branches =
let relevance = quality ^. qua_relevance
-- | compute the F score for a given relevance
in ((1 + relevance ** 2) * accuracy * recall)
/ (((relevance ** 2) * accuracy + recall))
/ (((relevance ** 2) * accuracy + recall))
where
terms :: [Int]
terms = keys frequency
......@@ -334,8 +364,8 @@ groupsToBranches groups =
in groups' `using` parList rdeepseq ) graph
recursiveMatching :: Proximity -> Quality -> Map Int Double -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> Double -> [[PhyloGroup]] -> [PhyloGroup]
recursiveMatching proximity qua freq thr frame periods docs quality branches =
recursiveMatching :: Proximity -> Quality -> Map Int Double -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> Double -> Double -> [[PhyloGroup]] -> [PhyloGroup]
recursiveMatching proximity qua freq thr frame periods docs quality oldRecall branches =
if (length branches == (length $ concat branches))
then concat branches
else if thr >= 1
......@@ -347,14 +377,22 @@ recursiveMatching proximity qua freq thr frame periods docs quality branches =
True -> concat
$ map (\branches' ->
let idx = fromJust $ elemIndex branches' nextBranches
in recursiveMatching proximity qua freq (thr + (getThresholdStep proximity)) frame periods docs (nextQualities !! idx) branches')
in recursiveMatching proximity qua
freq (thr + (getThresholdStep proximity))
frame periods docs (nextQualities !! idx)
(sum $ dropByIdx idx nextRecalls) branches')
$ nextBranches
-- | failure : last step was a local maximum of quality, let's validate it (traceMatchFailure thr quality (sum nextQualities))
False -> concat branches
where
-- | 2) for each of the possible next branches process the phyloQuality score
nextQualities :: [Double]
nextQualities = map (\nextBranch -> toPhyloQuality' qua freq nextBranch) nextBranches
nextQualities = map (\(nextBranch,recall) -> toPhyloQuality qua freq recall nextBranch) $ zip nextBranches nextRecalls
-- nextQualities = map (\nextBranch -> toPhyloQuality' qua freq nextBranch) nextBranches
-------
nextRecalls :: [Double]
nextRecalls = toRecallWeighted oldRecall
$ map (\nextBranch -> toRecall' qua freq nextBranch) nextBranches
-- | 1) for each local branch process a temporal matching then find the resulting branches
nextBranches :: [[[PhyloGroup]]]
nextBranches =
......@@ -380,13 +418,17 @@ temporalMatching phylo = updatePhyloGroups 1 branches' phylo
+ (getThresholdStep $ phyloProximity $ getConfig phylo))
(getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo)
(phylo ^. phylo_timeDocs) quality branches
(phylo ^. phylo_timeDocs) quality recall branches
-- | 3) process the quality score
quality :: Double
quality = toPhyloQuality' (phyloQuality $ getConfig phylo) frequency branches
quality = toPhyloQuality (phyloQuality $ getConfig phylo) frequency recall branches
-- quality = toPhyloQuality' (phyloQuality $ getConfig phylo) frequency branches
-------
recall :: Double
recall = toRecall' (phyloQuality $ getConfig phylo) frequency branches
-- | 3) process the constants of the quality score
frequency :: Map Int Double
frequency =
frequency =
let terms = ngramsInBranches branches
in fromList $ map (\t -> (t, ((termFreq' t $ concat branches) / (sum $ map (\t' -> termFreq' t' $ concat branches) terms)))) terms
-- | 2) group into branches
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment