Commit 1d02438b authored by qlobbe's avatar qlobbe

first commit

parent 86c6ae7f
...@@ -18,8 +18,8 @@ module Main where ...@@ -18,8 +18,8 @@ module Main where
import Data.Aeson import Data.Aeson
import Data.ByteString.Lazy (ByteString) import Data.ByteString.Lazy (ByteString)
import Data.Maybe (isJust, fromJust) -- import Data.Maybe (isJust, fromJust)
import Data.List (concat, nub, isSuffixOf, take) import Data.List (concat, nub, isSuffixOf)
import Data.String (String) import Data.String (String)
import Data.Text (Text, unwords, unpack) import Data.Text (Text, unwords, unpack)
......
...@@ -47,7 +47,7 @@ cooc2graph' distance threshold myCooc = distanceMap ...@@ -47,7 +47,7 @@ cooc2graph' distance threshold myCooc = distanceMap
where where
(ti, _) = createIndices myCooc (ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc myCooc' = toIndex ti myCooc
matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 0) myCooc' matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 1) myCooc'
distanceMat = measure distance matCooc distanceMat = measure distance matCooc
distanceMap = Map.filter (> threshold) $ mat2map distanceMat distanceMap = Map.filter (> threshold) $ mat2map distanceMat
......
...@@ -257,18 +257,21 @@ relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]] ...@@ -257,18 +257,21 @@ relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
relevantBranches term branches = relevantBranches term branches =
filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
accuracy :: Int -> [PhyloGroup] -> Double accuracy :: Int -> [(Date,Date)] -> [PhyloGroup] -> Double
accuracy x bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk) accuracy x periods bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk')
/ (fromIntegral $ length bk)) / (fromIntegral $ length bk'))
where
bk' :: [PhyloGroup]
bk' = filter (\g -> elem (g ^. phylo_groupPeriod) periods) bk
recall :: Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double recall :: Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
recall x bk bx = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk) recall x bk bx = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
/ (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx)) / (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx))
fScore :: Double -> Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double fScore :: Double -> Int -> [(Date,Date)] -> [PhyloGroup] -> [[PhyloGroup]] -> Double
fScore beta x bk bx = fScore beta x periods bk bx =
let rec = recall x bk bx let rec = recall x bk bx
acc = accuracy x bk acc = accuracy x periods bk
in ((1 + beta ** 2) * acc * rec) in ((1 + beta ** 2) * acc * rec)
/ (((beta ** 2) * rec + acc)) / (((beta ** 2) * rec + acc))
...@@ -284,7 +287,8 @@ toPhyloQuality' beta freq branches = ...@@ -284,7 +287,8 @@ toPhyloQuality' beta freq branches =
else sum else sum
$ map (\i -> $ map (\i ->
let bks = relevantBranches i branches let bks = relevantBranches i branches
in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore beta i bk bks)) bks)) periods = nub $ map _phylo_groupPeriod $ concat bks
in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore beta i periods bk bks)) bks))
$ keys freq $ keys freq
toRecall :: Map Int Double -> [[PhyloGroup]] -> Double toRecall :: Map Int Double -> [[PhyloGroup]] -> Double
...@@ -311,8 +315,10 @@ toAccuracy freq branches = ...@@ -311,8 +315,10 @@ toAccuracy freq branches =
$ map (\x -> $ map (\x ->
let px = freq ! x let px = freq ! x
bx = relevantBranches x branches bx = relevantBranches x branches
-- | periods containing x
periods = nub $ map _phylo_groupPeriod $ concat bx
wks = sum $ map wk bx wks = sum $ map wk bx
in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (accuracy x bk)) bx)) in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (accuracy x periods bk)) bx))
$ keys freq $ keys freq
where where
pys :: Double pys :: Double
...@@ -328,8 +334,10 @@ toPhyloQuality beta freq branches = ...@@ -328,8 +334,10 @@ toPhyloQuality beta freq branches =
$ map (\x -> $ map (\x ->
let px = freq ! x let px = freq ! x
bx = relevantBranches x branches bx = relevantBranches x branches
-- | periods containing x
periods = nub $ map _phylo_groupPeriod $ concat bx
wks = sum $ map wk bx wks = sum $ map wk bx
in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x bk bx)) bx)) in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x periods bk bx)) bx))
$ keys freq $ keys freq
where where
pys :: Double pys :: Double
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment