Commit 7fef621e authored by qlobbe's avatar qlobbe

add the maxClique (in progress)

parent 4f17f5dd
...@@ -88,11 +88,13 @@ data ContextualUnit = ...@@ -88,11 +88,13 @@ data ContextualUnit =
Fis Fis
{ _fis_support :: Int { _fis_support :: Int
, _fis_size :: Int } , _fis_size :: Int }
| MaxClique
{ _clique_size :: Int }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data Quality = data Quality =
Quality { _qua_relevance :: Double Quality { _qua_granularity :: Double
, _qua_minBranch :: Int } , _qua_minBranch :: Int }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
...@@ -125,7 +127,7 @@ defaultConfig = ...@@ -125,7 +127,7 @@ defaultConfig =
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 0 0.1 , phyloProximity = WeightedLogJaccard 10 0 0.1
, phyloSynchrony = ByProximityDistribution 0 , phyloSynchrony = ByProximityDistribution 0
, phyloQuality = Quality 10 3 , phyloQuality = Quality 0 3
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, contextualUnit = Fis 1 5 , contextualUnit = Fis 1 5
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2] , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
...@@ -315,17 +317,13 @@ data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show) ...@@ -315,17 +317,13 @@ data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
-- | Frequent Item Set | -- -- | Frequent Item Set | --
--------------------------- ---------------------------
-- | Clique : Set of ngrams cooccurring in the same Document
type Clique = Set Ngrams
-- | Support : Number of Documents where a Clique occurs -- | Support : Number of Documents where a Clique occurs
type Support = Int type Support = Int
-- | Fis : Frequent Items Set (ie: the association between a Clique and a Support) data PhyloCUnit = PhyloCUnit
data PhyloFis = PhyloFis { _phyloCUnit_nodes :: Set Ngrams
{ _phyloFis_clique :: Clique , _phyloCUnit_support :: Support
, _phyloFis_support :: Support , _phyloCUnit_period :: (Date,Date)
, _phyloFis_period :: (Date,Date)
} deriving (Generic,NFData,Show,Eq) } deriving (Generic,NFData,Show,Eq)
...@@ -378,7 +376,7 @@ makeLenses ''ContextualUnit ...@@ -378,7 +376,7 @@ makeLenses ''ContextualUnit
makeLenses ''PhyloLabel makeLenses ''PhyloLabel
makeLenses ''TimeUnit makeLenses ''TimeUnit
makeLenses ''PhyloFoundations makeLenses ''PhyloFoundations
makeLenses ''PhyloFis makeLenses ''PhyloCUnit
makeLenses ''Phylo makeLenses ''Phylo
makeLenses ''PhyloPeriod makeLenses ''PhyloPeriod
makeLenses ''PhyloLevel makeLenses ''PhyloLevel
......
...@@ -62,8 +62,8 @@ phylo1 = temporalMatching ...@@ -62,8 +62,8 @@ phylo1 = temporalMatching
--------------------------------------------- ---------------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis] phyloFis :: Map (Date,Date) [PhyloCUnit]
phyloFis = toPhyloFis docsByPeriods (getFisSupport $ contextualUnit config) (getFisSize $ contextualUnit config) phyloFis = toPhyloFis docsByPeriods (getContextualUnitSupport $ contextualUnit config) (getContextualUnitSize $ contextualUnit config)
docsByPeriods :: Map (Date,Date) [Document] docsByPeriods :: Map (Date,Date) [Document]
......
...@@ -171,7 +171,7 @@ exportToDot phylo export = ...@@ -171,7 +171,7 @@ exportToDot phylo export =
,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo)) ,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo)) ,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo)) ,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_relevance $ phyloQuality $ getConfig phylo)) ,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
]) ])
......
...@@ -72,23 +72,23 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -72,23 +72,23 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
(\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel) (\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel)
then then
let pId = phyloLvl ^. phylo_levelPeriod let pId = phyloLvl ^. phylo_levelPeriod
phyloFis = m ! pId phyloCUnit = m ! pId
in phyloLvl in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj -> & phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups) groups ++ [ (((pId,lvl),length groups)
, f obj pId lvl (length groups) (getRoots phylo) , f obj pId lvl (length groups) (getRoots phylo)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId])) (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloFis) ] ) [] phyloCUnit)
else else
phyloLvl ) phyloLvl )
phylo phylo
fisToGroup :: PhyloFis -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup fisToGroup :: PhyloCUnit -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
fisToGroup fis pId lvl idx fdt coocs = fisToGroup fis pId lvl idx fdt coocs =
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloFis_clique) fdt let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloCUnit_nodes) fdt
in PhyloGroup pId lvl idx "" in PhyloGroup pId lvl idx ""
(fis ^. phyloFis_support) (fis ^. phyloCUnit_support)
ngrams ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
(1,[0]) (1,[0])
...@@ -98,11 +98,13 @@ fisToGroup fis pId lvl idx fdt coocs = ...@@ -98,11 +98,13 @@ fisToGroup fis pId lvl idx fdt coocs =
toPhylo1 :: [Document] -> Phylo -> Phylo toPhylo1 :: [Document] -> Phylo -> Phylo
toPhylo1 docs phyloBase = temporalMatching toPhylo1 docs phyloBase = temporalMatching
$ appendGroups fisToGroup 1 phyloFis phyloBase $ appendGroups fisToGroup 1 phyloCUnit phyloBase
where where
-------------------------------------- --------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis] phyloCUnit :: Map (Date,Date) [PhyloCUnit]
phyloFis = toPhyloFis docs' (getFisSupport $ contextualUnit $ getConfig phyloBase) (getFisSize $ contextualUnit $ getConfig phyloBase) phyloCUnit = case (contextualUnit $ getConfig phyloBase) of
Fis s s' -> toPhyloFis docs' s s'
MaxClique _ -> undefined
-------------------------------------- --------------------------------------
docs' :: Map (Date,Date) [Document] docs' :: Map (Date,Date) [Document]
docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs
...@@ -115,30 +117,30 @@ toPhylo1 docs phyloBase = temporalMatching ...@@ -115,30 +117,30 @@ toPhylo1 docs phyloBase = temporalMatching
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False) -- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis :: Bool -> Int -> (Int -> [PhyloFis] -> [PhyloFis]) -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] filterFis :: Bool -> Int -> (Int -> [PhyloCUnit] -> [PhyloCUnit]) -> Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit]
filterFis keep thr f m = case keep of filterFis keep thr f m = case keep of
False -> map (\l -> f thr l) m False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m True -> map (\l -> keepFilled (f) thr l) m
-- | To filter Fis with small Support -- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis] filterFisBySupport :: Int -> [PhyloCUnit] -> [PhyloCUnit]
filterFisBySupport thr l = filter (\fis -> (fis ^. phyloFis_support) >= thr) l filterFisBySupport thr l = filter (\fis -> (fis ^. phyloCUnit_support) >= thr) l
-- | To filter Fis with small Clique size -- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis] filterFisByClique :: Int -> [PhyloCUnit] -> [PhyloCUnit]
filterFisByClique thr l = filter (\fis -> (size $ fis ^. phyloFis_clique) >= thr) l filterFisByClique thr l = filter (\fis -> (size $ fis ^. phyloCUnit_nodes) >= thr) l
-- | To filter nested Fis -- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] filterFisByNested :: Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit]
filterFisByNested m = filterFisByNested m =
let fis = map (\l -> let fis = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloFis_clique) (Set.toList $ f ^. phyloFis_clique)) mem) foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloCUnit_nodes) (Set.toList $ f ^. phyloCUnit_nodes)) mem)
then mem then mem
else else
let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloFis_clique) (Set.toList $ f' ^. phyloFis_clique)) mem let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloCUnit_nodes) (Set.toList $ f' ^. phyloCUnit_nodes)) mem
in fMax ++ [f] ) [] l) in fMax ++ [f] ) [] l)
$ elems m $ elems m
fis' = fis `using` parList rdeepseq fis' = fis `using` parList rdeepseq
...@@ -146,7 +148,7 @@ filterFisByNested m = ...@@ -146,7 +148,7 @@ filterFisByNested m =
-- | To transform a time map of docs innto a time map of Fis with some filters -- | To transform a time map of docs innto a time map of Fis with some filters
toPhyloFis :: Map (Date, Date) [Document] -> Int -> Int -> Map (Date,Date) [PhyloFis] toPhyloFis :: Map (Date, Date) [Document] -> Int -> Int -> Map (Date,Date) [PhyloCUnit]
toPhyloFis phyloDocs support clique = traceFis "Filtered Fis" toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
$ filterFisByNested $ filterFisByNested
$ traceFis "Filtered by clique size" $ traceFis "Filtered by clique size"
...@@ -156,10 +158,10 @@ toPhyloFis phyloDocs support clique = traceFis "Filtered Fis" ...@@ -156,10 +158,10 @@ toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
$ traceFis "Unfiltered Fis" phyloFis $ traceFis "Unfiltered Fis" phyloFis
where where
-------------------------------------- --------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis] phyloFis :: Map (Date,Date) [PhyloCUnit]
phyloFis = phyloFis =
let fis = map (\(prd,docs) -> let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs) let fis = map (\(prd,docs) -> let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in (prd, map (\f -> PhyloFis (fst f) (snd f) prd) lst)) in (prd, map (\f -> PhyloCUnit (fst f) (snd f) prd) lst))
$ toList phyloDocs $ toList phyloDocs
fis' = fis `using` parList rdeepseq fis' = fis `using` parList rdeepseq
in fromList fis' in fromList fis'
...@@ -209,9 +211,9 @@ groupDocsByPeriod f pds es = ...@@ -209,9 +211,9 @@ groupDocsByPeriod f pds es =
docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double
docsToTermFreq docs fdt = docsToTermFreq docs fdt =
let nbDocs = fromIntegral $ length docs let nbDocs = fromIntegral $ length docs
freqs = map (/nbDocs) freqs = map (/(log nbDocs))
$ fromList $ fromList
$ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst)) $ map (\lst -> (head' "docsToTermFreq" lst, log $ fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) docs $ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) docs
sumFreqs = sum $ elems freqs sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs in map (/sumFreqs) freqs
......
...@@ -162,28 +162,28 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l) ...@@ -162,28 +162,28 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l)
else f thr l else f thr l
traceClique :: Map (Date, Date) [PhyloFis] -> String traceClique :: Map (Date, Date) [PhyloCUnit] -> String
traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6] traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where where
-------------------------------------- --------------------------------------
cliques :: [Double] cliques :: [Double]
cliques = sort $ map (fromIntegral . size . _phyloFis_clique) $ concat $ elems mFis cliques = sort $ map (fromIntegral . size . _phyloCUnit_nodes) $ concat $ elems mFis
-------------------------------------- --------------------------------------
traceSupport :: Map (Date, Date) [PhyloFis] -> String traceSupport :: Map (Date, Date) [PhyloCUnit] -> String
traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6] traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where where
-------------------------------------- --------------------------------------
supports :: [Double] supports :: [Double]
supports = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems mFis supports = sort $ map (fromIntegral . _phyloCUnit_support) $ concat $ elems mFis
-------------------------------------- --------------------------------------
traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] traceFis :: [Char] -> Map (Date, Date) [PhyloCUnit] -> Map (Date, Date) [PhyloCUnit]
traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n" traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
<> "Support : " <> (traceSupport mFis) <> "\n" <> "Support : " <> (traceSupport mFis) <> "\n"
<> "Clique : " <> (traceClique mFis) <> "\n" ) mFis <> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis
------------------------- -------------------------
...@@ -191,15 +191,15 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l ...@@ -191,15 +191,15 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
------------------------- -------------------------
getFisSupport :: ContextualUnit -> Int getContextualUnitSupport :: ContextualUnit -> Int
getFisSupport unit = case unit of getContextualUnitSupport unit = case unit of
Fis s _ -> s Fis s _ -> s
-- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support") MaxClique _ -> 0
getFisSize :: ContextualUnit -> Int getContextualUnitSize :: ContextualUnit -> Int
getFisSize unit = case unit of getContextualUnitSize unit = case unit of
Fis _ s -> s Fis _ s -> s
-- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size") MaxClique s -> s
-------------- --------------
......
...@@ -15,7 +15,7 @@ Portability : POSIX ...@@ -15,7 +15,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, delete, or) import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or)
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, intersectionWith, findWithDefault, keys, (!), filterWithKey) import Data.Map (Map, fromList, elems, restrictKeys, unionWith, intersectionWith, findWithDefault, keys, (!), filterWithKey)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -234,61 +234,10 @@ phyloBranchMatching frame periods proximity thr docs branch = traceBranchMatchin ...@@ -234,61 +234,10 @@ phyloBranchMatching frame periods proximity thr docs branch = traceBranchMatchin
----------------------- -----------------------
count :: Eq a => a -> [a] -> Int
count x = length . filter (== x)
termFreq' :: Int -> [PhyloGroup] -> Double
termFreq' term groups =
let ngrams = concat $ map _phylo_groupNgrams groups
in log ((fromIntegral $ count term ngrams)
/ (fromIntegral $ length ngrams))
relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]] relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
relevantBranches term branches = relevantBranches term branches =
filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
branchCov' :: [PhyloGroup] -> [[PhyloGroup]] -> Double
branchCov' branch branches =
(fromIntegral $ length branch) / (fromIntegral $ length $ concat branches)
toRecall :: Double -> Int -> Int -> [[PhyloGroup]] -> Double
toRecall freq term border branches =
-- | given a random term in a phylo
freq
-- | for each relevant branches
* (sum $ map (\branch ->
-- | given its local coverage
((branchCov' branch branches') / (sum $ map (\b -> branchCov' b branches') branches'))
-- | compute the local recall
* ( (fromIntegral $ length $ filter (\group -> elem term $ group ^. phylo_groupNgrams) branch)
/ ( (fromIntegral $ length $ filter (\group -> elem term $ group ^. phylo_groupNgrams) $ concat branches')
-- | with a ponderation from border branches
+ (fromIntegral border)) )) branches')
where
branches' :: [[PhyloGroup]]
branches' = relevantBranches term branches
toAccuracy :: Double -> Int -> [[PhyloGroup]] -> Double
toAccuracy freq term branches =
if (null branches)
then 0
else
-- | given a random term in a phylo
freq
-- | for each relevant branches
* (sum $ map (\branch ->
-- | given its local coverage
((branchCov' branch branches') / (sum $ map (\b -> branchCov' b branches') branches'))
-- | compute the local accuracy
* ( (fromIntegral $ length $ filter (\group -> elem term $ group ^. phylo_groupNgrams) branch)
/ (fromIntegral $ length branch))) branches')
where
branches' :: [[PhyloGroup]]
branches' = relevantBranches term branches
fScore :: Double -> Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double fScore :: Double -> Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
fScore beta i bk bks = fScore beta i bk bks =
let recall = ( (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) bk) let recall = ( (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) bk)
...@@ -314,28 +263,6 @@ toPhyloQuality' beta freq branches = ...@@ -314,28 +263,6 @@ toPhyloQuality' beta freq branches =
$ keys freq $ keys freq
toPhyloQuality :: Double -> Map Int Double -> Int -> Double -> [[PhyloGroup]] -> Double
toPhyloQuality beta frequency border oldAcc branches =
-- trace (" rec : " <> show(recall)) $
-- trace (" acc : " <> show(accuracy)) $
if (null branches)
then 0
else ((1 + beta ** 2) * accuracy * recall)
/ (((beta ** 2) * accuracy + recall))
where
-- | for each term compute the global accuracy
accuracy :: Double
accuracy = oldAcc + (sum $ map (\term -> toAccuracy (frequency ! term) term branches) $ keys frequency)
-- | for each term compute the global recall
recall :: Double
recall = sum $ map (\term -> toRecall (frequency ! term) term border branches) $ keys frequency
toBorderAccuracy :: Map Int Double -> [[PhyloGroup]] -> Double
toBorderAccuracy freq branches = sum $ map (\t -> toAccuracy (freq ! t) t branches) $ keys freq
----------------------------- -----------------------------
-- | Adaptative Matching | -- -- | Adaptative Matching | --
----------------------------- -----------------------------
...@@ -366,12 +293,6 @@ reduceFrequency frequency branches = ...@@ -366,12 +293,6 @@ reduceFrequency frequency branches =
restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches) restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
alterBorder :: Int -> [[PhyloGroup]] -> [PhyloGroup] -> Int
alterBorder border branches branch = border + (length $ concat branches) - (length branch)
-- | Important ne pas virer les filtree mais les mettre en false
seqMatching :: Proximity -> Double -> Map Int Double -> Int -> Double -> Int -> Map Date Double -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)] seqMatching :: Proximity -> Double -> Map Int Double -> Int -> Double -> Int -> Map Date Double -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
seqMatching proximity beta frequency minBranch egoThr frame docs periods done ego rest = seqMatching proximity beta frequency minBranch egoThr frame docs periods done ego rest =
-- | 1) keep or not the new division of ego -- | 1) keep or not the new division of ego
...@@ -422,38 +343,6 @@ recursiveMatching' proximity beta minBranch frequency egoThr frame periods docs ...@@ -422,38 +343,6 @@ recursiveMatching' proximity beta minBranch frequency egoThr frame periods docs
in recursiveMatching' proximity beta minBranch frequency' (egoThr + (getThresholdStep proximity)) frame periods docs branches' in recursiveMatching' proximity beta minBranch frequency' (egoThr + (getThresholdStep proximity)) frame periods docs branches'
recursiveMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> Double -> Int -> Double -> [PhyloGroup] -> [PhyloGroup]
recursiveMatching proximity beta minBranch frequency egoThr frame periods docs quality border oldAcc groups =
if ((egoThr >= 1) || (quality > quality') || ((length $ concat $ snd branches') == (length groups)))
then
trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality') <> "\n"
<> " |✓ " <> show(length $ fst branches') <> show(map length $ fst branches')
<> " |✗ " <> show(length $ snd branches') <> "[" <> show(length $ concat $ snd branches') <> "]") $
groups
else
let next = map (\b -> recursiveMatching proximity beta minBranch
(reduceFrequency frequency (fst branches'))
(egoThr + (getThresholdStep proximity))
frame periods docs quality'
(alterBorder border (fst branches') b)
(oldAcc + (toBorderAccuracy frequency (delete b ((fst branches') ++ (snd branches')))))
b ) (fst branches')
in trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality') <> "\n"
<> " |✓ " <> show(length $ fst branches') <> show(map length $ fst branches')
<> " |✗ " <> show(length $ snd branches') <> "[" <> show(length $ concat $ snd branches') <> "]") $
concat (next ++ (snd branches'))
where
-- | 2) for each of the possible next branches process the phyloQuality score
quality' :: Double
quality' = toPhyloQuality beta frequency border oldAcc ((fst branches') ++ (snd branches'))
-- | 1) for each local branch process a temporal matching then find the resulting branches
branches' :: ([[PhyloGroup]],[[PhyloGroup]])
branches' =
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ phyloBranchMatching frame periods proximity egoThr docs groups
in partition (\b -> length b >= minBranch) (branches `using` parList rdeepseq)
temporalMatching :: Phylo -> Phylo temporalMatching :: Phylo -> Phylo
temporalMatching phylo = updatePhyloGroups 1 temporalMatching phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches) (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
...@@ -463,7 +352,7 @@ temporalMatching phylo = updatePhyloGroups 1 ...@@ -463,7 +352,7 @@ temporalMatching phylo = updatePhyloGroups 1
branches :: [[PhyloGroup]] branches :: [[PhyloGroup]]
branches = map fst branches = map fst
$ recursiveMatching' (phyloProximity $ getConfig phylo) $ recursiveMatching' (phyloProximity $ getConfig phylo)
(_qua_relevance $ phyloQuality $ getConfig phylo) (_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo) (_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq) (phylo ^. phylo_termFreq)
(getThresholdInit $ phyloProximity $ getConfig phylo) (getThresholdInit $ phyloProximity $ getConfig phylo)
...@@ -477,47 +366,3 @@ temporalMatching phylo = updatePhyloGroups 1 ...@@ -477,47 +366,3 @@ temporalMatching phylo = updatePhyloGroups 1
(phyloProximity $ getConfig phylo) (getThresholdInit $ phyloProximity $ getConfig phylo) (phyloProximity $ getConfig phylo) (getThresholdInit $ phyloProximity $ getConfig phylo)
(phylo ^. phylo_timeDocs) (phylo ^. phylo_timeDocs)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo) (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
\ No newline at end of file
temporalMatching' :: Phylo -> Phylo
temporalMatching' phylo = updatePhyloGroups 1 branches' phylo
where
-- | 5) apply the recursive matching
branches' :: Map PhyloGroupId PhyloGroup
branches' =
let next = trace (" ✓ F(β) = " <> show(quality)
<> " |✓ " <> show(length $ fst branches) <> show(map length $ fst branches)
<> " |✗ " <> show(length $ snd branches) <> "[" <> show(length $ concat $ snd branches) <> "]")
$ map (\branch -> recursiveMatching (phyloProximity $ getConfig phylo)
(_qua_relevance $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo)
(reduceFrequency frequency (fst branches))
( (getThresholdInit $ phyloProximity $ getConfig phylo)
+ (getThresholdStep $ phyloProximity $ getConfig phylo))
(getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo)
(phylo ^. phylo_timeDocs) quality (alterBorder 0 (fst branches) branch)
(toBorderAccuracy frequency (delete branch ((fst branches) ++ (snd branches))))
branch
) (fst branches)
in fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd (concat (next ++ (snd branches)))
-- | 4) process the quality score
quality :: Double
quality = toPhyloQuality (_qua_relevance $ phyloQuality $ getConfig phylo) frequency 0 0 ((fst branches) ++ (snd branches))
-- | 3) process the constants of the quality score
frequency :: Map Int Double
frequency =
let terms = ngramsInBranches ((fst branches) ++ (snd branches))
freqs = map (\t -> termFreq' t $ concat ((fst branches) ++ (snd branches))) terms
in fromList $ map (\(t,freq) -> (t,freq/(sum freqs))) $ zip terms freqs
-- | 2) group into branches
branches :: ([[PhyloGroup]],[[PhyloGroup]])
branches = partition (\b -> length b >= (_qua_minBranch $ phyloQuality $ getConfig phylo))
$ groupsToBranches $ fromList $ map (\group -> (getGroupId group, group)) groups'
-- | 1) for each group process an initial temporal Matching
groups' :: [PhyloGroup]
groups' = phyloBranchMatching (getTimeFrame $ timeUnit $ getConfig phylo) (getPeriodIds phylo)
(phyloProximity $ getConfig phylo) (getThresholdInit $ phyloProximity $ getConfig phylo)
(phylo ^. phylo_timeDocs)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment