Commit 2120f449 authored by qlobbe's avatar qlobbe Committed by Alexandre Delanoë

refactoring

parent c6d78653
...@@ -152,7 +152,7 @@ seaToLabel config = case (seaElevation config) of ...@@ -152,7 +152,7 @@ seaToLabel config = case (seaElevation config) of
sensToLabel :: PhyloConfig -> [Char] sensToLabel :: PhyloConfig -> [Char]
sensToLabel config = case (phyloProximity config) of sensToLabel config = case (similarity config) of
Hamming _ _ -> undefined Hamming _ _ -> undefined
WeightedLogJaccard s _ -> ("WeightedLogJaccard_" <> show s) WeightedLogJaccard s _ -> ("WeightedLogJaccard_" <> show s)
WeightedLogSim s _ -> ( "WeightedLogSim-sens_" <> show s) WeightedLogSim s _ -> ( "WeightedLogSim-sens_" <> show s)
......
...@@ -71,7 +71,7 @@ data SeaElevation = ...@@ -71,7 +71,7 @@ data SeaElevation =
instance ToSchema SeaElevation instance ToSchema SeaElevation
data Proximity = data Similarity =
WeightedLogJaccard WeightedLogJaccard
{ _wlj_sensibility :: Double { _wlj_sensibility :: Double
, _wlj_minSharedNgrams :: Int } , _wlj_minSharedNgrams :: Int }
...@@ -84,7 +84,7 @@ data Proximity = ...@@ -84,7 +84,7 @@ data Proximity =
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema Proximity where instance ToSchema Similarity where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
...@@ -179,7 +179,7 @@ data PhyloConfig = ...@@ -179,7 +179,7 @@ data PhyloConfig =
, listParser :: ListParser , listParser :: ListParser
, phyloName :: Text , phyloName :: Text
, phyloScale :: Int , phyloScale :: Int
, phyloProximity :: Proximity , similarity :: Similarity
, seaElevation :: SeaElevation , seaElevation :: SeaElevation
, findAncestors :: Bool , findAncestors :: Bool
, phyloSynchrony :: Synchrony , phyloSynchrony :: Synchrony
...@@ -205,7 +205,7 @@ data PhyloSubConfig = ...@@ -205,7 +205,7 @@ data PhyloSubConfig =
subConfig2config :: PhyloSubConfig -> PhyloConfig subConfig2config :: PhyloSubConfig -> PhyloConfig
subConfig2config subConfig = defaultConfig { phyloProximity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1 subConfig2config subConfig = defaultConfig { similarity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1
, phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
, phyloQuality = Quality (_sc_phyloQuality subConfig) 1 , phyloQuality = Quality (_sc_phyloQuality subConfig) 1
, timeUnit = _sc_timeUnit subConfig , timeUnit = _sc_timeUnit subConfig
...@@ -223,7 +223,7 @@ defaultConfig = ...@@ -223,7 +223,7 @@ defaultConfig =
, listParser = V4 , listParser = V4
, phyloName = pack "Phylo Name" , phyloName = pack "Phylo Name"
, phyloScale = 2 , phyloScale = 2
, phyloProximity = WeightedLogJaccard 0.5 1 , similarity = WeightedLogJaccard 0.5 1
, seaElevation = Constante 0.1 0.1 , seaElevation = Constante 0.1 0.1
, findAncestors = False , findAncestors = False
, phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups
...@@ -251,8 +251,8 @@ instance ToJSON CorpusParser ...@@ -251,8 +251,8 @@ instance ToJSON CorpusParser
instance FromJSON ListParser instance FromJSON ListParser
instance ToJSON ListParser instance ToJSON ListParser
instance FromJSON Proximity instance FromJSON Similarity
instance ToJSON Proximity instance ToJSON Similarity
instance FromJSON SeaElevation instance FromJSON SeaElevation
instance ToJSON SeaElevation instance ToJSON SeaElevation
...@@ -592,7 +592,7 @@ instance ToSchema PhyloExport where ...@@ -592,7 +592,7 @@ instance ToSchema PhyloExport where
makeLenses ''PhyloConfig makeLenses ''PhyloConfig
makeLenses ''PhyloSubConfig makeLenses ''PhyloSubConfig
makeLenses ''Proximity makeLenses ''Similarity
makeLenses ''SeaElevation makeLenses ''SeaElevation
makeLenses ''Quality makeLenses ''Quality
makeLenses ''Cluster makeLenses ''Cluster
......
...@@ -25,7 +25,7 @@ import Data.Vector (Vector) ...@@ -25,7 +25,7 @@ import Data.Vector (Vector)
import Debug.Trace (trace) import Debug.Trace (trace)
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods) import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
import Gargantext.Prelude hiding (scale) import Gargantext.Prelude hiding (scale)
import Prelude (writeFile) import Prelude (writeFile)
import System.FilePath import System.FilePath
...@@ -288,9 +288,9 @@ exportToDot phylo export = ...@@ -288,9 +288,9 @@ exportToDot phylo export =
{- 8) create the edges between the branches {- 8) create the edges between the branches
-- _ <- mapM (\(bId,bId') -> -- _ <- mapM (\(bId,bId') ->
-- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId') -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
-- (Text.pack $ show(branchIdsToProximity bId bId' -- (Text.pack $ show(branchIdsToSimilarity bId bId'
-- (getThresholdInit $ phyloProximity $ getConfig phylo) -- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
-- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch -- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
-- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
-} -}
...@@ -595,23 +595,23 @@ getGroupThr step g = ...@@ -595,23 +595,23 @@ getGroupThr step g =
breaks = (g ^. phylo_groupMeta) ! "breaks" breaks = (g ^. phylo_groupMeta) ! "breaks"
in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup toAncestor :: Double -> Map Int Double -> Similarity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
toAncestor nbDocs diago proximity step candidates ego = toAncestor nbDocs diago similarity step candidates ego =
let curr = ego ^. phylo_groupAncestors let curr = ego ^. phylo_groupAncestors
in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w)) in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
$ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g)))) $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
$ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams))) $ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
$ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates)) $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup] headsToAncestors :: Double -> Map Int Double -> Similarity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
headsToAncestors nbDocs diago proximity step heads acc = headsToAncestors nbDocs diago similarity step heads acc =
if (null heads) if (null heads)
then acc then acc
else else
let ego = head' "headsToAncestors" heads let ego = head' "headsToAncestors" heads
heads' = tail' "headsToAncestors" heads heads' = tail' "headsToAncestors" heads
in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego]) in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
toHorizon :: Phylo -> Phylo toHorizon :: Phylo -> Phylo
...@@ -645,13 +645,13 @@ toHorizon phylo = ...@@ -645,13 +645,13 @@ toHorizon phylo =
noHeads = groups \\ heads noHeads = groups \\ heads
nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd] nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd] diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
proximity = (phyloProximity $ getConfig phylo) sim = (similarity $ getConfig phylo)
step = case getSeaElevation phylo of step = case getSeaElevation phylo of
Constante _ s -> s Constante _ s -> s
Adaptative _ -> 0 Adaptative _ -> 0
-- in headsToAncestors nbDocs diago proximity heads groups [] -- in headsToAncestors nbDocs diago Similarity heads groups []
in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego) in map (\ego -> toAncestor nbDocs diago sim step noHeads ego)
$ headsToAncestors nbDocs diago proximity step heads [] $ headsToAncestors nbDocs diago sim step heads []
) periods ) periods
-- | 3) process this task concurrently -- | 3) process this task concurrently
newGroups :: [[PhyloGroup]] newGroups :: [[PhyloGroup]]
......
...@@ -31,10 +31,11 @@ import Gargantext.Core.Viz.Phylo ...@@ -31,10 +31,11 @@ import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon) import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity) import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toSimilarity)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
------------------ ------------------
...@@ -78,6 +79,13 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou ...@@ -78,6 +79,13 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou
----------------------------- -----------------------------
{-
-- create a square ladder
-}
squareLadder :: [Double] -> [Double]
squareLadder ladder = List.map (\x -> x * x) ladder
{- {-
-- create an adaptative diachronic 'sea elevation' ladder -- create an adaptative diachronic 'sea elevation' ladder
-} -}
...@@ -107,7 +115,7 @@ constDiachronicLadder curr step ladder = ...@@ -107,7 +115,7 @@ constDiachronicLadder curr step ladder =
-} -}
scanSimilarity :: Scale -> Phylo -> Phylo scanSimilarity :: Scale -> Phylo -> Phylo
scanSimilarity lvl phylo = scanSimilarity lvl phylo =
let proximity = phyloProximity $ getConfig phylo let proximity = similarity $ getConfig phylo
scanning = foldlWithKey (\acc pId pds -> scanning = foldlWithKey (\acc pId pds ->
-- 1) process period by period -- 1) process period by period
let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams))
...@@ -124,7 +132,7 @@ scanSimilarity lvl phylo = ...@@ -124,7 +132,7 @@ scanSimilarity lvl phylo =
map (\(id',ngrams') -> map (\(id',ngrams') ->
let nbDocs = (sum . elems) $ filterDocs docs ([idToPrd id, idToPrd id']) let nbDocs = (sum . elems) $ filterDocs docs ([idToPrd id, idToPrd id'])
diago = reduceDiagos $ filterDiago diagos ([idToPrd id, idToPrd id']) diago = reduceDiagos $ filterDiago diagos ([idToPrd id, idToPrd id'])
in ((id,id'),toProximity nbDocs diago proximity ngrams ngrams' ngrams') in ((id,id'),toSimilarity nbDocs diago proximity ngrams ngrams' ngrams')
) $ filter (\(_,ngrams') -> (not . null) $ intersect ngrams ngrams') targets ) $ filter (\(_,ngrams') -> (not . null) $ intersect ngrams ngrams') targets
) egos ) egos
pairs' = pairs `using` parList rdeepseq pairs' = pairs `using` parList rdeepseq
...@@ -173,7 +181,7 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx "" ...@@ -173,7 +181,7 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
addTemporalLinksToPhylo :: Phylo -> Phylo addTemporalLinksToPhylo :: Phylo -> Phylo
addTemporalLinksToPhylo phylowithoutLink = case strategy of addTemporalLinksToPhylo phylowithoutLink = case strategy of
Constante start gap -> temporalMatching (constDiachronicLadder start gap Set.empty) phylowithoutLink Constante start gap -> temporalMatching (constDiachronicLadder start gap Set.empty) phylowithoutLink
Adaptative steps -> temporalMatching (adaptDiachronicLadder steps (phylowithoutLink ^. phylo_diaSimScan) Set.empty) phylowithoutLink Adaptative steps -> temporalMatching (squareLadder $ adaptDiachronicLadder steps (phylowithoutLink ^. phylo_diaSimScan) Set.empty) phylowithoutLink
where where
strategy :: SeaElevation strategy :: SeaElevation
strategy = getSeaElevation phylowithoutLink strategy = getSeaElevation phylowithoutLink
......
...@@ -330,16 +330,16 @@ getPeriodPointers fil g = ...@@ -330,16 +330,16 @@ getPeriodPointers fil g =
ToChildsMemory -> undefined ToChildsMemory -> undefined
ToParentsMemory -> undefined ToParentsMemory -> undefined
filterProximity :: Proximity -> Double -> Double -> Bool filterSimilarity :: Similarity -> Double -> Double -> Bool
filterProximity proximity thr local = filterSimilarity similarity thr local =
case proximity of case similarity of
WeightedLogJaccard _ _ -> local >= thr WeightedLogJaccard _ _ -> local >= thr
WeightedLogSim _ _ -> local >= thr WeightedLogSim _ _ -> local >= thr
Hamming _ _ -> undefined Hamming _ _ -> undefined
getProximityName :: Proximity -> String getSimilarityName :: Similarity -> String
getProximityName proximity = getSimilarityName similarity =
case proximity of case similarity of
WeightedLogJaccard _ _ -> "WLJaccard" WeightedLogJaccard _ _ -> "WLJaccard"
WeightedLogSim _ _ -> "WeightedLogSim" WeightedLogSim _ _ -> "WeightedLogSim"
Hamming _ _ -> "Hamming" Hamming _ _ -> "Hamming"
...@@ -578,16 +578,16 @@ traceSynchronyStart phylo = ...@@ -578,16 +578,16 @@ traceSynchronyStart phylo =
------------------- -------------------
-- | Proximity | -- -- | Similarity | --
------------------- -------------------
getSensibility :: Proximity -> Double getSensibility :: Similarity -> Double
getSensibility proxi = case proxi of getSensibility proxi = case proxi of
WeightedLogJaccard s _ -> s WeightedLogJaccard s _ -> s
WeightedLogSim s _ -> s WeightedLogSim s _ -> s
Hamming _ _ -> undefined Hamming _ _ -> undefined
getMinSharedNgrams :: Proximity -> Int getMinSharedNgrams :: Similarity -> Int
getMinSharedNgrams proxi = case proxi of getMinSharedNgrams proxi = case proxi of
WeightedLogJaccard _ m -> m WeightedLogJaccard _ m -> m
WeightedLogSim _ m -> m WeightedLogSim _ m -> m
...@@ -605,8 +605,8 @@ intersectInit acc lst lst' = ...@@ -605,8 +605,8 @@ intersectInit acc lst lst' =
then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst') then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
else acc else acc
branchIdsToProximity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double branchIdsToSimilarity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
branchIdsToProximity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id')) branchIdsToSimilarity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
ngramsInBranches :: [[PhyloGroup]] -> [Int] ngramsInBranches :: [[PhyloGroup]] -> [Int]
ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
...@@ -662,4 +662,4 @@ traceTemporalMatching groups = ...@@ -662,4 +662,4 @@ traceTemporalMatching groups =
traceGroupsProxi :: [Double] -> [Double] traceGroupsProxi :: [Double] -> [Double]
traceGroupsProxi l = traceGroupsProxi l =
trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups proximity" <> "\n") l trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups Similarity" <> "\n") l
...@@ -124,7 +124,7 @@ toDiamonds groups = foldl' (\acc groups' -> ...@@ -124,7 +124,7 @@ toDiamonds groups = foldl' (\acc groups' ->
$ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups $ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups
groupsToEdges :: Proximity -> Synchrony -> Double -> Map Int Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] groupsToEdges :: Similarity -> Synchrony -> Double -> Map Int Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox sync nbDocs diago groups = groupsToEdges prox sync nbDocs diago groups =
case sync of case sync of
ByProximityThreshold thr sens _ strat -> ByProximityThreshold thr sens _ strat ->
...@@ -153,7 +153,7 @@ toParentId :: PhyloGroup -> PhyloGroupId ...@@ -153,7 +153,7 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupScale + 1), child ^. phylo_groupIndex) toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupScale + 1), child ^. phylo_groupIndex)
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup] reduceGroups :: Similarity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reduceGroups prox sync docs diagos branch = reduceGroups prox sync docs diagos branch =
-- 1) reduce a branch as a set of periods & groups -- 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++) let periods = fromListWith (++)
...@@ -197,7 +197,7 @@ levelUpAncestors groups = ...@@ -197,7 +197,7 @@ levelUpAncestors groups =
synchronicClustering :: Phylo -> Phylo synchronicClustering :: Phylo -> Phylo
synchronicClustering phylo = synchronicClustering phylo =
let prox = phyloProximity $ getConfig phylo let prox = similarity $ getConfig phylo
sync = phyloSynchrony $ getConfig phylo sync = phyloSynchrony $ getConfig phylo
docs = phylo ^. phylo_timeDocs docs = phylo ^. phylo_timeDocs
diagos = map coocToDiago $ phylo ^. phylo_timeCooc diagos = map coocToDiago $ phylo ^. phylo_timeCooc
......
...@@ -119,9 +119,9 @@ weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams ...@@ -119,9 +119,9 @@ weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams
{- {-
-- perform a seamilarity measure between a given group and a pair of targeted groups -- perform a seamilarity measure between a given group and a pair of targeted groups
-} -}
toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double toSimilarity :: Double -> Map Int Double -> Similarity -> [Int] -> [Int] -> [Int] -> Double
toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' = toSimilarity nbDocs diago similarity egoNgrams targetNgrams targetNgrams' =
case proximity of case similarity of
WeightedLogJaccard sens _ -> WeightedLogJaccard sens _ ->
let pairNgrams = if targetNgrams == targetNgrams' let pairNgrams = if targetNgrams == targetNgrams'
then targetNgrams then targetNgrams
...@@ -147,7 +147,7 @@ findLastPeriod fil periods = case fil of ...@@ -147,7 +147,7 @@ findLastPeriod fil periods = case fil of
ToChildsMemory -> undefined ToChildsMemory -> undefined
ToParentsMemory -> undefined ToParentsMemory -> undefined
removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> Period removeOldPointers :: [Pointer] -> Filiation -> Double -> Similarity -> Period
-> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))] -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
-> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))] -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
removeOldPointers oldPointers fil thr prox prd pairs removeOldPointers oldPointers fil thr prox prd pairs
...@@ -166,11 +166,11 @@ removeOldPointers oldPointers fil thr prox prd pairs ...@@ -166,11 +166,11 @@ removeOldPointers oldPointers fil thr prox prd pairs
|| (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs || (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
| otherwise = [] | otherwise = []
filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer] filterPointers :: Similarity -> Double -> [Pointer] -> [Pointer]
filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts filterPointers proxi thr pts = filter (\(_,w) -> filterSimilarity proxi thr w) pts
filterPointers' :: Proximity -> Double -> [(Pointer,[Int])] -> [(Pointer,[Int])] filterPointers' :: Similarity -> Double -> [(Pointer,[Int])] -> [(Pointer,[Int])]
filterPointers' proxi thr pts = filter (\((_,w),_) -> filterProximity proxi thr w) pts filterPointers' proxi thr pts = filter (\((_,w),_) -> filterSimilarity proxi thr w) pts
reduceDiagos :: Map Date Cooc -> Map Int Double reduceDiagos :: Map Date Cooc -> Map Int Double
...@@ -231,7 +231,7 @@ groupsToBranches groups = ...@@ -231,7 +231,7 @@ groupsToBranches groups =
{- {-
-- find the best pair/singleton of parents/childs for a given group -- find the best pair/singleton of parents/childs for a given group
-} -}
makePairs :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [Period] -> [Pointer] -> Filiation -> Double -> Proximity makePairs :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [Period] -> [Pointer] -> Filiation -> Double -> Similarity
-> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))] -> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos = makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
if (null periods) if (null periods)
...@@ -248,7 +248,7 @@ makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs di ...@@ -248,7 +248,7 @@ makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs di
$ filter (\(id,ngrams) -> $ filter (\(id,ngrams) ->
let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id]) let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id]) diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr in (toSimilarity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
) candidates ) candidates
-------------------------------------- --------------------------------------
lastPrd :: Period lastPrd :: Period
...@@ -258,7 +258,7 @@ makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs di ...@@ -258,7 +258,7 @@ makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs di
{- {-
-- find the best temporal links between a given group and its parents/childs -- find the best temporal links between a given group and its parents/childs
-} -}
phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Similarity -> Map Date Double -> Map Date Cooc
-> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer] -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ngrams) = phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ngrams) =
if (null $ filterPointers proxi thr oldPointers) if (null $ filterPointers proxi thr oldPointers)
...@@ -266,10 +266,10 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng ...@@ -266,10 +266,10 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng
then if null nextPointers then if null nextPointers
then [] then []
else filterPointersByPeriod filiation else filterPointersByPeriod filiation
-- 2) keep only the best set of pointers grouped by proximity -- 2) keep only the best set of pointers grouped by Similarity
$ head' "phyloGroupMatching" $ head' "phyloGroupMatching"
$ groupBy (\pt pt' -> (snd . fst) pt == (snd . fst) pt') $ groupBy (\pt pt' -> (snd . fst) pt == (snd . fst) pt')
-- 1) find the first time frame where at leats one pointer satisfies the proximity threshold -- 1) find the first time frame where at leats one pointer satisfies the Similarity threshold
$ sortBy (comparing (Down . snd . fst)) $ head' "pointers" nextPointers $ sortBy (comparing (Down . snd . fst)) $ head' "pointers" nextPointers
else oldPointers else oldPointers
where where
...@@ -277,29 +277,29 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng ...@@ -277,29 +277,29 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng
nextPointers = take 1 nextPointers = take 1
-- stop as soon as we find a time frame where at least one singleton / pair satisfies the threshold -- stop as soon as we find a time frame where at least one singleton / pair satisfies the threshold
$ dropWhile (null) $ dropWhile (null)
-- for each time frame, process the proximity on relevant pairs of targeted groups -- for each time frame, process the Similarity on relevant pairs of targeted groups
$ scanl (\acc targets -> $ scanl (\acc targets ->
let periods = nub $ map (fst . fst . fst) targets let periods = nub $ map (fst . fst . fst) targets
lastPrd = findLastPeriod filiation periods lastPrd = findLastPeriod filiation periods
nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods)) nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
diago = reduceDiagos diago = reduceDiagos
$ filterDiago diagos ([(fst . fst) id] ++ periods) $ filterDiago diagos ([(fst . fst) id] ++ periods)
singletons = processProximity nbdocs diago $ map (\g -> (g,g)) $ filter (\g -> (fst . fst . fst) g == lastPrd) targets singletons = processSimilarity nbdocs diago $ map (\g -> (g,g)) $ filter (\g -> (fst . fst . fst) g == lastPrd) targets
pairs = makePairs (id,ngrams) targets periods oldPointers filiation thr proxi docs diagos pairs = makePairs (id,ngrams) targets periods oldPointers filiation thr proxi docs diagos
in in
if (null singletons) if (null singletons)
then acc ++ ( processProximity nbdocs diago pairs ) then acc ++ ( processSimilarity nbdocs diago pairs )
else acc ++ singletons else acc ++ singletons
) [] $ map concat $ inits candidates -- groups from [[1900],[1900,1901],[1900,1901,1902],...] ) [] $ map concat $ inits candidates -- groups from [[1900],[1900,1901],[1900,1901,1902],...]
----------------------------- -----------------------------
processProximity :: Double -> Map Int Double -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))] -> [(Pointer,[Int])] processSimilarity :: Double -> Map Int Double -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))] -> [(Pointer,[Int])]
processProximity nbdocs diago targets = filterPointers' proxi thr processSimilarity nbdocs diago targets = filterPointers' proxi thr
$ concat $ concat
$ map (\(c,c') -> $ map (\(c,c') ->
let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c') let similarity = toSimilarity nbdocs diago proxi ngrams (snd c) (snd c')
in if ((c == c') || (snd c == snd c')) in if ((c == c') || (snd c == snd c'))
then [((fst c,proximity),snd c)] then [((fst c,similarity),snd c)]
else [((fst c,proximity),snd c),((fst c',proximity),snd c')] ) targets else [((fst c,similarity),snd c),((fst c',similarity),snd c')] ) targets
{- {-
...@@ -329,8 +329,8 @@ getCandidates minNgrams ego targets = ...@@ -329,8 +329,8 @@ getCandidates minNgrams ego targets =
{- {-
-- set up and start performing the upstream/downstream inter‐temporal matching period by period -- set up and start performing the upstream/downstream inter‐temporal matching period by period
-} -}
reconstructTemporalLinks :: Int -> [Period] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup] reconstructTemporalLinks :: Int -> [Period] -> Similarity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reconstructTemporalLinks frame periods proximity thr docs coocs groups = reconstructTemporalLinks frame periods similarity thr docs coocs groups =
let groups' = groupByField _phylo_groupPeriod groups let groups' = groupByField _phylo_groupPeriod groups
in foldl' (\acc prd -> in foldl' (\acc prd ->
let -- 1) find the parents/childs matching periods let -- 1) find the parents/childs matching periods
...@@ -347,9 +347,9 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups = ...@@ -347,9 +347,9 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups =
diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar) diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
-- 5) match in parallel all the groups (egos) to their possible candidates -- 5) match in parallel all the groups (egos) to their possible candidates
egos = map (\ego -> egos = map (\ego ->
let pointersPar = phyloGroupMatching (getCandidates (getMinSharedNgrams proximity) ego candidatesPar) ToParents proximity docsPar diagoPar let pointersPar = phyloGroupMatching (getCandidates (getMinSharedNgrams similarity) ego candidatesPar) ToParents similarity docsPar diagoPar
thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams) thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
pointersChi = phyloGroupMatching (getCandidates (getMinSharedNgrams proximity) ego candidatesChi) ToChilds proximity docsChi diagoChi pointersChi = phyloGroupMatching (getCandidates (getMinSharedNgrams similarity) ego candidatesChi) ToChilds similarity docsChi diagoChi
thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams) thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
in addPointers ToChilds TemporalPointer pointersChi in addPointers ToChilds TemporalPointer pointersChi
$ addPointers ToParents TemporalPointer pointersPar $ addPointers ToParents TemporalPointer pointersPar
...@@ -364,7 +364,7 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups = ...@@ -364,7 +364,7 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups =
{- {-
-- reconstruct a phylomemetic network from a list of groups and from a given threshold -- reconstruct a phylomemetic network from a list of groups and from a given threshold
-} -}
toPhylomemeticNetwork :: Int -> [Period] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [Branch] toPhylomemeticNetwork :: Int -> [Period] -> Similarity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [Branch]
toPhylomemeticNetwork timescale periods similarity thr docs coocs groups = toPhylomemeticNetwork timescale periods similarity thr docs coocs groups =
groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ reconstructTemporalLinks timescale periods similarity thr docs coocs groups $ reconstructTemporalLinks timescale periods similarity thr docs coocs groups
...@@ -523,7 +523,7 @@ thrToMeta thr branches = ...@@ -523,7 +523,7 @@ thrToMeta thr branches =
-- done = all the already separated branches -- done = all the already separated branches
-- rest = all the branches we still have to separate -- rest = all the branches we still have to separate
-} -}
separateBranches :: Double -> Proximity -> Double -> Map Int Double -> Int -> Double -> Double separateBranches :: Double -> Similarity -> Double -> Map Int Double -> Int -> Double -> Double
-> Int -> Map Date Double -> Map Date Cooc -> [Period] -> Int -> Map Date Double -> Map Date Cooc -> [Period]
-> [(Branch,ShouldTry)] -> (Branch,ShouldTry) -> [(Branch,ShouldTry)] -> [(Branch,ShouldTry)] -> (Branch,ShouldTry) -> [(Branch,ShouldTry)]
-> [(Branch,ShouldTry)] -> [(Branch,ShouldTry)]
...@@ -578,13 +578,13 @@ separateBranches fdt similarity lambda frequency minBranch thr rise timescale do ...@@ -578,13 +578,13 @@ separateBranches fdt similarity lambda frequency minBranch thr rise timescale do
{- {-
-- perform the sea-level rise algorithm, browse the similarity ladder and check that we can try out the next step -- perform the sea-level rise algorithm, browse the similarity ladder and check that we can try out the next step
-} -}
seaLevelRise :: Double -> Proximity -> Double -> Int -> Map Int Double seaLevelRise :: Double -> Similarity -> Double -> Int -> Map Int Double
-> [Double] -> Double -> [Double] -> Double
-> Int -> [Period] -> Int -> [Period]
-> Map Date Double -> Map Date Cooc -> Map Date Double -> Map Date Cooc
-> [(Branch,ShouldTry)] -> [(Branch,ShouldTry)]
-> ([(Branch,ShouldTry)],FinalQuality) -> ([(Branch,ShouldTry)],FinalQuality)
seaLevelRise fdt proximity lambda minBranch frequency ladder rise frame periods docs coocs branches = seaLevelRise fdt similarity lambda minBranch frequency ladder rise frame periods docs coocs branches =
-- if the ladder is empty or thr > 1 or there is no branch to break then stop -- if the ladder is empty or thr > 1 or there is no branch to break then stop
if (null ladder) || ((List.head ladder) > 1) || (stopRise branches) if (null ladder) || ((List.head ladder) > 1) || (stopRise branches)
then (branches, toPhyloQuality fdt lambda frequency (map fst branches)) then (branches, toPhyloQuality fdt lambda frequency (map fst branches))
...@@ -596,9 +596,9 @@ seaLevelRise fdt proximity lambda minBranch frequency ladder rise frame periods ...@@ -596,9 +596,9 @@ seaLevelRise fdt proximity lambda minBranch frequency ladder rise frame periods
<> " ξ = " <> printf "%.5f" (globalAccuracy frequency (map fst branches)) <> " ξ = " <> printf "%.5f" (globalAccuracy frequency (map fst branches))
<> " ρ = " <> printf "%.5f" (globalRecall frequency (map fst branches)) <> " ρ = " <> printf "%.5f" (globalRecall frequency (map fst branches))
<> " branches = " <> show(length branches)) <> " branches = " <> show(length branches))
$ separateBranches fdt proximity lambda frequency minBranch thr rise frame docs coocs periods $ separateBranches fdt similarity lambda frequency minBranch thr rise frame docs coocs periods
[] (List.head branches) (List.tail branches) [] (List.head branches) (List.tail branches)
in seaLevelRise fdt proximity lambda minBranch frequency (List.tail ladder) (rise + 1) frame periods docs coocs branches' in seaLevelRise fdt similarity lambda minBranch frequency (List.tail ladder) (rise + 1) frame periods docs coocs branches'
where where
-------- --------
stopRise :: [(Branch,ShouldTry)] -> Bool stopRise :: [(Branch,ShouldTry)] -> Bool
...@@ -624,7 +624,7 @@ temporalMatching ladder phylo = updatePhyloGroups 1 ...@@ -624,7 +624,7 @@ temporalMatching ladder phylo = updatePhyloGroups 1
--- 2) process the temporal matching by elevating the similarity ladder --- 2) process the temporal matching by elevating the similarity ladder
sea :: ([(Branch,ShouldTry)],FinalQuality) sea :: ([(Branch,ShouldTry)],FinalQuality)
sea = seaLevelRise (fromIntegral $ Vector.length $ getRoots phylo) sea = seaLevelRise (fromIntegral $ Vector.length $ getRoots phylo)
(phyloProximity $ getConfig phylo) (similarity $ getConfig phylo)
(_qua_granularity $ phyloQuality $ getConfig phylo) (_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo) (_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq) (phylo ^. phylo_termFreq)
...@@ -641,7 +641,7 @@ temporalMatching ladder phylo = updatePhyloGroups 1 ...@@ -641,7 +641,7 @@ temporalMatching ladder phylo = updatePhyloGroups 1
seabed = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo))) seabed = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
$ toPhylomemeticNetwork (getTimeFrame $ timeUnit $ getConfig phylo) $ toPhylomemeticNetwork (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (getPeriodIds phylo)
(phyloProximity $ getConfig phylo) (similarity $ getConfig phylo)
(List.head ladder) (List.head ladder)
(phylo ^. phylo_timeDocs) (phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc) (phylo ^. phylo_timeCooc)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment