diff --git a/src/Gargantext/Viz/AdaptativePhylo.hs b/src/Gargantext/Viz/AdaptativePhylo.hs index 1ab3bf09bfbe88a4e4915ddc23985632bc0b5e47..e9807ed4990fddc9754cdcde89cfa660c104ac10 100644 --- a/src/Gargantext/Viz/AdaptativePhylo.hs +++ b/src/Gargantext/Viz/AdaptativePhylo.hs @@ -57,12 +57,22 @@ data CorpusParser = | Csv {_csv_limit :: Int} deriving (Show,Generic,Eq) +data SeaElevation = + Constante + { _cons_start :: Double + , _cons_step :: Double } + | Adaptative + { _adap_granularity :: Double } + deriving (Show,Generic,Eq) data Proximity = WeightedLogJaccard { _wlj_sensibility :: Double - , _wlj_thresholdInit :: Double - , _wlj_thresholdStep :: Double } + -- , _wlj_thresholdInit :: Double + -- , _wlj_thresholdStep :: Double + -- | max height for sea level in temporal matching + -- , _wlj_elevation :: Double + } | Hamming deriving (Show,Generic,Eq) @@ -114,6 +124,7 @@ data Config = , phyloName :: Text , phyloLevel :: Int , phyloProximity :: Proximity + , seaElevation :: SeaElevation , phyloSynchrony :: Synchrony , phyloQuality :: Quality , timeUnit :: TimeUnit @@ -132,8 +143,9 @@ defaultConfig = , corpusParser = Csv 1000 , phyloName = pack "Default Phylo" , phyloLevel = 2 - , phyloProximity = WeightedLogJaccard 10 0 0.1 - , phyloSynchrony = ByProximityThreshold 0.1 10 AllBranches MergeAllGroups + , phyloProximity = WeightedLogJaccard 10 + , seaElevation = Adaptative 25 + , phyloSynchrony = ByProximityThreshold 0.6 10 SiblingBranches MergeAllGroups , phyloQuality = Quality 0.1 1 , timeUnit = Year 3 1 5 , clique = Fis 1 5 @@ -148,6 +160,8 @@ instance FromJSON CorpusParser instance ToJSON CorpusParser instance FromJSON Proximity instance ToJSON Proximity +instance FromJSON SeaElevation +instance ToJSON SeaElevation instance FromJSON TimeUnit instance ToJSON TimeUnit instance FromJSON Clique @@ -253,6 +267,7 @@ data Phylo = , _phylo_timeCooc :: !(Map Date Cooc) , _phylo_timeDocs :: !(Map Date Double) , _phylo_termFreq :: !(Map Int Double) + , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double) , _phylo_param :: PhyloParam , _phylo_periods :: Map PhyloPeriodId PhyloPeriod } @@ -366,9 +381,13 @@ data PhyloLabel = data PhyloBranch = PhyloBranch { _branch_id :: PhyloBranchId - , _branch_label :: Text - , _branch_meta :: Map Text [Double] - } deriving (Generic, Show) + , _branch_canonId :: [Int] + , _branch_seaLevel :: [Double] + , _branch_x :: Double + , _branch_y :: Double + , _branch_label :: Text + , _branch_meta :: Map Text [Double] + } deriving (Generic, Show, Eq) data PhyloExport = PhyloExport @@ -382,6 +401,7 @@ data PhyloExport = makeLenses ''Config makeLenses ''Proximity +makeLenses ''SeaElevation makeLenses ''Quality makeLenses ''Clique makeLenses ''PhyloLabel diff --git a/src/Gargantext/Viz/Phylo/PhyloExample.hs b/src/Gargantext/Viz/Phylo/PhyloExample.hs index aad57189febee69278348411bf905a53e1d8e19a..b1cb92cbd94afa2e05c3dce7ac5836608301c501 100644 --- a/src/Gargantext/Viz/Phylo/PhyloExample.hs +++ b/src/Gargantext/Viz/Phylo/PhyloExample.hs @@ -30,7 +30,7 @@ import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloMaker import Gargantext.Viz.Phylo.PhyloExport -import Gargantext.Viz.Phylo.TemporalMatching (temporalMatching) +import Gargantext.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching) import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering) import Control.Lens @@ -60,7 +60,12 @@ phylo2 = synchronicClustering phylo1 ----------------------------------------------- phylo1 :: Phylo -phylo1 = temporalMatching +phylo1 = case (getSeaElevation phyloBase) of + Constante s g -> constanteTemporalMatching s g + $ toGroupsProxi 1 + $ appendGroups cliqueToGroup 1 phyloClique phyloBase + Adaptative s -> adaptativeTemporalMatching s + $ toGroupsProxi 1 $ appendGroups cliqueToGroup 1 phyloClique phyloBase diff --git a/src/Gargantext/Viz/Phylo/PhyloExport.hs b/src/Gargantext/Viz/Phylo/PhyloExport.hs index 12d5cebe79282940eb6c8f99b7bcc45d77267a26..f5fa52a606a73dfa71c801caf860d732d2743073 100644 --- a/src/Gargantext/Viz/Phylo/PhyloExport.hs +++ b/src/Gargantext/Viz/Phylo/PhyloExport.hs @@ -17,8 +17,8 @@ Portability : POSIX module Gargantext.Viz.Phylo.PhyloExport where -import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, delete) -import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy) +import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList) +import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy, inits) import Data.Vector (Vector) import Prelude (writeFile) @@ -116,7 +116,11 @@ branchToDotNode b = ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)] <> (metaToAttr $ b ^. branch_meta) <> [ toAttr "nodeType" "branch" - , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id)) ]) + , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id)) + , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x)) + , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y)) + , toAttr "label" (pack $ show $ b ^. branch_label) + ]) periodToDotNode :: (Date,Date) -> Dot DotId periodToDotNode prd = @@ -130,7 +134,7 @@ periodToDotNode prd = groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId groupToDotNode fdt g = node (groupIdToDotId $ getGroupId g) - ([FontName "Arial", Shape BoxShape, toLabel (groupToTable fdt g)] + ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)] <> [ toAttr "nodeType" "group" , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod)) , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod)) @@ -141,7 +145,7 @@ groupToDotNode fdt g = toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId toDotEdge source target lbl edgeType = edge source target (case edgeType of - GroupToGroup -> [ Width 10, Color [toWColor Black], Constraint True + GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True , Label (StrLabel $ fromStrict lbl)] BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)]) , Label (StrLabel $ fromStrict lbl)] @@ -174,12 +178,9 @@ exportToDot phylo export = <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)) ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups)) ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs)) + ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods)) ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches)) ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)) - ,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo)) - ,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo)) - ,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo)) - ,(toAttr (fromStrict "quaGranularity") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo)) ]) @@ -232,12 +233,12 @@ exportToDot phylo export = ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo -- | 8) create the edges between the branches - _ <- mapM (\(bId,bId') -> - toDotEdge (branchIdToDotId bId) (branchIdToDotId bId') - (Text.pack $ show(branchIdsToProximity bId bId' - (getThresholdInit $ phyloProximity $ getConfig phylo) - (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch - ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches + -- _ <- mapM (\(bId,bId') -> + -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId') + -- (Text.pack $ show(branchIdsToProximity bId bId' + -- (getThresholdInit $ phyloProximity $ getConfig phylo) + -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch + -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches graphAttrs [Rank SameRank] @@ -418,8 +419,8 @@ processLabels labels foundations export = toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double -toDynamics n parents group m = - let prd = group ^. phylo_groupPeriod +toDynamics n parents g m = + let prd = g ^. phylo_groupPeriod end = last' "dynamics" (sort $ map snd $ elems m) in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end)) -- | decrease @@ -438,7 +439,7 @@ toDynamics n parents group m = processDynamics :: [PhyloGroup] -> [PhyloGroup] -processDynamics groups = +processDynamics groups = map (\g -> let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId) && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups @@ -467,15 +468,38 @@ toPhyloExport phylo = exportToDot phylo $ processMetrics export where export :: PhyloExport - export = PhyloExport groups branches + export = PhyloExport groups + $ map (\(x,b) -> b & branch_x .~ x) + $ zip branchesGaps branches -------------------------------------- - branches :: [PhyloBranch] - branches = map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups + branchesGaps :: [Double] + branchesGaps = map sum + $ inits + $ map (\(b,x) -> b ^. branch_y + 0.05 - x) + $ zip branches + $ ([0] ++ (map (\(b,b') -> + let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) [] + in (b' ^. branch_seaLevel) !! (idx - 1) + ) $ listToSeq branches)) + -------------------------------------- + branches :: [PhyloBranch] + branches = map (\g -> + let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels" + breaks = (g ^. phylo_groupMeta) ! "breaks" + canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId) + in trace (show(canonId)) $ PhyloBranch (g ^. phylo_groupBranchId) + canonId + seaLvl + 0 + (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) + "" empty) + $ map (\gs -> head' "export" gs) + $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId) + $ sortOn (\g -> g ^. phylo_groupBranchId) groups -------------------------------------- groups :: [PhyloGroup] groups = traceExportGroups - $ processDynamics - $ map (\g -> g & phylo_groupMeta %~ delete "dynamics") + $ processDynamics $ getGroupsFromLevel (phyloLevel $ getConfig phylo) $ tracePhyloInfo phylo diff --git a/src/Gargantext/Viz/Phylo/PhyloMaker.hs b/src/Gargantext/Viz/Phylo/PhyloMaker.hs index bf4af24d7cbfe781099f5adaa7d5841b7528d0aa..caf2b648f83e5dc0fc0441d3ad896cb749d29395 100644 --- a/src/Gargantext/Viz/Phylo/PhyloMaker.hs +++ b/src/Gargantext/Viz/Phylo/PhyloMaker.hs @@ -15,15 +15,15 @@ Portability : POSIX module Gargantext.Viz.Phylo.PhyloMaker where -import Data.List (concat, nub, partition, sort, (++), group) -import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, singleton) +import Data.List (concat, nub, partition, sort, (++), group, intersect, null) +import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey) import Data.Set (size) import Data.Vector (Vector) import Gargantext.Prelude import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.Phylo.PhyloTools -import Gargantext.Viz.Phylo.TemporalMatching (temporalMatching) +import Gargantext.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity) import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Text.Context (TermList) import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..)) @@ -43,7 +43,8 @@ import qualified Data.Set as Set toPhylo :: [Document] -> TermList -> Config -> Phylo -toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $ +toPhylo docs lst conf = trace ("# phylo1 groups " <> show(length $ getGroupsFromLevel 1 phylo1)) + $ traceToPhylo (phyloLevel conf) $ if (phyloLevel conf) > 1 then foldl' (\phylo' _ -> synchronicClustering phylo') phylo1 [2..(phyloLevel conf)] else phylo1 @@ -62,8 +63,35 @@ toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $ -- | To Phylo 1 | -- -------------------- - -appendGroups :: (a -> Double -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo +toGroupsProxi :: Level -> Phylo -> Phylo +toGroupsProxi lvl phylo = + let proximity = phyloProximity $ getConfig phylo + groupsProxi = foldlWithKey (\acc pId pds -> + -- 1) process period by period + let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) + $ elems + $ view ( phylo_periodLevels + . traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl) + . phylo_levelGroups ) pds + next = getNextPeriods ToParents (getTimeFrame $ timeUnit $ getConfig phylo) pId (keys $ phylo ^. phylo_periods) + targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromLevelPeriods lvl next phylo + docs = filterDocs (phylo ^. phylo_timeDocs) ([pId] ++ next) + diagos = filterDiago (phylo ^. phylo_timeCooc) ([pId] ++ next) + -- 2) compute the pairs in parallel + pairs = map (\(id,ngrams) -> + map (\(id',ngrams') -> + let nbDocs = (sum . elems) $ filterDocs docs ([idToPrd id, idToPrd id']) + diago = reduceDiagos $ filterDiago diagos ([idToPrd id, idToPrd id']) + in ((id,id'),toProximity nbDocs diago proximity ngrams ngrams' ngrams') + ) $ filter (\(_,ngrams') -> (not . null) $ intersect ngrams ngrams') targets + ) egos + pairs' = pairs `using` parList rdeepseq + in acc ++ (concat pairs') + ) [] $ phylo ^. phylo_periods + in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi) + + +appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n") $ over ( phylo_periods . traverse @@ -76,7 +104,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co in phyloLvl & phylo_levelGroups .~ (fromList $ foldl (\groups obj -> groups ++ [ (((pId,lvl),length groups) - , f obj(getPhyloThresholdInit phylo) pId lvl (length groups) (getRoots phylo) + , f obj pId lvl (length groups) (getRoots phylo) (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId])) ] ) [] phyloCUnit) else @@ -84,21 +112,26 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co phylo -cliqueToGroup :: PhyloClique -> Double -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup -cliqueToGroup fis thr pId lvl idx fdt coocs = +cliqueToGroup :: PhyloClique -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup +cliqueToGroup fis pId lvl idx fdt coocs = let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloClique_nodes) fdt in PhyloGroup pId lvl idx "" (fis ^. phyloClique_support) ngrams (ngramsToCooc ngrams coocs) (1,[0]) -- | branchid (lvl,[path in the branching tree]) - (singleton "thr" [thr]) + (fromList [("breaks",[0]),("seaLevels",[0])]) [] [] [] [] toPhylo1 :: [Document] -> Phylo -> Phylo -toPhylo1 docs phyloBase = temporalMatching - $ appendGroups cliqueToGroup 1 phyloClique phyloBase +toPhylo1 docs phyloBase = case (getSeaElevation phyloBase) of + Constante start gap -> constanteTemporalMatching start gap + $ toGroupsProxi 1 + $ appendGroups cliqueToGroup 1 phyloClique phyloBase + Adaptative steps -> adaptativeTemporalMatching steps + $ toGroupsProxi 1 + $ appendGroups cliqueToGroup 1 phyloClique phyloBase where -------------------------------------- phyloClique :: Map (Date,Date) [PhyloClique] @@ -247,5 +280,6 @@ toPhyloBase docs lst conf = (docsToTimeScaleCooc docs (foundations ^. foundations_roots)) (docsToTimeScaleNb docs) (docsToTermFreq docs (foundations ^. foundations_roots)) + empty params (fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels 1 prd))) periods) diff --git a/src/Gargantext/Viz/Phylo/PhyloTools.hs b/src/Gargantext/Viz/Phylo/PhyloTools.hs index e7595d6d3645055ed1b6aa9c3d105c017d3d9a1c..cb6692835b38e58eb0b6f7290f20375eee5a90e2 100644 --- a/src/Gargantext/Viz/Phylo/PhyloTools.hs +++ b/src/Gargantext/Viz/Phylo/PhyloTools.hs @@ -34,6 +34,7 @@ import Control.Lens hiding (Level) import qualified Data.Vector as Vector import qualified Data.List as List import qualified Data.Set as Set +import qualified Data.Map as Map ------------ -- | Io | -- @@ -66,6 +67,7 @@ roundToStr = printf "%0.*f" countSup :: Double -> [Double] -> Int countSup s l = length $ filter (>s) l + dropByIdx :: Int -> [a] -> [a] dropByIdx k l = take k l ++ drop (k+1) l @@ -76,6 +78,15 @@ elemIndex' e l = case (List.elemIndex e l) of Just i -> i +commonPrefix :: Eq a => [a] -> [a] -> [a] -> [a] +commonPrefix lst lst' acc = + if (null lst || null lst') + then acc + else if (head' "commonPrefix" lst == head' "commonPrefix" lst') + then commonPrefix (tail lst) (tail lst') (acc ++ [head' "commonPrefix" lst]) + else acc + + --------------------- -- | Foundations | -- --------------------- @@ -249,7 +260,7 @@ idToPrd :: PhyloGroupId -> PhyloPeriodId idToPrd id = (fst . fst) id getGroupThr :: PhyloGroup -> Double -getGroupThr group = head' "getGroupThr" ((group ^. phylo_groupMeta) ! "thr") +getGroupThr group = last' "getGroupThr" ((group ^. phylo_groupMeta) ! "breaks") groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup] groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups @@ -263,27 +274,14 @@ getPeriodPointers fil group = filterProximity :: Proximity -> Double -> Double -> Bool filterProximity proximity thr local = case proximity of - WeightedLogJaccard _ _ _ -> local >= thr + WeightedLogJaccard _ -> local >= thr Hamming -> undefined getProximityName :: Proximity -> String getProximityName proximity = case proximity of - WeightedLogJaccard _ _ _ -> "WLJaccard" - Hamming -> "Hamming" - -getProximityInit :: Proximity -> Double -getProximityInit proximity = - case proximity of - WeightedLogJaccard _ i _ -> i - Hamming -> undefined - - -getProximityStep :: Proximity -> Double -getProximityStep proximity = - case proximity of - WeightedLogJaccard _ _ s -> s - Hamming -> undefined + WeightedLogJaccard _ -> "WLJaccard" + Hamming -> "Hamming" --------------- -- | Phylo | -- @@ -318,13 +316,8 @@ getLevels phylo = nub . traverse . phylo_periodLevels ) phylo - -getPhyloThresholdInit :: Phylo -> Double -getPhyloThresholdInit phylo = getThresholdInit (phyloProximity (getConfig phylo)) - - -getPhyloThresholdStep :: Phylo -> Double -getPhyloThresholdStep phylo = getThresholdStep (phyloProximity (getConfig phylo)) +getSeaElevation :: Phylo -> SeaElevation +getSeaElevation phylo = seaElevation (getConfig phylo) getConfig :: Phylo -> Config @@ -350,6 +343,26 @@ getGroupsFromLevel lvl phylo = . phylo_levelGroups ) phylo +getGroupsFromLevelPeriods :: Level -> [PhyloPeriodId] -> Phylo -> [PhyloGroup] +getGroupsFromLevelPeriods lvl periods phylo = + elems $ view ( phylo_periods + . traverse + . filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods) + . phylo_periodLevels + . traverse + . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl) + . phylo_levelGroups ) phylo + + +getGroupsFromPeriods :: Level -> Map PhyloPeriodId PhyloPeriod -> [PhyloGroup] +getGroupsFromPeriods lvl periods = + elems $ view ( traverse + . phylo_periodLevels + . traverse + . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl) + . phylo_levelGroups ) periods + + updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo updatePhyloGroups lvl m phylo = over ( phylo_periods @@ -407,27 +420,7 @@ traceSynchronyStart phylo = getSensibility :: Proximity -> Double getSensibility proxi = case proxi of - WeightedLogJaccard s _ _ -> s - Hamming -> undefined - -getThresholdInit :: Proximity -> Double -getThresholdInit proxi = case proxi of - WeightedLogJaccard _ t _ -> t - Hamming -> undefined - -getThresholdStep :: Proximity -> Double -getThresholdStep proxi = case proxi of - WeightedLogJaccard _ _ s -> s - Hamming -> undefined - - -traceBranchMatching :: Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -traceBranchMatching proxi thr groups = case proxi of - WeightedLogJaccard _ i s -> trace ( - roundToStr 2 thr <> " " - <> foldl (\acc _ -> acc <> ".") "." [(10*i),(10*i + 10*s)..(10*thr)] - <> " " <> show(length groups) <> " groups" - ) groups + WeightedLogJaccard s -> s Hamming -> undefined ---------------- @@ -494,4 +487,9 @@ traceMatchEnd groups = traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup] traceTemporalMatching groups = - trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups \ No newline at end of file + trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups + + +traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double +traceGroupsProxi m = + trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m \ No newline at end of file diff --git a/src/Gargantext/Viz/Phylo/SynchronicClustering.hs b/src/Gargantext/Viz/Phylo/SynchronicClustering.hs index ae0429008a1a2949a3eeca6397434031517220a1..375378c40bdddbee844245ddf1ab29839e40c229 100644 --- a/src/Gargantext/Viz/Phylo/SynchronicClustering.hs +++ b/src/Gargantext/Viz/Phylo/SynchronicClustering.hs @@ -21,8 +21,9 @@ import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos) import Gargantext.Viz.Phylo.PhyloExport (processDynamics) -import Data.List ((++), null, intersect, nub, concat, sort, sortOn, init, all, group, maximum, groupBy) -import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member, singleton) +import Data.List ((++), null, intersect, nub, concat, sort, sortOn, all, groupBy, group, maximum) +import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member) +import Data.Text (Text) import Control.Lens hiding (Level) import Control.Parallel.Strategies (parList, rdeepseq, using) @@ -37,20 +38,26 @@ import qualified Data.Set as Set ------------------------- mergeBranchIds :: [[Int]] -> [Int] -mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids +mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids where -- | 2) find the most Up Left ids in the hierarchy of similarity -- mostUpLeft :: [[Int]] -> [[Int]] -- mostUpLeft ids' = -- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids' -- inf = (fst . minimum) groupIds - -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds + -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds -- | 1) find the most frequent ids - mostFreq :: [[Int]] -> [[Int]] - mostFreq ids' = - let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids' - sup = (fst . maximum) groupIds - in map snd $ filter (\gIds -> fst gIds == sup) groupIds + mostFreq' :: [[Int]] -> [[Int]] + mostFreq' ids' = + let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids' + sup = (fst . maximum) groupIds + in map snd $ filter (\gIds -> fst gIds == sup) groupIds + + +mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double] +mergeMeta bId groups = + let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups + in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")] groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] @@ -64,10 +71,8 @@ groupsToBranches' groups = in map (\ids -> let groups' = elems $ restrictKeys groups (Set.fromList ids) bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups' - in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl + 1,bId))) groups') graph + in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph -getLastThr :: [PhyloGroup] -> Double -getLastThr childs = maximum $ concat $ map (\g -> (g ^. phylo_groupMeta) ! "thr") childs mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup mergeGroups coocs id mapIds childs = @@ -75,11 +80,15 @@ mergeGroups coocs id mapIds childs = in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) "" (sum $ map _phylo_groupSupport childs) ngrams (ngramsToCooc ngrams coocs) - ((snd $ fst id),(mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) childs)) - (singleton "thr" [getLastThr childs]) [] (map (\g -> (getGroupId g, 1)) childs) + ((snd $ fst id),bId) + (mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs) (updatePointers $ concat $ map _phylo_groupPeriodParents childs) (updatePointers $ concat $ map _phylo_groupPeriodChilds childs) - where + where + -------------------- + bId :: [Int] + bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) childs + -------------------- updatePointers :: [Pointer] -> [Pointer] updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers @@ -92,7 +101,7 @@ addPhyloLevel lvl phylo = toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo -toNextLevel' phylo groups = +toNextLevel' phylo groups = let curLvl = getLastLevel phylo oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups newGroups = concat $ groupsToBranches' @@ -163,9 +172,9 @@ groupsToEdges prox sync nbDocs diago groups = toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)] toEdges sens edges = case prox of - WeightedLogJaccard _ _ _ -> map (\(g,g') -> - ((g,g'), weightedLogJaccard' sens nbDocs diago - (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges + WeightedLogJaccard _ -> map (\(g,g') -> + ((g,g'), weightedLogJaccard' sens nbDocs diago + (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges _ -> undefined @@ -181,7 +190,7 @@ toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup] -reduceGroups prox sync docs diagos branch = +reduceGroups prox sync docs diagos branch = -- | 1) reduce a branch as a set of periods & groups let periods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch @@ -198,21 +207,13 @@ reduceGroups prox sync docs diagos branch = $ toRelatedComponents groups edges) periods -getGroupRealBId :: Double -> PhyloGroup -> [Int] -getGroupRealBId step g = - let nb = round(getGroupThr g / step) + 2 - in take nb (snd $ g ^. phylo_groupBranchId) - - - -adjustClustering :: Synchrony -> Double -> [[PhyloGroup]] -> [[PhyloGroup]] -adjustClustering sync step branches = case sync of - ByProximityThreshold _ _ scope _ -> - case scope of +adjustClustering :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]] +adjustClustering sync branches = case sync of + ByProximityThreshold _ _ scope _ -> case scope of SingleBranch -> branches - SiblingBranches -> groupBy (\g g' -> (init $ getGroupRealBId step g) == (init $ getGroupRealBId step g')) + SiblingBranches -> groupBy (\g g' -> (last' "adjustClustering" $ (g ^. phylo_groupMeta) ! "breaks") + == (last' "adjustClustering" $ (g' ^. phylo_groupMeta) ! "breaks")) $ sortOn _phylo_groupBranchId $ concat branches - -- SiblingBranches -> elems $ fromListWith (++) $ map (\b -> ((init . snd . _phylo_groupBranchId) $ head' "adjustClustering" b,b)) branches AllBranches -> [concat branches] ByProximityDistribution _ _ -> branches @@ -226,7 +227,7 @@ synchronicClustering phylo = diagos = map coocToDiago $ phylo ^. phylo_timeCooc newBranches = map (\branch -> reduceGroups prox sync docs diagos branch) $ map processDynamics - $ adjustClustering sync (getPhyloThresholdStep phylo) + $ adjustClustering sync $ phyloToLastBranches $ traceSynchronyStart phylo newBranches' = newBranches `using` parList rdeepseq diff --git a/src/Gargantext/Viz/Phylo/TemporalMatching.hs b/src/Gargantext/Viz/Phylo/TemporalMatching.hs index 7a416adc21c6a83c9efcaf742ae7173fe2d4204a..8beed714d561cacd3022c0484f236ecee9b2efb3 100644 --- a/src/Gargantext/Viz/Phylo/TemporalMatching.hs +++ b/src/Gargantext/Viz/Phylo/TemporalMatching.hs @@ -15,18 +15,19 @@ Portability : POSIX module Gargantext.Viz.Phylo.TemporalMatching where -import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or) -import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), singleton, empty, mapKeys) +import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or, sort, (!!)) +import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust) import Gargantext.Prelude import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.Phylo.PhyloTools --- import Prelude (logBase) +import Prelude (floor) import Control.Lens hiding (Level) import Control.Parallel.Strategies (parList, rdeepseq, using) --- import Debug.Trace (trace) +import Debug.Trace (trace) +import qualified Data.Map as Map import qualified Data.Set as Set @@ -77,7 +78,7 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams' toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' = case proximity of - WeightedLogJaccard sens _ _ -> + WeightedLogJaccard sens -> let pairNgrams = if targetNgrams == targetNgrams' then targetNgrams else union targetNgrams targetNgrams' @@ -268,9 +269,9 @@ toPhyloQuality' beta freq branches = $ keys freq ------------------------------ --- | Adaptative Matching | -- ------------------------------ +------------------------------------ +-- | Constant Temporal Matching | -- +------------------------------------ groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] @@ -299,18 +300,21 @@ reduceFrequency frequency branches = restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches) updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]] -updateThr thr branches = map (\b -> map (\g -> g & phylo_groupMeta .~ (singleton "thr" [thr])) b) branches +updateThr thr branches = map (\b -> map (\g -> + g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches -- | Sequentially break each branch of a phylo where -- done = all the allready broken branches -- ego = the current branch we want to break -- rest = the branches we still have to break -breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)] -breakBranches proximity beta frequency minBranch thr frame docs coocs periods done ego rest = +breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double + -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)] +breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest = -- | 1) keep or not the new division of ego let done' = done ++ (if snd ego - then (if ((null (fst ego')) || (quality > quality')) + then + (if ((null (fst ego')) || (quality > quality')) then -- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality') -- <> " | " <> show(length $ fst ego) <> " groups : " @@ -328,7 +332,7 @@ breakBranches proximity beta frequency minBranch thr frame docs coocs periods do -- | 2) if there is no more branches in rest then return else continue if null rest then done' - else breakBranches proximity beta frequency minBranch thr frame docs coocs periods + else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done' (head' "breakBranches" rest) (tail' "breakBranches" rest) where -------------------------------------- @@ -341,41 +345,43 @@ breakBranches proximity beta frequency minBranch thr frame docs coocs periods do $ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego) branches' = branches `using` parList rdeepseq in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch) - $ if (length branches' > 1) - then updateThr thr branches' - else branches' + $ thrToMeta thr + $ depthToMeta (elevation - depth) branches' -------------------------------------- quality' :: Double quality' = toPhyloQuality' beta frequency ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest)) -seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)] -seaLevelMatching proximity beta minBranch frequency thr frame periods docs coocs branches = - -- | if there is no branch to break or if sea level > 1 then end +seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double + -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)] +seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches = + -- | if there is no branch to break or if seaLvl level > 1 then end if (thr >= 1) || ((not . or) $ map snd branches) then branches else - -- | break all the possible branches at the current sea level - let branches' = breakBranches proximity beta frequency minBranch thr frame docs coocs periods + -- | break all the possible branches at the current seaLvl level + let branches' = breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches) frequency' = reduceFrequency frequency (map fst branches') - in seaLevelMatching proximity beta minBranch frequency' (thr + (getThresholdStep proximity)) frame periods docs coocs branches' + in seaLevelMatching proximity beta minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches' -temporalMatching :: Phylo -> Phylo -temporalMatching phylo = updatePhyloGroups 1 +constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo +constanteTemporalMatching start step phylo = updatePhyloGroups 1 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches) phylo where - -- | 2) process the temporal matching by elevating sea level + -- | 2) process the temporal matching by elevating seaLvl level branches :: [[PhyloGroup]] branches = map fst $ seaLevelMatching (phyloProximity $ getConfig phylo) (_qua_granularity $ phyloQuality $ getConfig phylo) (_qua_minBranch $ phyloQuality $ getConfig phylo) (phylo ^. phylo_termFreq) - (getThresholdInit $ phyloProximity $ getConfig phylo) + start step + (fromIntegral $ round (((1 - start) / step) - 1)) + (fromIntegral $ round ((1 - start) / step)) (getTimeFrame $ timeUnit $ getConfig phylo) (getPeriodIds phylo) (phylo ^. phylo_timeDocs) @@ -388,7 +394,153 @@ temporalMatching phylo = updatePhyloGroups 1 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo) (getPeriodIds phylo) (phyloProximity $ getConfig phylo) - (getThresholdInit $ phyloProximity $ getConfig phylo) + start (phylo ^. phylo_timeDocs) (phylo ^. phylo_timeCooc) - (traceTemporalMatching $ getGroupsFromLevel 1 phylo) \ No newline at end of file + (traceTemporalMatching $ getGroupsFromLevel 1 phylo) + + +-------------------------------------- +-- | Adaptative Temporal Matching | -- +-------------------------------------- + + +thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]] +thrToMeta thr branches = + map (\b -> + map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches + +depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]] +depthToMeta depth branches = + let break = length branches > 1 + in map (\b -> + map (\g -> + if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta)) + else g) b) branches + +reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double +reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m + + +getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double +getInTupleMap m k k' + | isJust (m !? ( k ,k')) = m ! ( k ,k') + | isJust (m !? ( k',k )) = m ! ( k',k ) + | otherwise = 0 + + +toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double +toThreshold lvl proxiGroups = + let idx = ((Map.size proxiGroups) `div` (floor lvl)) - 1 + in if idx >= 0 + then (sort $ elems proxiGroups) !! idx + else 1 + + +-- done = all the allready broken branches +-- ego = the current branch we want to break +-- rest = the branches we still have to break +adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double + -> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc + -> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))] + -> [([PhyloGroup],(Bool,[Double]))] +adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest = + -- | 1) keep or not the new division of ego + let done' = done ++ (if (fst . snd) ego + then (if ((null (fst ego')) || (quality > quality')) + then + [(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))] + else + ( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego')) + ++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego')))) + else [(concat $ thrToMeta thr $ [fst ego], snd ego)]) + in + -- | uncomment let .. in for debugging + -- let part1 = partition (snd) done' + -- part2 = partition (snd) rest + -- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] " + -- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]" + -- ) $ + -- | 2) if there is no more branches in rest then return else continue + if null rest + then done' + else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods + done' (head' "breakBranches" rest) (tail' "breakBranches" rest) + where + -------------------------------------- + thr :: Double + thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi + -------------------------------------- + quality :: Double + quality = toPhyloQuality' beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest)) + -------------------------------------- + ego' :: ([[PhyloGroup]],[[PhyloGroup]]) + ego' = + let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) + $ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego) + branches' = branches `using` parList rdeepseq + in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch) + $ thrToMeta thr + $ depthToMeta (elevation - depth) branches' + -------------------------------------- + quality' :: Double + quality' = toPhyloQuality' beta frequency + ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest)) + + +adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double + -> Double -> Int -> Map Int Double + -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc + -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))] +adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches = + -- | if there is no branch to break or if seaLvl level >= depth then end + if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches) + then branches + else + -- | break all the possible branches at the current seaLvl level + let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods + [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches) + frequency' = reduceFrequency frequency (map fst branches') + groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi + -- thr = toThreshold depth groupsProxi + in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)] + <> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches') + <> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]" + <> " thr = ") + $ adaptativeSeaLevelMatching proxiConf (depth - 1) elevation groupsProxi' beta minBranch frequency' frame periods docs coocs branches' + + +adaptativeTemporalMatching :: Double -> Phylo -> Phylo +adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1 + (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches) + phylo + where + -- | 2) process the temporal matching by elevating seaLvl level + branches :: [[PhyloGroup]] + branches = map fst + $ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo) + (elevation - 1) + elevation + (phylo ^. phylo_groupsProxi) + (_qua_granularity $ phyloQuality $ getConfig phylo) + (_qua_minBranch $ phyloQuality $ getConfig phylo) + (phylo ^. phylo_termFreq) + (getTimeFrame $ timeUnit $ getConfig phylo) + (getPeriodIds phylo) + (phylo ^. phylo_timeDocs) + (phylo ^. phylo_timeCooc) + groups + -- | 1) for each group process an initial temporal Matching + -- | here we suppose that all the groups of level 1 are part of the same big branch + groups :: [([PhyloGroup],(Bool,[Double]))] + groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr]))) + $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) + $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo) + (getPeriodIds phylo) (phyloProximity $ getConfig phylo) + thr + (phylo ^. phylo_timeDocs) + (phylo ^. phylo_timeCooc) + (traceTemporalMatching $ getGroupsFromLevel 1 phylo) + -------------------------------------- + thr :: Double + thr = toThreshold elevation (phylo ^. phylo_groupsProxi) \ No newline at end of file