Commit 2d0a7430 authored by qlobbe's avatar qlobbe

add adaptative and constante sea level elevation

parent a9fc87aa
...@@ -57,12 +57,22 @@ data CorpusParser = ...@@ -57,12 +57,22 @@ data CorpusParser =
| Csv {_csv_limit :: Int} | Csv {_csv_limit :: Int}
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data SeaElevation =
Constante
{ _cons_start :: Double
, _cons_step :: Double }
| Adaptative
{ _adap_granularity :: Double }
deriving (Show,Generic,Eq)
data Proximity = data Proximity =
WeightedLogJaccard WeightedLogJaccard
{ _wlj_sensibility :: Double { _wlj_sensibility :: Double
, _wlj_thresholdInit :: Double -- , _wlj_thresholdInit :: Double
, _wlj_thresholdStep :: Double } -- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double
}
| Hamming | Hamming
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
...@@ -114,6 +124,7 @@ data Config = ...@@ -114,6 +124,7 @@ data Config =
, phyloName :: Text , phyloName :: Text
, phyloLevel :: Int , phyloLevel :: Int
, phyloProximity :: Proximity , phyloProximity :: Proximity
, seaElevation :: SeaElevation
, phyloSynchrony :: Synchrony , phyloSynchrony :: Synchrony
, phyloQuality :: Quality , phyloQuality :: Quality
, timeUnit :: TimeUnit , timeUnit :: TimeUnit
...@@ -132,8 +143,9 @@ defaultConfig = ...@@ -132,8 +143,9 @@ defaultConfig =
, corpusParser = Csv 1000 , corpusParser = Csv 1000
, phyloName = pack "Default Phylo" , phyloName = pack "Default Phylo"
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 0 0.1 , phyloProximity = WeightedLogJaccard 10
, phyloSynchrony = ByProximityThreshold 0.1 10 AllBranches MergeAllGroups , seaElevation = Adaptative 25
, phyloSynchrony = ByProximityThreshold 0.6 10 SiblingBranches MergeAllGroups
, phyloQuality = Quality 0.1 1 , phyloQuality = Quality 0.1 1
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, clique = Fis 1 5 , clique = Fis 1 5
...@@ -148,6 +160,8 @@ instance FromJSON CorpusParser ...@@ -148,6 +160,8 @@ instance FromJSON CorpusParser
instance ToJSON CorpusParser instance ToJSON CorpusParser
instance FromJSON Proximity instance FromJSON Proximity
instance ToJSON Proximity instance ToJSON Proximity
instance FromJSON SeaElevation
instance ToJSON SeaElevation
instance FromJSON TimeUnit instance FromJSON TimeUnit
instance ToJSON TimeUnit instance ToJSON TimeUnit
instance FromJSON Clique instance FromJSON Clique
...@@ -253,6 +267,7 @@ data Phylo = ...@@ -253,6 +267,7 @@ data Phylo =
, _phylo_timeCooc :: !(Map Date Cooc) , _phylo_timeCooc :: !(Map Date Cooc)
, _phylo_timeDocs :: !(Map Date Double) , _phylo_timeDocs :: !(Map Date Double)
, _phylo_termFreq :: !(Map Int Double) , _phylo_termFreq :: !(Map Int Double)
, _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
, _phylo_param :: PhyloParam , _phylo_param :: PhyloParam
, _phylo_periods :: Map PhyloPeriodId PhyloPeriod , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
} }
...@@ -366,9 +381,13 @@ data PhyloLabel = ...@@ -366,9 +381,13 @@ data PhyloLabel =
data PhyloBranch = data PhyloBranch =
PhyloBranch PhyloBranch
{ _branch_id :: PhyloBranchId { _branch_id :: PhyloBranchId
, _branch_canonId :: [Int]
, _branch_seaLevel :: [Double]
, _branch_x :: Double
, _branch_y :: Double
, _branch_label :: Text , _branch_label :: Text
, _branch_meta :: Map Text [Double] , _branch_meta :: Map Text [Double]
} deriving (Generic, Show) } deriving (Generic, Show, Eq)
data PhyloExport = data PhyloExport =
PhyloExport PhyloExport
...@@ -382,6 +401,7 @@ data PhyloExport = ...@@ -382,6 +401,7 @@ data PhyloExport =
makeLenses ''Config makeLenses ''Config
makeLenses ''Proximity makeLenses ''Proximity
makeLenses ''SeaElevation
makeLenses ''Quality makeLenses ''Quality
makeLenses ''Clique makeLenses ''Clique
makeLenses ''PhyloLabel makeLenses ''PhyloLabel
......
...@@ -30,7 +30,7 @@ import Gargantext.Viz.AdaptativePhylo ...@@ -30,7 +30,7 @@ import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.PhyloMaker import Gargantext.Viz.Phylo.PhyloMaker
import Gargantext.Viz.Phylo.PhyloExport import Gargantext.Viz.Phylo.PhyloExport
import Gargantext.Viz.Phylo.TemporalMatching (temporalMatching) import Gargantext.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching)
import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Control.Lens import Control.Lens
...@@ -60,7 +60,12 @@ phylo2 = synchronicClustering phylo1 ...@@ -60,7 +60,12 @@ phylo2 = synchronicClustering phylo1
----------------------------------------------- -----------------------------------------------
phylo1 :: Phylo phylo1 :: Phylo
phylo1 = temporalMatching phylo1 = case (getSeaElevation phyloBase) of
Constante s g -> constanteTemporalMatching s g
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
Adaptative s -> adaptativeTemporalMatching s
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase $ appendGroups cliqueToGroup 1 phyloClique phyloBase
......
...@@ -17,8 +17,8 @@ Portability : POSIX ...@@ -17,8 +17,8 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloExport where module Gargantext.Viz.Phylo.PhyloExport where
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, delete) import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy) import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy, inits)
import Data.Vector (Vector) import Data.Vector (Vector)
import Prelude (writeFile) import Prelude (writeFile)
...@@ -116,7 +116,11 @@ branchToDotNode b = ...@@ -116,7 +116,11 @@ branchToDotNode b =
([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)] ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
<> (metaToAttr $ b ^. branch_meta) <> (metaToAttr $ b ^. branch_meta)
<> [ toAttr "nodeType" "branch" <> [ toAttr "nodeType" "branch"
, toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id)) ]) , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
, toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
, toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
, toAttr "label" (pack $ show $ b ^. branch_label)
])
periodToDotNode :: (Date,Date) -> Dot DotId periodToDotNode :: (Date,Date) -> Dot DotId
periodToDotNode prd = periodToDotNode prd =
...@@ -130,7 +134,7 @@ periodToDotNode prd = ...@@ -130,7 +134,7 @@ periodToDotNode prd =
groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId
groupToDotNode fdt g = groupToDotNode fdt g =
node (groupIdToDotId $ getGroupId g) node (groupIdToDotId $ getGroupId g)
([FontName "Arial", Shape BoxShape, toLabel (groupToTable fdt g)] ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
<> [ toAttr "nodeType" "group" <> [ toAttr "nodeType" "group"
, toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod)) , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
, toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod)) , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
...@@ -141,7 +145,7 @@ groupToDotNode fdt g = ...@@ -141,7 +145,7 @@ groupToDotNode fdt g =
toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
toDotEdge source target lbl edgeType = edge source target toDotEdge source target lbl edgeType = edge source target
(case edgeType of (case edgeType of
GroupToGroup -> [ Width 10, Color [toWColor Black], Constraint True GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True
, Label (StrLabel $ fromStrict lbl)] , Label (StrLabel $ fromStrict lbl)]
BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)]) BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
, Label (StrLabel $ fromStrict lbl)] , Label (StrLabel $ fromStrict lbl)]
...@@ -174,12 +178,9 @@ exportToDot phylo export = ...@@ -174,12 +178,9 @@ exportToDot phylo export =
<> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)) <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups)) ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs)) ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches)) ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)) ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "quaGranularity") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
]) ])
...@@ -232,12 +233,12 @@ exportToDot phylo export = ...@@ -232,12 +233,12 @@ exportToDot phylo export =
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
-- | 8) create the edges between the branches -- | 8) create the edges between the branches
_ <- mapM (\(bId,bId') -> -- _ <- mapM (\(bId,bId') ->
toDotEdge (branchIdToDotId bId) (branchIdToDotId bId') -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
(Text.pack $ show(branchIdsToProximity bId bId' -- (Text.pack $ show(branchIdsToProximity bId bId'
(getThresholdInit $ phyloProximity $ getConfig phylo) -- (getThresholdInit $ phyloProximity $ getConfig phylo)
(getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
graphAttrs [Rank SameRank] graphAttrs [Rank SameRank]
...@@ -418,8 +419,8 @@ processLabels labels foundations export = ...@@ -418,8 +419,8 @@ processLabels labels foundations export =
toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
toDynamics n parents group m = toDynamics n parents g m =
let prd = group ^. phylo_groupPeriod let prd = g ^. phylo_groupPeriod
end = last' "dynamics" (sort $ map snd $ elems m) end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end)) in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-- | decrease -- | decrease
...@@ -467,15 +468,38 @@ toPhyloExport phylo = exportToDot phylo ...@@ -467,15 +468,38 @@ toPhyloExport phylo = exportToDot phylo
$ processMetrics export $ processMetrics export
where where
export :: PhyloExport export :: PhyloExport
export = PhyloExport groups branches export = PhyloExport groups
$ map (\(x,b) -> b & branch_x .~ x)
$ zip branchesGaps branches
--------------------------------------
branchesGaps :: [Double]
branchesGaps = map sum
$ inits
$ map (\(b,x) -> b ^. branch_y + 0.05 - x)
$ zip branches
$ ([0] ++ (map (\(b,b') ->
let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
in (b' ^. branch_seaLevel) !! (idx - 1)
) $ listToSeq branches))
-------------------------------------- --------------------------------------
branches :: [PhyloBranch] branches :: [PhyloBranch]
branches = map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups branches = map (\g ->
let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
breaks = (g ^. phylo_groupMeta) ! "breaks"
canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
in trace (show(canonId)) $ PhyloBranch (g ^. phylo_groupBranchId)
canonId
seaLvl
0
(last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
"" empty)
$ map (\gs -> head' "export" gs)
$ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
$ sortOn (\g -> g ^. phylo_groupBranchId) groups
-------------------------------------- --------------------------------------
groups :: [PhyloGroup] groups :: [PhyloGroup]
groups = traceExportGroups groups = traceExportGroups
$ processDynamics $ processDynamics
$ map (\g -> g & phylo_groupMeta %~ delete "dynamics")
$ getGroupsFromLevel (phyloLevel $ getConfig phylo) $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
$ tracePhyloInfo phylo $ tracePhyloInfo phylo
......
...@@ -15,15 +15,15 @@ Portability : POSIX ...@@ -15,15 +15,15 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloMaker where module Gargantext.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++), group) import Data.List (concat, nub, partition, sort, (++), group, intersect, null)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, singleton) import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey)
import Data.Set (size) import Data.Set (size)
import Data.Vector (Vector) import Data.Vector (Vector)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (temporalMatching) import Gargantext.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Text.Context (TermList) import Gargantext.Text.Context (TermList)
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..)) import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
...@@ -43,7 +43,8 @@ import qualified Data.Set as Set ...@@ -43,7 +43,8 @@ import qualified Data.Set as Set
toPhylo :: [Document] -> TermList -> Config -> Phylo toPhylo :: [Document] -> TermList -> Config -> Phylo
toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $ toPhylo docs lst conf = trace ("# phylo1 groups " <> show(length $ getGroupsFromLevel 1 phylo1))
$ traceToPhylo (phyloLevel conf) $
if (phyloLevel conf) > 1 if (phyloLevel conf) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phylo1 [2..(phyloLevel conf)] then foldl' (\phylo' _ -> synchronicClustering phylo') phylo1 [2..(phyloLevel conf)]
else phylo1 else phylo1
...@@ -62,8 +63,35 @@ toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $ ...@@ -62,8 +63,35 @@ toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $
-- | To Phylo 1 | -- -- | To Phylo 1 | --
-------------------- --------------------
toGroupsProxi :: Level -> Phylo -> Phylo
appendGroups :: (a -> Double -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo toGroupsProxi lvl phylo =
let proximity = phyloProximity $ getConfig phylo
groupsProxi = foldlWithKey (\acc pId pds ->
-- 1) process period by period
let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams))
$ elems
$ view ( phylo_periodLevels
. traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) pds
next = getNextPeriods ToParents (getTimeFrame $ timeUnit $ getConfig phylo) pId (keys $ phylo ^. phylo_periods)
targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromLevelPeriods lvl next phylo
docs = filterDocs (phylo ^. phylo_timeDocs) ([pId] ++ next)
diagos = filterDiago (phylo ^. phylo_timeCooc) ([pId] ++ next)
-- 2) compute the pairs in parallel
pairs = map (\(id,ngrams) ->
map (\(id',ngrams') ->
let nbDocs = (sum . elems) $ filterDocs docs ([idToPrd id, idToPrd id'])
diago = reduceDiagos $ filterDiago diagos ([idToPrd id, idToPrd id'])
in ((id,id'),toProximity nbDocs diago proximity ngrams ngrams' ngrams')
) $ filter (\(_,ngrams') -> (not . null) $ intersect ngrams ngrams') targets
) egos
pairs' = pairs `using` parList rdeepseq
in acc ++ (concat pairs')
) [] $ phylo ^. phylo_periods
in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi)
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n") appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods $ over ( phylo_periods
. traverse . traverse
...@@ -76,7 +104,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -76,7 +104,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
in phyloLvl in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj -> & phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups) groups ++ [ (((pId,lvl),length groups)
, f obj(getPhyloThresholdInit phylo) pId lvl (length groups) (getRoots phylo) , f obj pId lvl (length groups) (getRoots phylo)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId])) (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloCUnit) ] ) [] phyloCUnit)
else else
...@@ -84,20 +112,25 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -84,20 +112,25 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo phylo
cliqueToGroup :: PhyloClique -> Double -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup cliqueToGroup :: PhyloClique -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
cliqueToGroup fis thr pId lvl idx fdt coocs = cliqueToGroup fis pId lvl idx fdt coocs =
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloClique_nodes) fdt let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloClique_nodes) fdt
in PhyloGroup pId lvl idx "" in PhyloGroup pId lvl idx ""
(fis ^. phyloClique_support) (fis ^. phyloClique_support)
ngrams ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
(1,[0]) -- | branchid (lvl,[path in the branching tree]) (1,[0]) -- | branchid (lvl,[path in the branching tree])
(singleton "thr" [thr]) (fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] [] []
toPhylo1 :: [Document] -> Phylo -> Phylo toPhylo1 :: [Document] -> Phylo -> Phylo
toPhylo1 docs phyloBase = temporalMatching toPhylo1 docs phyloBase = case (getSeaElevation phyloBase) of
Constante start gap -> constanteTemporalMatching start gap
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
Adaptative steps -> adaptativeTemporalMatching steps
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase $ appendGroups cliqueToGroup 1 phyloClique phyloBase
where where
-------------------------------------- --------------------------------------
...@@ -247,5 +280,6 @@ toPhyloBase docs lst conf = ...@@ -247,5 +280,6 @@ toPhyloBase docs lst conf =
(docsToTimeScaleCooc docs (foundations ^. foundations_roots)) (docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs) (docsToTimeScaleNb docs)
(docsToTermFreq docs (foundations ^. foundations_roots)) (docsToTermFreq docs (foundations ^. foundations_roots))
empty
params params
(fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels 1 prd))) periods) (fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels 1 prd))) periods)
...@@ -34,6 +34,7 @@ import Control.Lens hiding (Level) ...@@ -34,6 +34,7 @@ import Control.Lens hiding (Level)
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Map as Map
------------ ------------
-- | Io | -- -- | Io | --
...@@ -66,6 +67,7 @@ roundToStr = printf "%0.*f" ...@@ -66,6 +67,7 @@ roundToStr = printf "%0.*f"
countSup :: Double -> [Double] -> Int countSup :: Double -> [Double] -> Int
countSup s l = length $ filter (>s) l countSup s l = length $ filter (>s) l
dropByIdx :: Int -> [a] -> [a] dropByIdx :: Int -> [a] -> [a]
dropByIdx k l = take k l ++ drop (k+1) l dropByIdx k l = take k l ++ drop (k+1) l
...@@ -76,6 +78,15 @@ elemIndex' e l = case (List.elemIndex e l) of ...@@ -76,6 +78,15 @@ elemIndex' e l = case (List.elemIndex e l) of
Just i -> i Just i -> i
commonPrefix :: Eq a => [a] -> [a] -> [a] -> [a]
commonPrefix lst lst' acc =
if (null lst || null lst')
then acc
else if (head' "commonPrefix" lst == head' "commonPrefix" lst')
then commonPrefix (tail lst) (tail lst') (acc ++ [head' "commonPrefix" lst])
else acc
--------------------- ---------------------
-- | Foundations | -- -- | Foundations | --
--------------------- ---------------------
...@@ -249,7 +260,7 @@ idToPrd :: PhyloGroupId -> PhyloPeriodId ...@@ -249,7 +260,7 @@ idToPrd :: PhyloGroupId -> PhyloPeriodId
idToPrd id = (fst . fst) id idToPrd id = (fst . fst) id
getGroupThr :: PhyloGroup -> Double getGroupThr :: PhyloGroup -> Double
getGroupThr group = head' "getGroupThr" ((group ^. phylo_groupMeta) ! "thr") getGroupThr group = last' "getGroupThr" ((group ^. phylo_groupMeta) ! "breaks")
groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup] groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
...@@ -263,28 +274,15 @@ getPeriodPointers fil group = ...@@ -263,28 +274,15 @@ getPeriodPointers fil group =
filterProximity :: Proximity -> Double -> Double -> Bool filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity proximity thr local = filterProximity proximity thr local =
case proximity of case proximity of
WeightedLogJaccard _ _ _ -> local >= thr WeightedLogJaccard _ -> local >= thr
Hamming -> undefined Hamming -> undefined
getProximityName :: Proximity -> String getProximityName :: Proximity -> String
getProximityName proximity = getProximityName proximity =
case proximity of case proximity of
WeightedLogJaccard _ _ _ -> "WLJaccard" WeightedLogJaccard _ -> "WLJaccard"
Hamming -> "Hamming" Hamming -> "Hamming"
getProximityInit :: Proximity -> Double
getProximityInit proximity =
case proximity of
WeightedLogJaccard _ i _ -> i
Hamming -> undefined
getProximityStep :: Proximity -> Double
getProximityStep proximity =
case proximity of
WeightedLogJaccard _ _ s -> s
Hamming -> undefined
--------------- ---------------
-- | Phylo | -- -- | Phylo | --
--------------- ---------------
...@@ -318,13 +316,8 @@ getLevels phylo = nub ...@@ -318,13 +316,8 @@ getLevels phylo = nub
. traverse . traverse
. phylo_periodLevels ) phylo . phylo_periodLevels ) phylo
getSeaElevation :: Phylo -> SeaElevation
getPhyloThresholdInit :: Phylo -> Double getSeaElevation phylo = seaElevation (getConfig phylo)
getPhyloThresholdInit phylo = getThresholdInit (phyloProximity (getConfig phylo))
getPhyloThresholdStep :: Phylo -> Double
getPhyloThresholdStep phylo = getThresholdStep (phyloProximity (getConfig phylo))
getConfig :: Phylo -> Config getConfig :: Phylo -> Config
...@@ -350,6 +343,26 @@ getGroupsFromLevel lvl phylo = ...@@ -350,6 +343,26 @@ getGroupsFromLevel lvl phylo =
. phylo_levelGroups ) phylo . phylo_levelGroups ) phylo
getGroupsFromLevelPeriods :: Level -> [PhyloPeriodId] -> Phylo -> [PhyloGroup]
getGroupsFromLevelPeriods lvl periods phylo =
elems $ view ( phylo_periods
. traverse
. filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) phylo
getGroupsFromPeriods :: Level -> Map PhyloPeriodId PhyloPeriod -> [PhyloGroup]
getGroupsFromPeriods lvl periods =
elems $ view ( traverse
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) periods
updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
updatePhyloGroups lvl m phylo = updatePhyloGroups lvl m phylo =
over ( phylo_periods over ( phylo_periods
...@@ -407,27 +420,7 @@ traceSynchronyStart phylo = ...@@ -407,27 +420,7 @@ traceSynchronyStart phylo =
getSensibility :: Proximity -> Double getSensibility :: Proximity -> Double
getSensibility proxi = case proxi of getSensibility proxi = case proxi of
WeightedLogJaccard s _ _ -> s WeightedLogJaccard s -> s
Hamming -> undefined
getThresholdInit :: Proximity -> Double
getThresholdInit proxi = case proxi of
WeightedLogJaccard _ t _ -> t
Hamming -> undefined
getThresholdStep :: Proximity -> Double
getThresholdStep proxi = case proxi of
WeightedLogJaccard _ _ s -> s
Hamming -> undefined
traceBranchMatching :: Proximity -> Double -> [PhyloGroup] -> [PhyloGroup]
traceBranchMatching proxi thr groups = case proxi of
WeightedLogJaccard _ i s -> trace (
roundToStr 2 thr <> " "
<> foldl (\acc _ -> acc <> ".") "." [(10*i),(10*i + 10*s)..(10*thr)]
<> " " <> show(length groups) <> " groups"
) groups
Hamming -> undefined Hamming -> undefined
---------------- ----------------
...@@ -495,3 +488,8 @@ traceMatchEnd groups = ...@@ -495,3 +488,8 @@ traceMatchEnd groups =
traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup] traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
traceTemporalMatching groups = traceTemporalMatching groups =
trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double
traceGroupsProxi m =
trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m
\ No newline at end of file
...@@ -21,8 +21,9 @@ import Gargantext.Viz.Phylo.PhyloTools ...@@ -21,8 +21,9 @@ import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos) import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Viz.Phylo.PhyloExport (processDynamics) import Gargantext.Viz.Phylo.PhyloExport (processDynamics)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, init, all, group, maximum, groupBy) import Data.List ((++), null, intersect, nub, concat, sort, sortOn, all, groupBy, group, maximum)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member, singleton) import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Data.Text (Text)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
...@@ -37,7 +38,7 @@ import qualified Data.Set as Set ...@@ -37,7 +38,7 @@ import qualified Data.Set as Set
------------------------- -------------------------
mergeBranchIds :: [[Int]] -> [Int] mergeBranchIds :: [[Int]] -> [Int]
mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
where where
-- | 2) find the most Up Left ids in the hierarchy of similarity -- | 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]] -- mostUpLeft :: [[Int]] -> [[Int]]
...@@ -46,13 +47,19 @@ mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids ...@@ -46,13 +47,19 @@ mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids
-- inf = (fst . minimum) groupIds -- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids -- | 1) find the most frequent ids
mostFreq :: [[Int]] -> [[Int]] mostFreq' :: [[Int]] -> [[Int]]
mostFreq ids' = mostFreq' ids' =
let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids' let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
sup = (fst . maximum) groupIds sup = (fst . maximum) groupIds
in map snd $ filter (\gIds -> fst gIds == sup) groupIds in map snd $ filter (\gIds -> fst gIds == sup) groupIds
mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
mergeMeta bId groups =
let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups
in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches' groups = groupsToBranches' groups =
-- | run the related component algorithm -- | run the related component algorithm
...@@ -64,10 +71,8 @@ groupsToBranches' groups = ...@@ -64,10 +71,8 @@ groupsToBranches' groups =
in map (\ids -> in map (\ids ->
let groups' = elems $ restrictKeys groups (Set.fromList ids) let groups' = elems $ restrictKeys groups (Set.fromList ids)
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups' bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl + 1,bId))) groups') graph in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph
getLastThr :: [PhyloGroup] -> Double
getLastThr childs = maximum $ concat $ map (\g -> (g ^. phylo_groupMeta) ! "thr") childs
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs = mergeGroups coocs id mapIds childs =
...@@ -75,11 +80,15 @@ mergeGroups coocs id mapIds childs = ...@@ -75,11 +80,15 @@ mergeGroups coocs id mapIds childs =
in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) "" in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) ""
(sum $ map _phylo_groupSupport childs) ngrams (sum $ map _phylo_groupSupport childs) ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
((snd $ fst id),(mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) childs)) ((snd $ fst id),bId)
(singleton "thr" [getLastThr childs]) [] (map (\g -> (getGroupId g, 1)) childs) (mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs) (updatePointers $ concat $ map _phylo_groupPeriodParents childs)
(updatePointers $ concat $ map _phylo_groupPeriodChilds childs) (updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
where where
--------------------
bId :: [Int]
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) childs
--------------------
updatePointers :: [Pointer] -> [Pointer] updatePointers :: [Pointer] -> [Pointer]
updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers
...@@ -163,7 +172,7 @@ groupsToEdges prox sync nbDocs diago groups = ...@@ -163,7 +172,7 @@ groupsToEdges prox sync nbDocs diago groups =
toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)] toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)]
toEdges sens edges = toEdges sens edges =
case prox of case prox of
WeightedLogJaccard _ _ _ -> map (\(g,g') -> WeightedLogJaccard _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' sens nbDocs diago ((g,g'), weightedLogJaccard' sens nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
_ -> undefined _ -> undefined
...@@ -198,21 +207,13 @@ reduceGroups prox sync docs diagos branch = ...@@ -198,21 +207,13 @@ reduceGroups prox sync docs diagos branch =
$ toRelatedComponents groups edges) periods $ toRelatedComponents groups edges) periods
getGroupRealBId :: Double -> PhyloGroup -> [Int] adjustClustering :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
getGroupRealBId step g = adjustClustering sync branches = case sync of
let nb = round(getGroupThr g / step) + 2 ByProximityThreshold _ _ scope _ -> case scope of
in take nb (snd $ g ^. phylo_groupBranchId)
adjustClustering :: Synchrony -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
adjustClustering sync step branches = case sync of
ByProximityThreshold _ _ scope _ ->
case scope of
SingleBranch -> branches SingleBranch -> branches
SiblingBranches -> groupBy (\g g' -> (init $ getGroupRealBId step g) == (init $ getGroupRealBId step g')) SiblingBranches -> groupBy (\g g' -> (last' "adjustClustering" $ (g ^. phylo_groupMeta) ! "breaks")
== (last' "adjustClustering" $ (g' ^. phylo_groupMeta) ! "breaks"))
$ sortOn _phylo_groupBranchId $ concat branches $ sortOn _phylo_groupBranchId $ concat branches
-- SiblingBranches -> elems $ fromListWith (++) $ map (\b -> ((init . snd . _phylo_groupBranchId) $ head' "adjustClustering" b,b)) branches
AllBranches -> [concat branches] AllBranches -> [concat branches]
ByProximityDistribution _ _ -> branches ByProximityDistribution _ _ -> branches
...@@ -226,7 +227,7 @@ synchronicClustering phylo = ...@@ -226,7 +227,7 @@ synchronicClustering phylo =
diagos = map coocToDiago $ phylo ^. phylo_timeCooc diagos = map coocToDiago $ phylo ^. phylo_timeCooc
newBranches = map (\branch -> reduceGroups prox sync docs diagos branch) newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics $ map processDynamics
$ adjustClustering sync (getPhyloThresholdStep phylo) $ adjustClustering sync
$ phyloToLastBranches $ phyloToLastBranches
$ traceSynchronyStart phylo $ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq newBranches' = newBranches `using` parList rdeepseq
......
...@@ -15,18 +15,19 @@ Portability : POSIX ...@@ -15,18 +15,19 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or) import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or, sort, (!!))
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), singleton, empty, mapKeys) import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
-- import Prelude (logBase) import Prelude (floor)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
-- import Debug.Trace (trace) import Debug.Trace (trace)
import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
...@@ -77,7 +78,7 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams' ...@@ -77,7 +78,7 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams'
toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' = toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
case proximity of case proximity of
WeightedLogJaccard sens _ _ -> WeightedLogJaccard sens ->
let pairNgrams = if targetNgrams == targetNgrams' let pairNgrams = if targetNgrams == targetNgrams'
then targetNgrams then targetNgrams
else union targetNgrams targetNgrams' else union targetNgrams targetNgrams'
...@@ -268,9 +269,9 @@ toPhyloQuality' beta freq branches = ...@@ -268,9 +269,9 @@ toPhyloQuality' beta freq branches =
$ keys freq $ keys freq
----------------------------- ------------------------------------
-- | Adaptative Matching | -- -- | Constant Temporal Matching | --
----------------------------- ------------------------------------
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
...@@ -299,18 +300,21 @@ reduceFrequency frequency branches = ...@@ -299,18 +300,21 @@ reduceFrequency frequency branches =
restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches) restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]] updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
updateThr thr branches = map (\b -> map (\g -> g & phylo_groupMeta .~ (singleton "thr" [thr])) b) branches updateThr thr branches = map (\b -> map (\g ->
g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
-- | Sequentially break each branch of a phylo where -- | Sequentially break each branch of a phylo where
-- done = all the allready broken branches -- done = all the allready broken branches
-- ego = the current branch we want to break -- ego = the current branch we want to break
-- rest = the branches we still have to break -- rest = the branches we still have to break
breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)] breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
breakBranches proximity beta frequency minBranch thr frame docs coocs periods done ego rest = -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
-- | 1) keep or not the new division of ego -- | 1) keep or not the new division of ego
let done' = done ++ (if snd ego let done' = done ++ (if snd ego
then (if ((null (fst ego')) || (quality > quality')) then
(if ((null (fst ego')) || (quality > quality'))
then then
-- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality') -- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
-- <> " | " <> show(length $ fst ego) <> " groups : " -- <> " | " <> show(length $ fst ego) <> " groups : "
...@@ -328,7 +332,7 @@ breakBranches proximity beta frequency minBranch thr frame docs coocs periods do ...@@ -328,7 +332,7 @@ breakBranches proximity beta frequency minBranch thr frame docs coocs periods do
-- | 2) if there is no more branches in rest then return else continue -- | 2) if there is no more branches in rest then return else continue
if null rest if null rest
then done' then done'
else breakBranches proximity beta frequency minBranch thr frame docs coocs periods else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest) done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where where
-------------------------------------- --------------------------------------
...@@ -341,41 +345,43 @@ breakBranches proximity beta frequency minBranch thr frame docs coocs periods do ...@@ -341,41 +345,43 @@ breakBranches proximity beta frequency minBranch thr frame docs coocs periods do
$ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego) $ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
branches' = branches `using` parList rdeepseq branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch) in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
$ if (length branches' > 1) $ thrToMeta thr
then updateThr thr branches' $ depthToMeta (elevation - depth) branches'
else branches'
-------------------------------------- --------------------------------------
quality' :: Double quality' :: Double
quality' = toPhyloQuality' beta frequency quality' = toPhyloQuality' beta frequency
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest)) ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)] seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
seaLevelMatching proximity beta minBranch frequency thr frame periods docs coocs branches = -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
-- | if there is no branch to break or if sea level > 1 then end seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches =
-- | if there is no branch to break or if seaLvl level > 1 then end
if (thr >= 1) || ((not . or) $ map snd branches) if (thr >= 1) || ((not . or) $ map snd branches)
then branches then branches
else else
-- | break all the possible branches at the current sea level -- | break all the possible branches at the current seaLvl level
let branches' = breakBranches proximity beta frequency minBranch thr frame docs coocs periods let branches' = breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches) [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches') frequency' = reduceFrequency frequency (map fst branches')
in seaLevelMatching proximity beta minBranch frequency' (thr + (getThresholdStep proximity)) frame periods docs coocs branches' in seaLevelMatching proximity beta minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
temporalMatching :: Phylo -> Phylo constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
temporalMatching phylo = updatePhyloGroups 1 constanteTemporalMatching start step phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches) (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
phylo phylo
where where
-- | 2) process the temporal matching by elevating sea level -- | 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]] branches :: [[PhyloGroup]]
branches = map fst branches = map fst
$ seaLevelMatching (phyloProximity $ getConfig phylo) $ seaLevelMatching (phyloProximity $ getConfig phylo)
(_qua_granularity $ phyloQuality $ getConfig phylo) (_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo) (_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq) (phylo ^. phylo_termFreq)
(getThresholdInit $ phyloProximity $ getConfig phylo) start step
(fromIntegral $ round (((1 - start) / step) - 1))
(fromIntegral $ round ((1 - start) / step))
(getTimeFrame $ timeUnit $ getConfig phylo) (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (getPeriodIds phylo)
(phylo ^. phylo_timeDocs) (phylo ^. phylo_timeDocs)
...@@ -388,7 +394,153 @@ temporalMatching phylo = updatePhyloGroups 1 ...@@ -388,7 +394,153 @@ temporalMatching phylo = updatePhyloGroups 1
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo) $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (phyloProximity $ getConfig phylo) (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
(getThresholdInit $ phyloProximity $ getConfig phylo) start
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo)
--------------------------------------
-- | Adaptative Temporal Matching | --
--------------------------------------
thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
thrToMeta thr branches =
map (\b ->
map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
depthToMeta depth branches =
let break = length branches > 1
in map (\b ->
map (\g ->
if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
else g) b) branches
reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double
getInTupleMap m k k'
| isJust (m !? ( k ,k')) = m ! ( k ,k')
| isJust (m !? ( k',k )) = m ! ( k',k )
| otherwise = 0
toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
toThreshold lvl proxiGroups =
let idx = ((Map.size proxiGroups) `div` (floor lvl)) - 1
in if idx >= 0
then (sort $ elems proxiGroups) !! idx
else 1
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
-> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
-> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
-> [([PhyloGroup],(Bool,[Double]))]
adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest =
-- | 1) keep or not the new division of ego
let done' = done ++ (if (fst . snd) ego
then (if ((null (fst ego')) || (quality > quality'))
then
[(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
else
( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
in
-- | uncomment let .. in for debugging
-- let part1 = partition (snd) done'
-- part2 = partition (snd) rest
-- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
-- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
-- ) $
-- | 2) if there is no more branches in rest then return else continue
if null rest
then done'
else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where
--------------------------------------
thr :: Double
thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
--------------------------------------
quality :: Double
quality = toPhyloQuality' beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
--------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' =
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
$ thrToMeta thr
$ depthToMeta (elevation - depth) branches'
--------------------------------------
quality' :: Double
quality' = toPhyloQuality' beta frequency
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
-> Double -> Int -> Map Int Double
-> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
-> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches =
-- | if there is no branch to break or if seaLvl level >= depth then end
if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
then branches
else
-- | break all the possible branches at the current seaLvl level
let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches')
groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
-- thr = toThreshold depth groupsProxi
in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
<> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
<> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
<> " thr = ")
$ adaptativeSeaLevelMatching proxiConf (depth - 1) elevation groupsProxi' beta minBranch frequency' frame periods docs coocs branches'
adaptativeTemporalMatching :: Double -> Phylo -> Phylo
adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
phylo
where
-- | 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]]
branches = map fst
$ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)
(elevation - 1)
elevation
(phylo ^. phylo_groupsProxi)
(_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq)
(getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo)
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
groups
-- | 1) for each group process an initial temporal Matching
-- | here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],(Bool,[Double]))]
groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (phyloProximity $ getConfig phylo)
thr
(phylo ^. phylo_timeDocs) (phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc) (phylo ^. phylo_timeCooc)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo) (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
--------------------------------------
thr :: Double
thr = toThreshold elevation (phylo ^. phylo_groupsProxi)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment