Commit 2d0a7430 authored by qlobbe's avatar qlobbe

add adaptative and constante sea level elevation

parent a9fc87aa
Pipeline #683 failed with stage
...@@ -57,12 +57,22 @@ data CorpusParser = ...@@ -57,12 +57,22 @@ data CorpusParser =
| Csv {_csv_limit :: Int} | Csv {_csv_limit :: Int}
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data SeaElevation =
Constante
{ _cons_start :: Double
, _cons_step :: Double }
| Adaptative
{ _adap_granularity :: Double }
deriving (Show,Generic,Eq)
data Proximity = data Proximity =
WeightedLogJaccard WeightedLogJaccard
{ _wlj_sensibility :: Double { _wlj_sensibility :: Double
, _wlj_thresholdInit :: Double -- , _wlj_thresholdInit :: Double
, _wlj_thresholdStep :: Double } -- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double
}
| Hamming | Hamming
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
...@@ -114,6 +124,7 @@ data Config = ...@@ -114,6 +124,7 @@ data Config =
, phyloName :: Text , phyloName :: Text
, phyloLevel :: Int , phyloLevel :: Int
, phyloProximity :: Proximity , phyloProximity :: Proximity
, seaElevation :: SeaElevation
, phyloSynchrony :: Synchrony , phyloSynchrony :: Synchrony
, phyloQuality :: Quality , phyloQuality :: Quality
, timeUnit :: TimeUnit , timeUnit :: TimeUnit
...@@ -132,8 +143,9 @@ defaultConfig = ...@@ -132,8 +143,9 @@ defaultConfig =
, corpusParser = Csv 1000 , corpusParser = Csv 1000
, phyloName = pack "Default Phylo" , phyloName = pack "Default Phylo"
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 0 0.1 , phyloProximity = WeightedLogJaccard 10
, phyloSynchrony = ByProximityThreshold 0.1 10 AllBranches MergeAllGroups , seaElevation = Adaptative 25
, phyloSynchrony = ByProximityThreshold 0.6 10 SiblingBranches MergeAllGroups
, phyloQuality = Quality 0.1 1 , phyloQuality = Quality 0.1 1
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, clique = Fis 1 5 , clique = Fis 1 5
...@@ -148,6 +160,8 @@ instance FromJSON CorpusParser ...@@ -148,6 +160,8 @@ instance FromJSON CorpusParser
instance ToJSON CorpusParser instance ToJSON CorpusParser
instance FromJSON Proximity instance FromJSON Proximity
instance ToJSON Proximity instance ToJSON Proximity
instance FromJSON SeaElevation
instance ToJSON SeaElevation
instance FromJSON TimeUnit instance FromJSON TimeUnit
instance ToJSON TimeUnit instance ToJSON TimeUnit
instance FromJSON Clique instance FromJSON Clique
...@@ -253,6 +267,7 @@ data Phylo = ...@@ -253,6 +267,7 @@ data Phylo =
, _phylo_timeCooc :: !(Map Date Cooc) , _phylo_timeCooc :: !(Map Date Cooc)
, _phylo_timeDocs :: !(Map Date Double) , _phylo_timeDocs :: !(Map Date Double)
, _phylo_termFreq :: !(Map Int Double) , _phylo_termFreq :: !(Map Int Double)
, _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
, _phylo_param :: PhyloParam , _phylo_param :: PhyloParam
, _phylo_periods :: Map PhyloPeriodId PhyloPeriod , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
} }
...@@ -366,9 +381,13 @@ data PhyloLabel = ...@@ -366,9 +381,13 @@ data PhyloLabel =
data PhyloBranch = data PhyloBranch =
PhyloBranch PhyloBranch
{ _branch_id :: PhyloBranchId { _branch_id :: PhyloBranchId
, _branch_label :: Text , _branch_canonId :: [Int]
, _branch_meta :: Map Text [Double] , _branch_seaLevel :: [Double]
} deriving (Generic, Show) , _branch_x :: Double
, _branch_y :: Double
, _branch_label :: Text
, _branch_meta :: Map Text [Double]
} deriving (Generic, Show, Eq)
data PhyloExport = data PhyloExport =
PhyloExport PhyloExport
...@@ -382,6 +401,7 @@ data PhyloExport = ...@@ -382,6 +401,7 @@ data PhyloExport =
makeLenses ''Config makeLenses ''Config
makeLenses ''Proximity makeLenses ''Proximity
makeLenses ''SeaElevation
makeLenses ''Quality makeLenses ''Quality
makeLenses ''Clique makeLenses ''Clique
makeLenses ''PhyloLabel makeLenses ''PhyloLabel
......
...@@ -30,7 +30,7 @@ import Gargantext.Viz.AdaptativePhylo ...@@ -30,7 +30,7 @@ import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.PhyloMaker import Gargantext.Viz.Phylo.PhyloMaker
import Gargantext.Viz.Phylo.PhyloExport import Gargantext.Viz.Phylo.PhyloExport
import Gargantext.Viz.Phylo.TemporalMatching (temporalMatching) import Gargantext.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching)
import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Control.Lens import Control.Lens
...@@ -60,7 +60,12 @@ phylo2 = synchronicClustering phylo1 ...@@ -60,7 +60,12 @@ phylo2 = synchronicClustering phylo1
----------------------------------------------- -----------------------------------------------
phylo1 :: Phylo phylo1 :: Phylo
phylo1 = temporalMatching phylo1 = case (getSeaElevation phyloBase) of
Constante s g -> constanteTemporalMatching s g
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
Adaptative s -> adaptativeTemporalMatching s
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase $ appendGroups cliqueToGroup 1 phyloClique phyloBase
......
...@@ -17,8 +17,8 @@ Portability : POSIX ...@@ -17,8 +17,8 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloExport where module Gargantext.Viz.Phylo.PhyloExport where
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, delete) import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy) import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy, inits)
import Data.Vector (Vector) import Data.Vector (Vector)
import Prelude (writeFile) import Prelude (writeFile)
...@@ -116,7 +116,11 @@ branchToDotNode b = ...@@ -116,7 +116,11 @@ branchToDotNode b =
([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)] ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
<> (metaToAttr $ b ^. branch_meta) <> (metaToAttr $ b ^. branch_meta)
<> [ toAttr "nodeType" "branch" <> [ toAttr "nodeType" "branch"
, toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id)) ]) , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
, toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
, toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
, toAttr "label" (pack $ show $ b ^. branch_label)
])
periodToDotNode :: (Date,Date) -> Dot DotId periodToDotNode :: (Date,Date) -> Dot DotId
periodToDotNode prd = periodToDotNode prd =
...@@ -130,7 +134,7 @@ periodToDotNode prd = ...@@ -130,7 +134,7 @@ periodToDotNode prd =
groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId
groupToDotNode fdt g = groupToDotNode fdt g =
node (groupIdToDotId $ getGroupId g) node (groupIdToDotId $ getGroupId g)
([FontName "Arial", Shape BoxShape, toLabel (groupToTable fdt g)] ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
<> [ toAttr "nodeType" "group" <> [ toAttr "nodeType" "group"
, toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod)) , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
, toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod)) , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
...@@ -141,7 +145,7 @@ groupToDotNode fdt g = ...@@ -141,7 +145,7 @@ groupToDotNode fdt g =
toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
toDotEdge source target lbl edgeType = edge source target toDotEdge source target lbl edgeType = edge source target
(case edgeType of (case edgeType of
GroupToGroup -> [ Width 10, Color [toWColor Black], Constraint True GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True
, Label (StrLabel $ fromStrict lbl)] , Label (StrLabel $ fromStrict lbl)]
BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)]) BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
, Label (StrLabel $ fromStrict lbl)] , Label (StrLabel $ fromStrict lbl)]
...@@ -174,12 +178,9 @@ exportToDot phylo export = ...@@ -174,12 +178,9 @@ exportToDot phylo export =
<> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)) <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups)) ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs)) ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches)) ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)) ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
,(toAttr (fromStrict "quaGranularity") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
]) ])
...@@ -232,12 +233,12 @@ exportToDot phylo export = ...@@ -232,12 +233,12 @@ exportToDot phylo export =
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
-- | 8) create the edges between the branches -- | 8) create the edges between the branches
_ <- mapM (\(bId,bId') -> -- _ <- mapM (\(bId,bId') ->
toDotEdge (branchIdToDotId bId) (branchIdToDotId bId') -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
(Text.pack $ show(branchIdsToProximity bId bId' -- (Text.pack $ show(branchIdsToProximity bId bId'
(getThresholdInit $ phyloProximity $ getConfig phylo) -- (getThresholdInit $ phyloProximity $ getConfig phylo)
(getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
graphAttrs [Rank SameRank] graphAttrs [Rank SameRank]
...@@ -418,8 +419,8 @@ processLabels labels foundations export = ...@@ -418,8 +419,8 @@ processLabels labels foundations export =
toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
toDynamics n parents group m = toDynamics n parents g m =
let prd = group ^. phylo_groupPeriod let prd = g ^. phylo_groupPeriod
end = last' "dynamics" (sort $ map snd $ elems m) end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end)) in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-- | decrease -- | decrease
...@@ -438,7 +439,7 @@ toDynamics n parents group m = ...@@ -438,7 +439,7 @@ toDynamics n parents group m =
processDynamics :: [PhyloGroup] -> [PhyloGroup] processDynamics :: [PhyloGroup] -> [PhyloGroup]
processDynamics groups = processDynamics groups =
map (\g -> map (\g ->
let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId) let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
&& ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
...@@ -467,15 +468,38 @@ toPhyloExport phylo = exportToDot phylo ...@@ -467,15 +468,38 @@ toPhyloExport phylo = exportToDot phylo
$ processMetrics export $ processMetrics export
where where
export :: PhyloExport export :: PhyloExport
export = PhyloExport groups branches export = PhyloExport groups
$ map (\(x,b) -> b & branch_x .~ x)
$ zip branchesGaps branches
-------------------------------------- --------------------------------------
branches :: [PhyloBranch] branchesGaps :: [Double]
branches = map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups branchesGaps = map sum
$ inits
$ map (\(b,x) -> b ^. branch_y + 0.05 - x)
$ zip branches
$ ([0] ++ (map (\(b,b') ->
let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
in (b' ^. branch_seaLevel) !! (idx - 1)
) $ listToSeq branches))
--------------------------------------
branches :: [PhyloBranch]
branches = map (\g ->
let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
breaks = (g ^. phylo_groupMeta) ! "breaks"
canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
in trace (show(canonId)) $ PhyloBranch (g ^. phylo_groupBranchId)
canonId
seaLvl
0
(last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
"" empty)
$ map (\gs -> head' "export" gs)
$ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
$ sortOn (\g -> g ^. phylo_groupBranchId) groups
-------------------------------------- --------------------------------------
groups :: [PhyloGroup] groups :: [PhyloGroup]
groups = traceExportGroups groups = traceExportGroups
$ processDynamics $ processDynamics
$ map (\g -> g & phylo_groupMeta %~ delete "dynamics")
$ getGroupsFromLevel (phyloLevel $ getConfig phylo) $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
$ tracePhyloInfo phylo $ tracePhyloInfo phylo
......
...@@ -15,15 +15,15 @@ Portability : POSIX ...@@ -15,15 +15,15 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloMaker where module Gargantext.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++), group) import Data.List (concat, nub, partition, sort, (++), group, intersect, null)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, singleton) import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey)
import Data.Set (size) import Data.Set (size)
import Data.Vector (Vector) import Data.Vector (Vector)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (temporalMatching) import Gargantext.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Text.Context (TermList) import Gargantext.Text.Context (TermList)
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..)) import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
...@@ -43,7 +43,8 @@ import qualified Data.Set as Set ...@@ -43,7 +43,8 @@ import qualified Data.Set as Set
toPhylo :: [Document] -> TermList -> Config -> Phylo toPhylo :: [Document] -> TermList -> Config -> Phylo
toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $ toPhylo docs lst conf = trace ("# phylo1 groups " <> show(length $ getGroupsFromLevel 1 phylo1))
$ traceToPhylo (phyloLevel conf) $
if (phyloLevel conf) > 1 if (phyloLevel conf) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phylo1 [2..(phyloLevel conf)] then foldl' (\phylo' _ -> synchronicClustering phylo') phylo1 [2..(phyloLevel conf)]
else phylo1 else phylo1
...@@ -62,8 +63,35 @@ toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $ ...@@ -62,8 +63,35 @@ toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $
-- | To Phylo 1 | -- -- | To Phylo 1 | --
-------------------- --------------------
toGroupsProxi :: Level -> Phylo -> Phylo
appendGroups :: (a -> Double -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo toGroupsProxi lvl phylo =
let proximity = phyloProximity $ getConfig phylo
groupsProxi = foldlWithKey (\acc pId pds ->
-- 1) process period by period
let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams))
$ elems
$ view ( phylo_periodLevels
. traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) pds
next = getNextPeriods ToParents (getTimeFrame $ timeUnit $ getConfig phylo) pId (keys $ phylo ^. phylo_periods)
targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromLevelPeriods lvl next phylo
docs = filterDocs (phylo ^. phylo_timeDocs) ([pId] ++ next)
diagos = filterDiago (phylo ^. phylo_timeCooc) ([pId] ++ next)
-- 2) compute the pairs in parallel
pairs = map (\(id,ngrams) ->
map (\(id',ngrams') ->
let nbDocs = (sum . elems) $ filterDocs docs ([idToPrd id, idToPrd id'])
diago = reduceDiagos $ filterDiago diagos ([idToPrd id, idToPrd id'])
in ((id,id'),toProximity nbDocs diago proximity ngrams ngrams' ngrams')
) $ filter (\(_,ngrams') -> (not . null) $ intersect ngrams ngrams') targets
) egos
pairs' = pairs `using` parList rdeepseq
in acc ++ (concat pairs')
) [] $ phylo ^. phylo_periods
in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi)
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n") appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods $ over ( phylo_periods
. traverse . traverse
...@@ -76,7 +104,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -76,7 +104,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
in phyloLvl in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj -> & phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups) groups ++ [ (((pId,lvl),length groups)
, f obj(getPhyloThresholdInit phylo) pId lvl (length groups) (getRoots phylo) , f obj pId lvl (length groups) (getRoots phylo)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId])) (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloCUnit) ] ) [] phyloCUnit)
else else
...@@ -84,21 +112,26 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -84,21 +112,26 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo phylo
cliqueToGroup :: PhyloClique -> Double -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup cliqueToGroup :: PhyloClique -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
cliqueToGroup fis thr pId lvl idx fdt coocs = cliqueToGroup fis pId lvl idx fdt coocs =
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloClique_nodes) fdt let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloClique_nodes) fdt
in PhyloGroup pId lvl idx "" in PhyloGroup pId lvl idx ""
(fis ^. phyloClique_support) (fis ^. phyloClique_support)
ngrams ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
(1,[0]) -- | branchid (lvl,[path in the branching tree]) (1,[0]) -- | branchid (lvl,[path in the branching tree])
(singleton "thr" [thr]) (fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] [] []
toPhylo1 :: [Document] -> Phylo -> Phylo toPhylo1 :: [Document] -> Phylo -> Phylo
toPhylo1 docs phyloBase = temporalMatching toPhylo1 docs phyloBase = case (getSeaElevation phyloBase) of
$ appendGroups cliqueToGroup 1 phyloClique phyloBase Constante start gap -> constanteTemporalMatching start gap
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
Adaptative steps -> adaptativeTemporalMatching steps
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
where where
-------------------------------------- --------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique] phyloClique :: Map (Date,Date) [PhyloClique]
...@@ -247,5 +280,6 @@ toPhyloBase docs lst conf = ...@@ -247,5 +280,6 @@ toPhyloBase docs lst conf =
(docsToTimeScaleCooc docs (foundations ^. foundations_roots)) (docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs) (docsToTimeScaleNb docs)
(docsToTermFreq docs (foundations ^. foundations_roots)) (docsToTermFreq docs (foundations ^. foundations_roots))
empty
params params
(fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels 1 prd))) periods) (fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels 1 prd))) periods)
...@@ -34,6 +34,7 @@ import Control.Lens hiding (Level) ...@@ -34,6 +34,7 @@ import Control.Lens hiding (Level)
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Map as Map
------------ ------------
-- | Io | -- -- | Io | --
...@@ -66,6 +67,7 @@ roundToStr = printf "%0.*f" ...@@ -66,6 +67,7 @@ roundToStr = printf "%0.*f"
countSup :: Double -> [Double] -> Int countSup :: Double -> [Double] -> Int
countSup s l = length $ filter (>s) l countSup s l = length $ filter (>s) l
dropByIdx :: Int -> [a] -> [a] dropByIdx :: Int -> [a] -> [a]
dropByIdx k l = take k l ++ drop (k+1) l dropByIdx k l = take k l ++ drop (k+1) l
...@@ -76,6 +78,15 @@ elemIndex' e l = case (List.elemIndex e l) of ...@@ -76,6 +78,15 @@ elemIndex' e l = case (List.elemIndex e l) of
Just i -> i Just i -> i
commonPrefix :: Eq a => [a] -> [a] -> [a] -> [a]
commonPrefix lst lst' acc =
if (null lst || null lst')
then acc
else if (head' "commonPrefix" lst == head' "commonPrefix" lst')
then commonPrefix (tail lst) (tail lst') (acc ++ [head' "commonPrefix" lst])
else acc
--------------------- ---------------------
-- | Foundations | -- -- | Foundations | --
--------------------- ---------------------
...@@ -249,7 +260,7 @@ idToPrd :: PhyloGroupId -> PhyloPeriodId ...@@ -249,7 +260,7 @@ idToPrd :: PhyloGroupId -> PhyloPeriodId
idToPrd id = (fst . fst) id idToPrd id = (fst . fst) id
getGroupThr :: PhyloGroup -> Double getGroupThr :: PhyloGroup -> Double
getGroupThr group = head' "getGroupThr" ((group ^. phylo_groupMeta) ! "thr") getGroupThr group = last' "getGroupThr" ((group ^. phylo_groupMeta) ! "breaks")
groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup] groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
...@@ -263,27 +274,14 @@ getPeriodPointers fil group = ...@@ -263,27 +274,14 @@ getPeriodPointers fil group =
filterProximity :: Proximity -> Double -> Double -> Bool filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity proximity thr local = filterProximity proximity thr local =
case proximity of case proximity of
WeightedLogJaccard _ _ _ -> local >= thr WeightedLogJaccard _ -> local >= thr
Hamming -> undefined Hamming -> undefined
getProximityName :: Proximity -> String getProximityName :: Proximity -> String
getProximityName proximity = getProximityName proximity =
case proximity of case proximity of
WeightedLogJaccard _ _ _ -> "WLJaccard" WeightedLogJaccard _ -> "WLJaccard"
Hamming -> "Hamming" Hamming -> "Hamming"
getProximityInit :: Proximity -> Double
getProximityInit proximity =
case proximity of
WeightedLogJaccard _ i _ -> i
Hamming -> undefined
getProximityStep :: Proximity -> Double
getProximityStep proximity =
case proximity of
WeightedLogJaccard _ _ s -> s
Hamming -> undefined
--------------- ---------------
-- | Phylo | -- -- | Phylo | --
...@@ -318,13 +316,8 @@ getLevels phylo = nub ...@@ -318,13 +316,8 @@ getLevels phylo = nub
. traverse . traverse
. phylo_periodLevels ) phylo . phylo_periodLevels ) phylo
getSeaElevation :: Phylo -> SeaElevation
getPhyloThresholdInit :: Phylo -> Double getSeaElevation phylo = seaElevation (getConfig phylo)
getPhyloThresholdInit phylo = getThresholdInit (phyloProximity (getConfig phylo))
getPhyloThresholdStep :: Phylo -> Double
getPhyloThresholdStep phylo = getThresholdStep (phyloProximity (getConfig phylo))
getConfig :: Phylo -> Config getConfig :: Phylo -> Config
...@@ -350,6 +343,26 @@ getGroupsFromLevel lvl phylo = ...@@ -350,6 +343,26 @@ getGroupsFromLevel lvl phylo =
. phylo_levelGroups ) phylo . phylo_levelGroups ) phylo
getGroupsFromLevelPeriods :: Level -> [PhyloPeriodId] -> Phylo -> [PhyloGroup]
getGroupsFromLevelPeriods lvl periods phylo =
elems $ view ( phylo_periods
. traverse
. filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) phylo
getGroupsFromPeriods :: Level -> Map PhyloPeriodId PhyloPeriod -> [PhyloGroup]
getGroupsFromPeriods lvl periods =
elems $ view ( traverse
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) periods
updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
updatePhyloGroups lvl m phylo = updatePhyloGroups lvl m phylo =
over ( phylo_periods over ( phylo_periods
...@@ -407,27 +420,7 @@ traceSynchronyStart phylo = ...@@ -407,27 +420,7 @@ traceSynchronyStart phylo =
getSensibility :: Proximity -> Double getSensibility :: Proximity -> Double
getSensibility proxi = case proxi of getSensibility proxi = case proxi of
WeightedLogJaccard s _ _ -> s WeightedLogJaccard s -> s
Hamming -> undefined
getThresholdInit :: Proximity -> Double
getThresholdInit proxi = case proxi of
WeightedLogJaccard _ t _ -> t
Hamming -> undefined
getThresholdStep :: Proximity -> Double
getThresholdStep proxi = case proxi of
WeightedLogJaccard _ _ s -> s
Hamming -> undefined
traceBranchMatching :: Proximity -> Double -> [PhyloGroup] -> [PhyloGroup]
traceBranchMatching proxi thr groups = case proxi of
WeightedLogJaccard _ i s -> trace (
roundToStr 2 thr <> " "
<> foldl (\acc _ -> acc <> ".") "." [(10*i),(10*i + 10*s)..(10*thr)]
<> " " <> show(length groups) <> " groups"
) groups
Hamming -> undefined Hamming -> undefined
---------------- ----------------
...@@ -494,4 +487,9 @@ traceMatchEnd groups = ...@@ -494,4 +487,9 @@ traceMatchEnd groups =
traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup] traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
traceTemporalMatching groups = traceTemporalMatching groups =
trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
\ No newline at end of file
traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double
traceGroupsProxi m =
trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m
\ No newline at end of file
...@@ -21,8 +21,9 @@ import Gargantext.Viz.Phylo.PhyloTools ...@@ -21,8 +21,9 @@ import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos) import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Viz.Phylo.PhyloExport (processDynamics) import Gargantext.Viz.Phylo.PhyloExport (processDynamics)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, init, all, group, maximum, groupBy) import Data.List ((++), null, intersect, nub, concat, sort, sortOn, all, groupBy, group, maximum)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member, singleton) import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Data.Text (Text)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
...@@ -37,20 +38,26 @@ import qualified Data.Set as Set ...@@ -37,20 +38,26 @@ import qualified Data.Set as Set
------------------------- -------------------------
mergeBranchIds :: [[Int]] -> [Int] mergeBranchIds :: [[Int]] -> [Int]
mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
where where
-- | 2) find the most Up Left ids in the hierarchy of similarity -- | 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]] -- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' = -- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids' -- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds -- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids -- | 1) find the most frequent ids
mostFreq :: [[Int]] -> [[Int]] mostFreq' :: [[Int]] -> [[Int]]
mostFreq ids' = mostFreq' ids' =
let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids' let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
sup = (fst . maximum) groupIds sup = (fst . maximum) groupIds
in map snd $ filter (\gIds -> fst gIds == sup) groupIds in map snd $ filter (\gIds -> fst gIds == sup) groupIds
mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
mergeMeta bId groups =
let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups
in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
...@@ -64,10 +71,8 @@ groupsToBranches' groups = ...@@ -64,10 +71,8 @@ groupsToBranches' groups =
in map (\ids -> in map (\ids ->
let groups' = elems $ restrictKeys groups (Set.fromList ids) let groups' = elems $ restrictKeys groups (Set.fromList ids)
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups' bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl + 1,bId))) groups') graph in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph
getLastThr :: [PhyloGroup] -> Double
getLastThr childs = maximum $ concat $ map (\g -> (g ^. phylo_groupMeta) ! "thr") childs
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs = mergeGroups coocs id mapIds childs =
...@@ -75,11 +80,15 @@ mergeGroups coocs id mapIds childs = ...@@ -75,11 +80,15 @@ mergeGroups coocs id mapIds childs =
in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) "" in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) ""
(sum $ map _phylo_groupSupport childs) ngrams (sum $ map _phylo_groupSupport childs) ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
((snd $ fst id),(mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) childs)) ((snd $ fst id),bId)
(singleton "thr" [getLastThr childs]) [] (map (\g -> (getGroupId g, 1)) childs) (mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs) (updatePointers $ concat $ map _phylo_groupPeriodParents childs)
(updatePointers $ concat $ map _phylo_groupPeriodChilds childs) (updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
where where
--------------------
bId :: [Int]
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) childs
--------------------
updatePointers :: [Pointer] -> [Pointer] updatePointers :: [Pointer] -> [Pointer]
updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers
...@@ -92,7 +101,7 @@ addPhyloLevel lvl phylo = ...@@ -92,7 +101,7 @@ addPhyloLevel lvl phylo =
toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo
toNextLevel' phylo groups = toNextLevel' phylo groups =
let curLvl = getLastLevel phylo let curLvl = getLastLevel phylo
oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups
newGroups = concat $ groupsToBranches' newGroups = concat $ groupsToBranches'
...@@ -163,9 +172,9 @@ groupsToEdges prox sync nbDocs diago groups = ...@@ -163,9 +172,9 @@ groupsToEdges prox sync nbDocs diago groups =
toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)] toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)]
toEdges sens edges = toEdges sens edges =
case prox of case prox of
WeightedLogJaccard _ _ _ -> map (\(g,g') -> WeightedLogJaccard _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' sens nbDocs diago ((g,g'), weightedLogJaccard' sens nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
_ -> undefined _ -> undefined
...@@ -181,7 +190,7 @@ toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), ...@@ -181,7 +190,7 @@ toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1),
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup] reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reduceGroups prox sync docs diagos branch = reduceGroups prox sync docs diagos branch =
-- | 1) reduce a branch as a set of periods & groups -- | 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++) let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
...@@ -198,21 +207,13 @@ reduceGroups prox sync docs diagos branch = ...@@ -198,21 +207,13 @@ reduceGroups prox sync docs diagos branch =
$ toRelatedComponents groups edges) periods $ toRelatedComponents groups edges) periods
getGroupRealBId :: Double -> PhyloGroup -> [Int] adjustClustering :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
getGroupRealBId step g = adjustClustering sync branches = case sync of
let nb = round(getGroupThr g / step) + 2 ByProximityThreshold _ _ scope _ -> case scope of
in take nb (snd $ g ^. phylo_groupBranchId)
adjustClustering :: Synchrony -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
adjustClustering sync step branches = case sync of
ByProximityThreshold _ _ scope _ ->
case scope of
SingleBranch -> branches SingleBranch -> branches
SiblingBranches -> groupBy (\g g' -> (init $ getGroupRealBId step g) == (init $ getGroupRealBId step g')) SiblingBranches -> groupBy (\g g' -> (last' "adjustClustering" $ (g ^. phylo_groupMeta) ! "breaks")
== (last' "adjustClustering" $ (g' ^. phylo_groupMeta) ! "breaks"))
$ sortOn _phylo_groupBranchId $ concat branches $ sortOn _phylo_groupBranchId $ concat branches
-- SiblingBranches -> elems $ fromListWith (++) $ map (\b -> ((init . snd . _phylo_groupBranchId) $ head' "adjustClustering" b,b)) branches
AllBranches -> [concat branches] AllBranches -> [concat branches]
ByProximityDistribution _ _ -> branches ByProximityDistribution _ _ -> branches
...@@ -226,7 +227,7 @@ synchronicClustering phylo = ...@@ -226,7 +227,7 @@ synchronicClustering phylo =
diagos = map coocToDiago $ phylo ^. phylo_timeCooc diagos = map coocToDiago $ phylo ^. phylo_timeCooc
newBranches = map (\branch -> reduceGroups prox sync docs diagos branch) newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics $ map processDynamics
$ adjustClustering sync (getPhyloThresholdStep phylo) $ adjustClustering sync
$ phyloToLastBranches $ phyloToLastBranches
$ traceSynchronyStart phylo $ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq newBranches' = newBranches `using` parList rdeepseq
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment