Commit 22578326 authored by qlobbe's avatar qlobbe Committed by Alexandre Delanoë

1 click phylo v1 is ok

parent 4ee73701
...@@ -149,6 +149,7 @@ seaToLabel :: PhyloConfig -> [Char] ...@@ -149,6 +149,7 @@ seaToLabel :: PhyloConfig -> [Char]
seaToLabel config = case (seaElevation config) of seaToLabel config = case (seaElevation config) of
Constante start step -> ("sea_cst_" <> (show start) <> "_" <> (show step)) Constante start step -> ("sea_cst_" <> (show start) <> "_" <> (show step))
Adaptative granularity -> ("sea_adapt" <> (show granularity)) Adaptative granularity -> ("sea_adapt" <> (show granularity))
Evolving _ -> ("sea_evolv")
sensToLabel :: PhyloConfig -> [Char] sensToLabel :: PhyloConfig -> [Char]
......
...@@ -31,7 +31,6 @@ import Control.Lens (makeLenses) ...@@ -31,7 +31,6 @@ import Control.Lens (makeLenses)
import Data.Aeson import Data.Aeson
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Map (Map) import Data.Map (Map)
import Data.Set (Set)
import Data.Swagger import Data.Swagger
import Data.Text (Text, pack) import Data.Text (Text, pack)
import Data.Vector (Vector) import Data.Vector (Vector)
...@@ -66,6 +65,8 @@ data SeaElevation = ...@@ -66,6 +65,8 @@ data SeaElevation =
, _cons_gap :: Double } , _cons_gap :: Double }
| Adaptative | Adaptative
{ _adap_steps :: Double } { _adap_steps :: Double }
| Evolving
{ _evol_neighborhood :: Bool }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema SeaElevation instance ToSchema SeaElevation
...@@ -180,6 +181,7 @@ data PhyloConfig = ...@@ -180,6 +181,7 @@ data PhyloConfig =
, phyloScale :: Int , phyloScale :: Int
, similarity :: Similarity , similarity :: Similarity
, seaElevation :: SeaElevation , seaElevation :: SeaElevation
, defaultMode :: Bool
, findAncestors :: Bool , findAncestors :: Bool
, phyloSynchrony :: Synchrony , phyloSynchrony :: Synchrony
, phyloQuality :: Quality , phyloQuality :: Quality
...@@ -215,7 +217,7 @@ subConfig2config subConfig = defaultConfig { similarity = WeightedLogJaccard ...@@ -215,7 +217,7 @@ subConfig2config subConfig = defaultConfig { similarity = WeightedLogJaccard
------------------------------------------------------------------------ ------------------------------------------------------------------------
defaultConfig :: PhyloConfig defaultConfig :: PhyloConfig
defaultConfig = defaultConfig =
PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only
, listPath = "list.csv" -- useful for commandline only , listPath = "list.csv" -- useful for commandline only
, outputPath = "data/" , outputPath = "data/"
, corpusParser = Csv 100000 , corpusParser = Csv 100000
...@@ -224,6 +226,7 @@ defaultConfig = ...@@ -224,6 +226,7 @@ defaultConfig =
, phyloScale = 2 , phyloScale = 2
, similarity = WeightedLogJaccard 0.5 1 , similarity = WeightedLogJaccard 0.5 1
, seaElevation = Constante 0.1 0.1 , seaElevation = Constante 0.1 0.1
, defaultMode = True
, findAncestors = False , findAncestors = False
, phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups
, phyloQuality = Quality 0.5 1 , phyloQuality = Quality 0.5 1
...@@ -365,14 +368,21 @@ data PhyloFoundations = PhyloFoundations ...@@ -365,14 +368,21 @@ data PhyloFoundations = PhyloFoundations
, _foundations_rootsInGroups :: Map Int [PhyloGroupId] -- map of roots associated to groups , _foundations_rootsInGroups :: Map Int [PhyloGroupId] -- map of roots associated to groups
} deriving (Generic, Show, Eq) } deriving (Generic, Show, Eq)
instance ToSchema PhyloFoundations where data PhyloCounts = PhyloCounts
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_foundations_") { coocByDate :: !(Map Date Cooc)
, docsByDate :: !(Map Date Double)
, rootsCount :: !(Map Int Double)
, rootsFreq :: !(Map Int Double)
, lastRootsFreq :: !(Map Int Double)
} deriving (Generic, Show, Eq)
data PhyloSources = PhyloSources data PhyloSources = PhyloSources
{ _sources :: !(Vector Text) } deriving (Generic, Show, Eq) { _sources :: !(Vector Text) } deriving (Generic, Show, Eq)
instance ToSchema PhyloFoundations where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_foundations_")
instance ToSchema PhyloCounts where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
instance ToSchema PhyloSources where instance ToSchema PhyloSources where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
...@@ -396,6 +406,8 @@ type Period = (Date,Date) ...@@ -396,6 +406,8 @@ type Period = (Date,Date)
type PeriodStr = (DateStr,DateStr) type PeriodStr = (DateStr,DateStr)
-- | Phylo datatype of a phylomemy -- | Phylo datatype of a phylomemy
-- foundations : the foundations of the phylo -- foundations : the foundations of the phylo
-- timeCooc : a Map of coocurency by minimal unit of time (ex: by year) -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
...@@ -405,14 +417,12 @@ type PeriodStr = (DateStr,DateStr) ...@@ -405,14 +417,12 @@ type PeriodStr = (DateStr,DateStr)
data Phylo = data Phylo =
Phylo { _phylo_foundations :: PhyloFoundations Phylo { _phylo_foundations :: PhyloFoundations
, _phylo_sources :: PhyloSources , _phylo_sources :: PhyloSources
, _phylo_timeCooc :: !(Map Date Cooc) , _phylo_counts :: PhyloCounts
, _phylo_timeDocs :: !(Map Date Double) , _phylo_seaLadder :: [Double]
, _phylo_termFreq :: !(Map Int Double)
, _phylo_lastTermFreq :: !(Map Int Double)
, _phylo_diaSimScan :: Set Double
, _phylo_param :: PhyloParam , _phylo_param :: PhyloParam
, _phylo_periods :: Map Period PhyloPeriod , _phylo_periods :: Map Period PhyloPeriod
, _phylo_quality :: Double , _phylo_quality :: Double
, _phylo_level :: Double
} }
deriving (Generic, Show, Eq) deriving (Generic, Show, Eq)
...@@ -620,6 +630,9 @@ instance ToJSON PhyloSources ...@@ -620,6 +630,9 @@ instance ToJSON PhyloSources
instance FromJSON PhyloParam instance FromJSON PhyloParam
instance ToJSON PhyloParam instance ToJSON PhyloParam
instance FromJSON PhyloCounts
instance ToJSON PhyloCounts
instance FromJSON PhyloPeriod instance FromJSON PhyloPeriod
instance ToJSON PhyloPeriod instance ToJSON PhyloPeriod
......
...@@ -30,7 +30,6 @@ import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering) ...@@ -30,7 +30,6 @@ import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching) import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import qualified Data.Set as Set
import qualified Data.Map as Map import qualified Data.Map as Map
--------------------------------- ---------------------------------
...@@ -55,15 +54,11 @@ phyloCleopatre = synchronicClustering $ toHorizon flatPhylo ...@@ -55,15 +54,11 @@ phyloCleopatre = synchronicClustering $ toHorizon flatPhylo
----------------------------------------------- -----------------------------------------------
flatPhylo :: Phylo flatPhylo :: Phylo
flatPhylo = case (getSeaElevation emptyPhylo) of flatPhylo = temporalMatching (getLadder emptyPhylo') emptyPhylo'
Constante s g -> temporalMatching (constDiachronicLadder s g Set.empty)
$ scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
Adaptative s -> temporalMatching (adaptDiachronicLadder s (emptyPhylo' ^. phylo_diaSimScan) Set.empty) emptyPhylo'
emptyPhylo' :: Phylo emptyPhylo' :: Phylo
emptyPhylo' = scanSimilarity 1 emptyPhylo' = joinRoots
$ joinRootsToGroups $ findSeaLadder
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo $ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
--------------------------------------------- ---------------------------------------------
...@@ -83,11 +78,9 @@ docsByPeriods = groupDocsByPeriod date periods docs ...@@ -83,11 +78,9 @@ docsByPeriods = groupDocsByPeriod date periods docs
-- | STEP 1 | -- Init the Phylo -- | STEP 1 | -- Init the Phylo
--------------------------------- ---------------------------------
emptyPhylo :: Phylo emptyPhylo :: Phylo
emptyPhylo = initPhylo docs config emptyPhylo = initPhylo docs config
phyloCooc :: Map Date Cooc phyloCooc :: Map Date Cooc
phyloCooc = docsToTimeScaleCooc docs (foundations ^. foundations_roots) phyloCooc = docsToTimeScaleCooc docs (foundations ^. foundations_roots)
...@@ -106,7 +99,7 @@ config :: PhyloConfig ...@@ -106,7 +99,7 @@ config :: PhyloConfig
config = config =
defaultConfig { phyloName = "Cesar et Cleopatre" defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloScale = 2 , phyloScale = 2
, seaElevation = Adaptative 4 , seaElevation = Evolving True
, exportFilter = [ByBranchSize 0] , exportFilter = [ByBranchSize 0]
, clique = MaxClique 0 15 ByNeighbours } , clique = MaxClique 0 15 ByNeighbours }
......
...@@ -214,13 +214,13 @@ exportToDot phylo export = ...@@ -214,13 +214,13 @@ exportToDot phylo export =
{-- home made attributes -} {-- home made attributes -}
<> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)) <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups)) ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs)) ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ getDocsByDate phylo))
,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods)) ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches)) ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)) ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo)) ,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo) ,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
,(toAttr (fromStrict "PhyloScale") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo)) ,(toAttr (fromStrict "PhyloScale") $ pack $ show (getLevel phylo))
,(toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality)) ,(toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality))
,(toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo)) ,(toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo))
,(toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo)) ,(toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo))
...@@ -375,7 +375,8 @@ processSort sort' elev export = case sort' of ...@@ -375,7 +375,8 @@ processSort sort' elev export = case sort' of
ByBirthDate o -> sortByBirthDate o export ByBirthDate o -> sortByBirthDate o export
ByHierarchy _ -> case elev of ByHierarchy _ -> case elev of
Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches)) Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches)) Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
Evolving _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
----------------- -----------------
-- | Metrics | -- -- | Metrics | --
...@@ -416,7 +417,7 @@ ngramsMetrics phylo export = ...@@ -416,7 +417,7 @@ ngramsMetrics phylo export =
& phylo_groupMeta %~ insert "inclusion" & phylo_groupMeta %~ insert "inclusion"
(map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams) (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "frequence" & phylo_groupMeta %~ insert "frequence"
(map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams) (map (\n -> getInMap n (getLastRootsFreq phylo)) $ g ^. phylo_groupNgrams)
) export ) export
...@@ -643,12 +644,13 @@ toHorizon phylo = ...@@ -643,12 +644,13 @@ toHorizon phylo =
heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds)) heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
$ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
noHeads = groups \\ heads noHeads = groups \\ heads
nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd] nbDocs = sum $ elems $ filterDocs (getDocsByDate phylo) [prd]
diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd] diago = reduceDiagos $ filterDiago (getCoocByDate phylo) [prd]
sim = (similarity $ getConfig phylo) sim = (similarity $ getConfig phylo)
step = case getSeaElevation phylo of step = case getSeaElevation phylo of
Constante _ s -> s Constante _ s -> s
Adaptative _ -> 0 Adaptative _ -> 0
Evolving _ -> 0
-- in headsToAncestors nbDocs diago Similarity heads groups [] -- in headsToAncestors nbDocs diago Similarity heads groups []
in map (\ego -> toAncestor nbDocs diago sim step noHeads ego) in map (\ego -> toAncestor nbDocs diago sim step noHeads ego)
$ headsToAncestors nbDocs diago sim step heads [] $ headsToAncestors nbDocs diago sim step heads []
...@@ -671,7 +673,7 @@ toPhyloExport :: Phylo -> DotGraph DotId ...@@ -671,7 +673,7 @@ toPhyloExport :: Phylo -> DotGraph DotId
toPhyloExport phylo = exportToDot phylo toPhyloExport phylo = exportToDot phylo
$ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo) $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
$ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo) $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
$ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo) $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (getLastRootsFreq phylo)
$ processMetrics phylo export $ processMetrics phylo export
where where
export :: PhyloExport export :: PhyloExport
...@@ -711,7 +713,7 @@ tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map ...@@ -711,7 +713,7 @@ tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map
tracePhyloInfo :: Phylo -> Phylo tracePhyloInfo :: Phylo -> Phylo
tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = " tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
<> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to " <> show(getLevel phylo) <> " applied to "
<> show(length $ Vector.toList $ getRoots phylo) <> " foundations" <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
) phylo ) phylo
......
...@@ -16,7 +16,7 @@ import Control.DeepSeq (NFData) ...@@ -16,7 +16,7 @@ import Control.DeepSeq (NFData)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail) import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert) import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, insert)
import Data.Set (Set) import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import Data.Vector (Vector) import Data.Vector (Vector)
...@@ -30,7 +30,7 @@ import Gargantext.Core.Viz.Phylo ...@@ -30,7 +30,7 @@ import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon) import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toSimilarity) import Gargantext.Core.Viz.Phylo.TemporalMatching (toPhyloQuality, temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toSimilarity)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Set as Set import qualified Data.Set as Set
...@@ -55,8 +55,7 @@ toPhylo' (PhyloBase phylo) = toPhylo ...@@ -55,8 +55,7 @@ toPhylo' (PhyloBase phylo) = toPhylo
-- TODO an adaptative synchronic clustering with a slider -- TODO an adaptative synchronic clustering with a slider
toPhylo :: Phylo -> Phylo toPhylo :: Phylo -> Phylo
toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGroupsFromScale 1 flatPhylo)) toPhylo phylowithoutLink = traceToPhylo (phyloScale $ getConfig phylowithoutLink) $
$ traceToPhylo (phyloScale $ getConfig phylowithoutLink) $
if (phyloScale $ getConfig phylowithoutLink) > 1 if (phyloScale $ getConfig phylowithoutLink) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloScale $ getConfig phylowithoutLink)] then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloScale $ getConfig phylowithoutLink)]
else phyloAncestors else phyloAncestors
...@@ -65,11 +64,11 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou ...@@ -65,11 +64,11 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou
phyloAncestors :: Phylo phyloAncestors :: Phylo
phyloAncestors = phyloAncestors =
if (findAncestors $ getConfig phylowithoutLink) if (findAncestors $ getConfig phylowithoutLink)
then toHorizon flatPhylo then toHorizon phyloWithLinks
else flatPhylo else phyloWithLinks
-------------------------------------- --------------------------------------
flatPhylo :: Phylo phyloWithLinks :: Phylo
flatPhylo = addTemporalLinksToPhylo phylowithoutLink phyloWithLinks = temporalMatching (getLadder phylowithoutLink) phylowithoutLink
-------------------------------------- --------------------------------------
...@@ -86,60 +85,111 @@ squareLadder ladder = List.map (\x -> x * x) ladder ...@@ -86,60 +85,111 @@ squareLadder ladder = List.map (\x -> x * x) ladder
{- {-
-- create an adaptative diachronic 'sea elevation' ladder -- create an adaptative 'sea elevation' ladder
-} -}
adaptDiachronicLadder :: Double -> Set Double -> Set Double -> [Double] adaptSeaLadder :: Double -> Set Double -> Set Double -> [Double]
adaptDiachronicLadder curr similarities ladder = adaptSeaLadder curr similarities ladder =
if curr <= 0 || Set.null similarities if curr <= 0 || Set.null similarities
then Set.toList ladder then Set.toList ladder
else else
let idx = ((Set.size similarities) `div` (floor curr)) - 1 let idx = ((Set.size similarities) `div` (floor curr)) - 1
thr = Set.elemAt idx similarities thr = Set.elemAt idx similarities
-- we use a sliding methods 1/10, then 1/9, then ... 1/2 -- we use a sliding methods 1/10, then 1/9, then ... 1/2
in adaptDiachronicLadder (curr -1) (Set.filter (> thr) similarities) (Set.insert thr ladder) in adaptSeaLadder (curr -1) (Set.filter (> thr) similarities) (Set.insert thr ladder)
{- {-
-- create a constante diachronic 'sea elevation' ladder -- create a constante 'sea elevation' ladder
-} -}
constDiachronicLadder :: Double -> Double -> Set Double -> [Double] constSeaLadder :: Double -> Double -> Set Double -> [Double]
constDiachronicLadder curr step ladder = constSeaLadder curr step ladder =
if curr > 1 if curr > 1
then Set.toList ladder then Set.toList ladder
else constDiachronicLadder (curr + step) step (Set.insert curr ladder) else constSeaLadder (curr + step) step (Set.insert curr ladder)
{- {-
-- process an initial scanning of the kinship links -- create an evolving 'sea elevation' ladder based on estimated & local quality maxima
-} -}
scanSimilarity :: Scale -> Phylo -> Phylo evolvSeaLadder :: Double -> Double -> Map Int Double -> Set Double -> [((PhyloGroup,PhyloGroup),Double)] -> [Double]
scanSimilarity lvl phylo = evolvSeaLadder nbFdt lambda freq similarities graph = map snd
let proximity = similarity $ getConfig phylo $ filter fst
scanning = foldlWithKey (\acc pId pds -> $ zip maxima (map fst qua')
-- 1) process period by period -- 3) find the corresponding measures of similarity and create the ladder
let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) where
$ elems --------
$ view ( phylo_periodScales -- 2) find the local maxima in the quality distribution
. traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl) maxima :: [Bool]
. phylo_scaleGroups ) pds maxima = [snd (List.head qua') > snd (List.head $ List.tail qua')] ++ (findMaxima qua') ++ [snd (List.head $ reverse qua') > snd (List.head $ List.tail $ reverse qua')]
next = getNextPeriods ToParents (getTimeFrame $ timeUnit $ getConfig phylo) pId (keys $ phylo ^. phylo_periods) --------
targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromScalePeriods lvl next phylo -- 1.2)
docs = filterDocs (phylo ^. phylo_timeDocs) ([pId] ++ next) qua' :: [(Double,Double)]
diagos = filterDiago (phylo ^. phylo_timeCooc) ([pId] ++ next) qua' = foldl (\acc (s,q) ->
-- 2) compute the pairs in parallel if length acc == 0
pairs = map (\(id,ngrams) -> then [(s,q)]
map (\(id',ngrams') -> else if (snd (List.last acc)) == q
let nbDocs = (sum . elems) $ filterDocs docs ([idToPrd id, idToPrd id']) then acc
diago = reduceDiagos $ filterDiago diagos ([idToPrd id, idToPrd id']) else acc ++ [(s,q)]
in ((id,id'),toSimilarity nbDocs diago proximity ngrams ngrams' ngrams') ) [] $ zip (Set.toList similarities) qua
) $ filter (\(_,ngrams') -> (not . null) $ intersect ngrams ngrams') targets --------
) egos -- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality
pairs' = pairs `using` parList rdeepseq qua :: [Double]
in acc ++ (concat pairs') qua = map (\thr ->
) [] $ phylo ^. phylo_periods let edges = filter (\edge -> snd edge >= thr) graph
in phylo & phylo_diaSimScan .~ Set.fromList (traceGroupsProxi $ map snd scanning) nodes = nub $ concat $ map (\((n,n'),_) -> [n,n']) edges
branches = toRelatedComponents nodes edges
in toPhyloQuality nbFdt lambda freq branches
) $ (Set.toList similarities)
{-
-- find a similarity ladder regarding the "sea elevation" strategy
-}
findSeaLadder :: Phylo -> Phylo
findSeaLadder phylo = case getSeaElevation phylo of
Constante start gap -> phylo & phylo_seaLadder .~ (constSeaLadder start gap Set.empty)
Adaptative steps -> phylo & phylo_seaLadder .~ (squareLadder $ adaptSeaLadder steps similarities Set.empty)
Evolving _ -> let ladder = evolvSeaLadder
(fromIntegral $ Vector.length $ getRoots phylo)
(getLevel phylo)
(getRootsFreq phylo)
similarities simGraph
in phylo & phylo_seaLadder .~ (if length ladder > 0
then ladder
-- if we don't find any local maxima with the evolving strategy
else constSeaLadder 0.1 0.1 Set.empty)
where
--------
-- 2) extract the values of the kinship links
similarities :: Set Double
similarities = Set.fromList $ sort $ map snd simGraph
--------
-- 1) we process an initial calculation of the kinship links
-- this initial calculation is used to estimate the real sea ladder
simGraph :: [((PhyloGroup,PhyloGroup),Double)]
simGraph = foldl' (\acc period ->
-- 1.1) process period by period
let sources = getGroupsFromScalePeriods 1 [period] phylo
next = getNextPeriods ToParents 3 period (keys $ phylo ^. phylo_periods)
targets = getGroupsFromScalePeriods 1 next phylo
docs = filterDocs (getDocsByDate phylo) ([period] ++ next)
diagos = filterDiago (getCoocByDate phylo) ([period] ++ next)
-- 1.2) compute the kinship similarities between pairs of source & target in parallel
pairs = map (\source ->
let candidates = filter (\target -> (> 2) $ length
$ intersect (getGroupNgrams source) (getGroupNgrams target)) targets
in map (\target ->
let nbDocs = (sum . elems)
$ filterDocs docs ([idToPrd (getGroupId source), idToPrd (getGroupId target)])
diago = reduceDiagos
$ filterDiago diagos ([idToPrd (getGroupId source), idToPrd (getGroupId target)])
in ((source,target),toSimilarity nbDocs diago (getSimilarity phylo) (getGroupNgrams source) (getGroupNgrams target) (getGroupNgrams target))
) candidates
) sources
pairs' = pairs `using` parList rdeepseq
in acc ++ (concat pairs')
) [] $ keys $ phylo ^. phylo_periods
appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n") appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
...@@ -156,7 +206,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -156,7 +206,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
& phylo_scaleGroups .~ (fromList $ foldl (\groups obj -> & phylo_scaleGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups) groups ++ [ (((pId,lvl),length groups)
, f obj pId pId' lvl (length groups) , f obj pId pId' lvl (length groups)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId])) (elems $ restrictKeys (getCoocByDate phylo) $ periodsToYears [pId]))
] ) [] phyloCUnit) ] ) [] phyloCUnit)
else else
phyloLvl ) phyloLvl )
...@@ -174,16 +224,6 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx "" ...@@ -174,16 +224,6 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
(fromList [("breaks",[0]),("seaLevels",[0])]) (fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] [] [] [] [] [] [] [] []
{-
-- enhance the phylo with temporal links
-}
addTemporalLinksToPhylo :: Phylo -> Phylo
addTemporalLinksToPhylo phylowithoutLink = case strategy of
Constante start gap -> temporalMatching (constDiachronicLadder start gap Set.empty) phylowithoutLink
Adaptative steps -> temporalMatching (squareLadder $ adaptDiachronicLadder steps (phylowithoutLink ^. phylo_diaSimScan) Set.empty) phylowithoutLink
where
strategy :: SeaElevation
strategy = getSeaElevation phylowithoutLink
----------------------- -----------------------
-- | To Phylo Step | -- -- | To Phylo Step | --
...@@ -203,8 +243,8 @@ indexDates' m = map (\docs -> ...@@ -203,8 +243,8 @@ indexDates' m = map (\docs ->
-- create a map of roots and group ids -- create a map of roots and group ids
joinRootsToGroups :: Phylo -> Phylo joinRoots :: Phylo -> Phylo
joinRootsToGroups phylo = set (phylo_foundations . foundations_rootsInGroups) rootsMap phylo joinRoots phylo = set (phylo_foundations . foundations_rootsInGroups) rootsMap phylo
where where
-------------------------------------- --------------------------------------
rootsMap :: Map Int [PhyloGroupId] rootsMap :: Map Int [PhyloGroupId]
...@@ -215,16 +255,20 @@ joinRootsToGroups phylo = set (phylo_foundations . foundations_rootsInGroups) ro ...@@ -215,16 +255,20 @@ joinRootsToGroups phylo = set (phylo_foundations . foundations_rootsInGroups) ro
$ getGroupsFromScale 1 phylo $ getGroupsFromScale 1 phylo
maybeDefaultParams :: Phylo -> Phylo
maybeDefaultParams phylo = if (defaultMode (getConfig phylo))
then findDefaultLevel phylo
else phylo
-- To build the first phylo step from docs and terms -- To build the first phylo step from docs and terms
-- QL: backend entre phyloBase et Clustering -- QL: backend entre phyloBase et Clustering
-- tophylowithoutLink -- tophylowithoutLink
toPhyloWithoutLink :: [Document] -> PhyloConfig -> Phylo toPhyloWithoutLink :: [Document] -> PhyloConfig -> Phylo
toPhyloWithoutLink docs conf = case (getSeaElevation phyloBase) of toPhyloWithoutLink docs conf = joinRoots
Constante _ _ -> joinRootsToGroups $ findSeaLadder
$ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase) $ maybeDefaultParams
Adaptative _ -> joinRootsToGroups $ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
$ scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
where where
-------------------------------------- --------------------------------------
seriesOfClustering :: Map (Date,Date) [Clustering] seriesOfClustering :: Map (Date,Date) [Clustering]
...@@ -340,6 +384,7 @@ docsToTimeScaleCooc docs fdt = ...@@ -340,6 +384,7 @@ docsToTimeScaleCooc docs fdt =
----------------------- -----------------------
-- | to Phylo Base | -- -- | to Phylo Base | --
----------------------- -----------------------
-- TODO anoe -- TODO anoe
groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc] groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc]
groupDocsByPeriodRec f prds docs acc = groupDocsByPeriodRec f prds docs acc =
...@@ -394,6 +439,14 @@ docsToTermFreq docs fdt = ...@@ -394,6 +439,14 @@ docsToTermFreq docs fdt =
sumFreqs = sum $ elems freqs sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs in map (/sumFreqs) freqs
docsToTermCount :: [Document] -> Vector Ngrams -> Map Int Double
docsToTermCount docs roots = fromList
$ map (\lst -> (head' "docsToTermCount" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) roots) docs
docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double
docsToLastTermFreq n docs fdt = docsToLastTermFreq n docs fdt =
let last = take n $ reverse $ sort $ map date docs let last = take n $ reverse $ sort $ map date docs
...@@ -420,24 +473,43 @@ initPhyloScales lvlMax pId = ...@@ -420,24 +473,43 @@ initPhyloScales lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloScale pId ("","") lvl empty)) [1..lvlMax] fromList $ map (\lvl -> ((pId,lvl),PhyloScale pId ("","") lvl empty)) [1..lvlMax]
setDefault :: PhyloConfig -> PhyloConfig
setDefault conf = conf {
phyloScale = 2,
similarity = WeightedLogJaccard 0.5 2,
findAncestors = True,
phyloSynchrony = ByProximityThreshold 0.6 0 SiblingBranches MergeAllGroups,
phyloQuality = Quality 0.5 3,
timeUnit = Year 3 1 3,
clique = MaxClique 5 30 ByNeighbours,
exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2],
exportSort = ByHierarchy Desc,
exportFilter = [ByBranchSize 3]
}
-- Init the basic elements of a Phylo -- Init the basic elements of a Phylo
-- --
initPhylo :: [Document] -> PhyloConfig -> Phylo initPhylo :: [Document] -> PhyloConfig -> Phylo
initPhylo docs conf = initPhylo docs conf =
let roots = Vector.fromList $ nub $ concat $ map text docs let roots = Vector.fromList $ nub $ concat $ map text docs
foundations = PhyloFoundations roots empty foundations = PhyloFoundations roots empty
docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs) docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs)
params = defaultPhyloParam { _phyloParam_config = conf } docsCounts = PhyloCounts (docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs)
(docsToTermCount docs (foundations ^. foundations_roots))
(docsToTermFreq docs (foundations ^. foundations_roots))
(docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots))
params = if (defaultMode conf)
then defaultPhyloParam { _phyloParam_config = setDefault conf }
else defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf) periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf)
in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n") in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n")
$ Phylo foundations $ Phylo foundations
docsSources docsSources
(docsToTimeScaleCooc docs (foundations ^. foundations_roots)) docsCounts
(docsToTimeScaleNb docs) []
(docsToTermFreq docs (foundations ^. foundations_roots))
(docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots))
Set.empty
params params
(fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods) (fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods)
0 0
(_qua_granularity $ phyloQuality $ conf)
...@@ -13,7 +13,7 @@ Portability : POSIX ...@@ -13,7 +13,7 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.PhyloTools where module Gargantext.Core.Viz.Phylo.PhyloTools where
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group, notElem) import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, iterate, transpose, partition, tails, nubBy, group, notElem, (!!))
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys) import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.Set (Set, disjoint) import Data.Set (Set, disjoint)
import Data.String (String) import Data.String (String)
...@@ -28,6 +28,7 @@ import qualified Data.List as List ...@@ -28,6 +28,7 @@ import qualified Data.List as List
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import qualified Data.Map as Map
------------ ------------
-- | Io | -- -- | Io | --
...@@ -301,6 +302,9 @@ getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc ...@@ -301,6 +302,9 @@ getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
coocToDiago :: Cooc -> Cooc coocToDiago :: Cooc -> Cooc
coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc
coocToAdjacency :: Cooc -> Cooc
coocToAdjacency cooc = Map.map (\_ -> 1) cooc
-- | To build the local cooc matrix of each phylogroup -- | To build the local cooc matrix of each phylogroup
ngramsToCooc :: [Int] -> [Cooc] -> Cooc ngramsToCooc :: [Int] -> [Cooc] -> Cooc
ngramsToCooc ngrams coocs = ngramsToCooc ngrams coocs =
...@@ -309,6 +313,75 @@ ngramsToCooc ngrams coocs = ...@@ -309,6 +313,75 @@ ngramsToCooc ngrams coocs =
in filterWithKey (\k _ -> elem k pairs) cooc in filterWithKey (\k _ -> elem k pairs) cooc
------------------
-- | Defaults | --
------------------
-- | find the local maxima in a list of values
findMaxima :: [(Double,Double)] -> [Bool]
findMaxima lst = map (hasMax) $ toChunk 3 lst
where
------
hasMax :: [(Double,Double)] -> Bool
hasMax chunk =
if (length chunk) /= 3
then False
else (snd(chunk !! 0) < snd(chunk !! 1)) && (snd(chunk !! 2) < snd(chunk !! 1))
-- | split a list into chunks of size n
toChunk :: Int -> [a] -> [[a]]
toChunk n = takeWhile ((== n) . length) . transpose . take n . iterate tail
-- | To compute the average degree from a cooc matrix
-- http://networksciencebook.com/chapter/2#degree
toAverageDegree :: Cooc -> Vector Ngrams -> Double
toAverageDegree cooc roots = 2 * (fromIntegral $ Map.size cooc) / (fromIntegral $ Vector.length roots)
-- | Use the giant component regime to estimate the default level
-- http://networksciencebook.com/chapter/3#networks-supercritical
regimeToDefaultLevel :: Cooc -> Vector Ngrams -> Double
regimeToDefaultLevel cooc roots
| avg == 0 = 1
| avg < 1 = avg * 0.6
| avg == 1 = 0.6
| avg < lnN = (avg * 0.2) / lnN
| otherwise = 0.2
where
avg :: Double
avg = toAverageDegree cooc roots
lnN :: Double
lnN = log (fromIntegral $ Vector.length roots)
coocToConfidence :: Phylo -> Cooc
coocToConfidence phylo =
let count = getRootsCount phylo
cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty
$ elems $ getCoocByDate phylo
in Map.mapWithKey (\(a,b) w -> confidence a b w count) cooc
where
----
-- confidence
confidence :: Int -> Int -> Double -> Map Int Double -> Double
confidence a b inter card = maximum [(inter / card ! a),(inter / card ! b)]
sumtest :: [Int] -> [Int] -> Int
sumtest l1 l2 = (head' "test" l1) + (head' "test" $ reverse l2)
findDefaultLevel :: Phylo -> Phylo
findDefaultLevel phylo =
let confidence = Map.filterWithKey (\(a,b) _ -> a /= b)
$ Map.filter (> 0.01)
$ coocToConfidence phylo
roots = getRoots phylo
level = regimeToDefaultLevel confidence roots
in updateLevel level phylo
-------------------- --------------------
-- | PhyloGroup | -- -- | PhyloGroup | --
-------------------- --------------------
...@@ -401,21 +474,46 @@ getScales phylo = nub ...@@ -401,21 +474,46 @@ getScales phylo = nub
getSeaElevation :: Phylo -> SeaElevation getSeaElevation :: Phylo -> SeaElevation
getSeaElevation phylo = seaElevation (getConfig phylo) getSeaElevation phylo = seaElevation (getConfig phylo)
getSimilarity :: Phylo -> Similarity
getSimilarity phylo = similarity (getConfig phylo)
getPhyloSeaRiseStart :: Phylo -> Double getPhyloSeaRiseStart :: Phylo -> Double
getPhyloSeaRiseStart phylo = case (getSeaElevation phylo) of getPhyloSeaRiseStart phylo = case (getSeaElevation phylo) of
Constante s _ -> s Constante s _ -> s
Adaptative _ -> 0 Adaptative _ -> 0
Evolving _ -> 0
getPhyloSeaRiseSteps :: Phylo -> Double getPhyloSeaRiseSteps :: Phylo -> Double
getPhyloSeaRiseSteps phylo = case (getSeaElevation phylo) of getPhyloSeaRiseSteps phylo = case (getSeaElevation phylo) of
Constante _ s -> s Constante _ s -> s
Adaptative s -> s Adaptative s -> s
Evolving _ -> 0.1
getConfig :: Phylo -> PhyloConfig getConfig :: Phylo -> PhyloConfig
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
getLevel :: Phylo -> Double
getLevel phylo = _phylo_level phylo
getLadder :: Phylo -> [Double]
getLadder phylo = phylo ^. phylo_seaLadder
getCoocByDate :: Phylo -> Map Date Cooc
getCoocByDate phylo = coocByDate (phylo ^. phylo_counts)
getDocsByDate :: Phylo -> Map Date Double
getDocsByDate phylo = docsByDate (phylo ^. phylo_counts)
getRootsCount :: Phylo -> Map Int Double
getRootsCount phylo = rootsCount (phylo ^. phylo_counts)
getRootsFreq :: Phylo -> Map Int Double
getRootsFreq phylo = rootsFreq (phylo ^. phylo_counts)
getLastRootsFreq :: Phylo -> Map Int Double
getLastRootsFreq phylo = lastRootsFreq (phylo ^. phylo_counts)
setConfig :: PhyloConfig -> Phylo -> Phylo setConfig :: PhyloConfig -> Phylo -> Phylo
setConfig config phylo = phylo setConfig config phylo = phylo
...@@ -501,8 +599,10 @@ updatePeriods periods' phylo = ...@@ -501,8 +599,10 @@ updatePeriods periods' phylo =
) phylo ) phylo
updateQuality :: Double -> Phylo -> Phylo updateQuality :: Double -> Phylo -> Phylo
updateQuality quality phylo = phylo { _phylo_quality = quality } updateQuality quality phylo = phylo { _phylo_quality = quality }
updateLevel :: Double -> Phylo -> Phylo
updateLevel level phylo = phylo { _phylo_level = level }
traceToPhylo :: Scale -> Phylo -> Phylo traceToPhylo :: Scale -> Phylo -> Phylo
traceToPhylo lvl phylo = traceToPhylo lvl phylo =
......
...@@ -76,7 +76,7 @@ toNextScale phylo groups = ...@@ -76,7 +76,7 @@ toNextScale phylo groups =
$ fromList $ map (\g -> (getGroupId g, g)) $ fromList $ map (\g -> (getGroupId g, g))
$ foldlWithKey (\acc id groups' -> $ foldlWithKey (\acc id groups' ->
-- 4) create the parent group -- 4) create the parent group
let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups' let parent = mergeGroups (elems $ restrictKeys (getCoocByDate phylo) $ periodsToYears [(fst . fst) id]) id oldGroups groups'
in acc ++ [parent]) [] in acc ++ [parent]) []
-- 3) group the current groups by parentId -- 3) group the current groups by parentId
$ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups $ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups
...@@ -199,8 +199,8 @@ synchronicClustering :: Phylo -> Phylo ...@@ -199,8 +199,8 @@ synchronicClustering :: Phylo -> Phylo
synchronicClustering phylo = synchronicClustering phylo =
let prox = similarity $ getConfig phylo let prox = similarity $ getConfig phylo
sync = phyloSynchrony $ getConfig phylo sync = phyloSynchrony $ getConfig phylo
docs = phylo ^. phylo_timeDocs docs = getDocsByDate phylo
diagos = map coocToDiago $ phylo ^. phylo_timeCooc diagos = map coocToDiago $ getCoocByDate phylo
newBranches = map (\branch -> reduceGroups prox sync docs diagos branch) newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics $ map processDynamics
$ chooseClusteringStrategy sync $ chooseClusteringStrategy sync
......
...@@ -56,7 +56,7 @@ sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + 1/ tan (s * pi ...@@ -56,7 +56,7 @@ sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + 1/ tan (s * pi
-- process the sumLog -- process the sumLog
-} -}
sumLog' :: Double -> Double -> [Double] -> Double sumLog' :: Double -> Double -> [Double] -> Double
sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2)))) 0 diago sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + 1 / tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2)))) 0 diago
{- {-
...@@ -695,14 +695,14 @@ temporalMatching ladder phylo = updatePhyloGroups 1 ...@@ -695,14 +695,14 @@ temporalMatching ladder phylo = updatePhyloGroups 1
sea :: ([(Branch,ShouldTry)],FinalQuality) sea :: ([(Branch,ShouldTry)],FinalQuality)
sea = seaLevelRise (fromIntegral $ Vector.length $ getRoots phylo) sea = seaLevelRise (fromIntegral $ Vector.length $ getRoots phylo)
(similarity $ getConfig phylo) (similarity $ getConfig phylo)
(_qua_granularity $ phyloQuality $ getConfig phylo) (getLevel phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo) (_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq) (getRootsFreq phylo)
ladder 1 ladder 1
(getTimeFrame $ timeUnit $ getConfig phylo) (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (getPeriodIds phylo)
(phylo ^. phylo_timeDocs) (getDocsByDate phylo)
(phylo ^. phylo_timeCooc) (getCoocByDate phylo)
((phylo ^. phylo_foundations) ^. foundations_rootsInGroups) ((phylo ^. phylo_foundations) ^. foundations_rootsInGroups)
(reverse $ sortOn (length . fst) seabed) (reverse $ sortOn (length . fst) seabed)
...@@ -714,7 +714,7 @@ temporalMatching ladder phylo = updatePhyloGroups 1 ...@@ -714,7 +714,7 @@ temporalMatching ladder phylo = updatePhyloGroups 1
(getPeriodIds phylo) (getPeriodIds phylo)
(similarity $ getConfig phylo) (similarity $ getConfig phylo)
(List.head ladder) (List.head ladder)
(phylo ^. phylo_timeDocs) (getDocsByDate phylo)
(phylo ^. phylo_timeCooc) (getCoocByDate phylo)
((phylo ^. phylo_foundations) ^. foundations_rootsInGroups) ((phylo ^. phylo_foundations) ^. foundations_rootsInGroups)
(traceTemporalMatching $ getGroupsFromScale 1 phylo) (traceTemporalMatching $ getGroupsFromScale 1 phylo)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment