Commit 5f77499e authored by Alexandre Delanoë's avatar Alexandre Delanoë

[MERGE] dev-phylo

parent 5d9172ee
......@@ -38,7 +38,7 @@ import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
......@@ -50,7 +50,7 @@ import qualified Data.Text as T
import qualified Data.Vector as Vector
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
data Backup = BackupPhyloWithoutLink | BackupPhylo deriving (Show)
---------------
-- | Tools | --
......@@ -153,9 +153,9 @@ seaToLabel config = case (seaElevation config) of
sensToLabel :: PhyloConfig -> [Char]
sensToLabel config = case (phyloProximity config) of
Hamming _ -> undefined
WeightedLogJaccard s -> ("WeightedLogJaccard_" <> show s)
WeightedLogSim s -> ( "WeightedLogSim-sens_" <> show s)
Hamming _ _ -> undefined
WeightedLogJaccard s _ -> ("WeightedLogJaccard_" <> show s)
WeightedLogSim s _ -> ( "WeightedLogSim-sens_" <> show s)
cliqueToLabel :: PhyloConfig -> [Char]
......@@ -179,7 +179,7 @@ configToLabel :: PhyloConfig -> [Char]
configToLabel config = outputPath config
<> (unpack $ phyloName config)
<> "-" <> (timeToLabel config)
<> "-scale_" <> (show (phyloLevel config))
<> "-scale_" <> (show (phyloScale config))
<> "-" <> (seaToLabel config)
<> "-" <> (sensToLabel config)
<> "-" <> (cliqueToLabel config)
......@@ -189,18 +189,18 @@ configToLabel config = outputPath config
-- To write a sha256 from a set of config's parameters
configToSha :: PhyloStage -> PhyloConfig -> [Char]
configToSha :: Backup -> PhyloConfig -> [Char]
configToSha stage config = unpack
$ replace "/" "-"
$ T.pack (show (hash $ C8.pack label))
where
label :: [Char]
label = case stage of
PhyloWithCliques -> (corpusPath config)
BackupPhyloWithoutLink -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
PhyloWithLinks -> (corpusPath config)
BackupPhylo -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
......@@ -208,7 +208,7 @@ configToSha stage config = unpack
<> (seaToLabel config)
<> (syncToLabel config)
<> (qualToConfig config)
<> (show (phyloLevel config))
<> (show (phyloScale config))
readListV4 :: [Char] -> IO NgramsList
......@@ -255,55 +255,38 @@ main = do
printIOMsg "Reconstruct the phylo"
let phyloWithCliquesFile = (outputPath config) <> "phyloWithCliques_" <> (configToSha PhyloWithCliques config) <> ".json"
let phyloWithLinksFile = (outputPath config) <> "phyloWithLinks_" <> (configToSha PhyloWithLinks config) <> ".json"
-- check the existing backup files
phyloWithCliquesExists <- doesFileExist phyloWithCliquesFile
phyloWithLinksExists <- doesFileExist phyloWithLinksFile
let backupPhyloWithoutLink = (outputPath config) <> "backupPhyloWithoutLink_" <> (configToSha BackupPhyloWithoutLink config) <> ".json"
let backupPhylo = (outputPath config) <> "backupPhylo_" <> (configToSha BackupPhylo config) <> ".json"
-- phyloStep <- if phyloWithCliquesExists
-- then do
-- printIOMsg "Reconstruct the phylo step from an existing file"
-- readPhylo phyloWithCliquesFile
-- else do
-- printIOMsg "Reconstruct the phylo step from scratch"
-- pure $ toPhyloStep corpus mapList config
phyloWithoutLinkExists <- doesFileExist backupPhyloWithoutLink
phyloExists <- doesFileExist backupPhylo
-- writePhylo phyloWithCliquesFile phyloStep
-- reconstruct the phylo
-- let phylo = toPhylo (setConfig config phyloStep)
phylo <- if phyloExists
then do
printIOMsg "Reconstruct the phylo from an existing file"
readPhylo backupPhylo
else do
if phyloWithoutLinkExists
then do
printIOMsg "Reconstruct the phylo from an existing file without links"
phyloWithoutLink <- readPhylo backupPhyloWithoutLink
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithoutLink <- pure $ toPhyloWithoutLink corpus mapList config
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
phyloWithLinks <- if phyloWithLinksExists
then do
printIOMsg "Reconstruct the phylo from an existing file with intertemporal links"
readPhylo phyloWithLinksFile
else do
if phyloWithCliquesExists
then do
printIOMsg "Reconstruct the phylo from an existing file with cliques"
phyloWithCliques <- readPhylo phyloWithCliquesFile
writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithCliques <- pure $ toPhyloStep corpus mapList config
writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques)
writePhylo phyloWithLinksFile phyloWithLinks
-- probes
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_synchronic_distance_cumu_jaccard.txt")
-- $ synchronicDistance' phylo 1
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_inflexion_points.txt")
-- $ inflexionPoints phylo 1
writePhylo backupPhylo phylo
printIOMsg "End of reconstruction, start the export"
let dot = toPhyloExport (setConfig config phyloWithLinks)
let dot = toPhyloExport (setConfig config phylo)
let output = configToLabel config
......
......@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.6.5.1
version: 0.0.6.5.1
synopsis: Search, map, share
description: Please see README.md
category: Data
......
......@@ -72,7 +72,7 @@ type Neighbor = Node
-- | getMaxCliques
-- TODO chose distance order
getMaxCliques :: Ord a => CliqueFilter -> Distance -> Threshold -> Map (a, a) Int -> [[a]]
getMaxCliques :: Ord a => MaxCliqueFilter -> Distance -> Threshold -> Map (a, a) Int -> [[a]]
getMaxCliques f d t m = map fromIndices $ getMaxCliques' t m'
where
m' = toIndex to m
......
This diff is collapsed.
......@@ -27,7 +27,7 @@ import Gargantext.Core.Types (TODO(..))
import Gargantext.Core.Viz.LegacyPhylo
import Gargantext.Core.Viz.Phylo (defaultConfig)
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.Example (phyloExample)
import Gargantext.Core.Viz.Phylo.Example (phyloCleopatre)
import Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (PhyloId, ListId, CorpusId, UserId, NodeId(..))
......@@ -118,7 +118,8 @@ getPhylo phyloId lId _level _minSizeBranch = do
getPhyloDataJson :: PhyloId -> GargNoServer Value
getPhyloDataJson phyloId = do
phyloData <- fromMaybe phyloExample <$> getPhyloData phyloId
maybePhyloData <- getPhyloData phyloId
let phyloData = fromMaybe phyloCleopatre maybePhyloData
phyloJson <- liftBase $ phylo2dot2json phyloData
pure phyloJson
......
......@@ -30,7 +30,7 @@ import Gargantext.Core.Types (Context)
import Gargantext.Core.Types.Main (ListType(MapTerm))
import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo)
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..))
......@@ -87,9 +87,9 @@ phylo2dot2json phylo = do
flowPhyloAPI :: PhyloConfig -> CorpusId -> GargNoServer Phylo
flowPhyloAPI config cId = do
(mapList, corpus) <- corpusIdtoDocuments (timeUnit config) cId
temporalSeries <- pure $ toPhyloStep corpus mapList config
phyloWithCliques <- pure $ toPhyloWithoutLink corpus mapList config
-- writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config temporalSeries)
pure $ toPhylo (setConfig config phyloWithCliques)
--------------------------------------------------------------------
corpusIdtoDocuments :: TimeUnit -> CorpusId -> GargNoServer (TermList, [Document])
......
......@@ -27,9 +27,10 @@ import Gargantext.Core.Viz.Phylo.PhyloExport
import Gargantext.Core.Viz.Phylo.PhyloMaker
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching)
import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching)
import Gargantext.Prelude
import qualified Data.Vector as Vector
import qualified Data.Set as Set
---------------------------------
-- | STEP 5 | -- Export the phylo
......@@ -39,49 +40,50 @@ phyloExport :: IO ()
phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot" phyloDot
phyloDot :: DotGraph DotId
phyloDot = toPhyloExport phyloExample
phyloDot = toPhyloExport phyloCleopatre
--------------------------------------------------
-- | STEP 4 | -- Process the synchronic clustering
--------------------------------------------------
phyloExample :: Phylo
phyloExample = synchronicClustering $ toHorizon phylo1
phyloCleopatre :: Phylo
phyloCleopatre = synchronicClustering $ toHorizon flatPhylo
-----------------------------------------------
-- | STEP 3 | -- Build the Level 1 of the Phylo
-----------------------------------------------
phylo1 :: Phylo
phylo1 = case (getSeaElevation phyloBase) of
Constante s g -> constanteTemporalMatching s g
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
Adaptative s -> adaptativeTemporalMatching s
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
flatPhylo :: Phylo
flatPhylo = case (getSeaElevation emptyPhylo) of
Constante s g -> temporalMatching (constDiachronicLadder s g Set.empty)
$ scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
Adaptative s -> temporalMatching (adaptDiachronicLadder s (emptyPhylo' ^. phylo_diaSimScan) Set.empty) emptyPhylo'
emptyPhylo' :: Phylo
emptyPhylo' = scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
---------------------------------------------
-- | STEP 2 | -- Build the cliques
---------------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique]
phyloClique = toPhyloClique phyloBase docsByPeriods
seriesOfClustering :: Map (Date,Date) [Clustering]
seriesOfClustering = toSeriesOfClustering emptyPhylo docsByPeriods
docsByPeriods :: Map (Date,Date) [Document]
docsByPeriods = groupDocsByPeriod date periods docs
--------------------------------------------
-- | STEP 1 | -- Init the Base of the Phylo
--------------------------------------------
---------------------------------
-- | STEP 1 | -- Init the Phylo
---------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase docs mapList config
emptyPhylo :: Phylo
emptyPhylo = initPhylo docs mapList config
phyloCooc :: Map Date Cooc
......@@ -101,7 +103,8 @@ nbDocsByYear = docsToTimeScaleNb docs
config :: PhyloConfig
config =
defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloLevel = 2
, phyloScale = 2
, seaElevation = Adaptative 4
, exportFilter = [ByBranchSize 0]
, clique = MaxClique 0 15 ByNeighbours }
......
......@@ -26,7 +26,7 @@ import Debug.Trace (trace)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
import Gargantext.Prelude
import Gargantext.Prelude hiding (scale)
import Prelude (writeFile)
import System.FilePath
import qualified Data.GraphViz.Attributes.HTML as H
......@@ -73,7 +73,7 @@ groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show
branchIdToDotId :: PhyloBranchId -> DotId
branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
periodIdToDotId :: PhyloPeriodId -> DotId
periodIdToDotId :: Period -> DotId
periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
......@@ -220,9 +220,10 @@ exportToDot phylo export =
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
,(toAttr (fromStrict "phyloLevel") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
,(toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (_cons_start $ getSeaElevation phylo))
,(toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (_cons_step $ getSeaElevation phylo))
,(toAttr (fromStrict "PhyloScale") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
,(toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality))
,(toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo))
,(toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo))
-- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
])
......@@ -249,7 +250,7 @@ exportToDot phylo export =
_ <- mapM (\period ->
subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
graphAttrs [Rank SameRank]
periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriod')
periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
{-- 6) create a node for each group -}
mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
......@@ -372,9 +373,9 @@ sortByBirthDate order export =
processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
processSort sort' elev export = case sort' of
ByBirthDate o -> sortByBirthDate o export
ByHierarchy _ -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
$ sortByHierarchy 0 (export ^. export_branches))
ByHierarchy _ -> case elev of
Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
-----------------
-- | Metrics | --
......@@ -545,9 +546,10 @@ processLabels labels foundations freq export =
-- | Dynamics | --
------------------
toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
toDynamics n parents g m =
-- utiliser & creer une Map FdtId [PhyloGroup]
-- n = index of the current term
toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
toDynamics n elders g m =
let prd = g ^. phylo_groupPeriod
end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
......@@ -563,18 +565,18 @@ toDynamics n parents g m =
where
--------------------------------------
isNew :: Bool
isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
type FdtId = Int
processDynamics :: [PhyloGroup] -> [PhyloGroup]
processDynamics groups =
map (\g ->
let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
&& ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
where
--------------------------------------
mapNgrams :: Map Int (Date,Date)
mapNgrams :: Map FdtId (Date,Date)
mapNgrams = map (\dates ->
let dates' = sort dates
in (head' "dynamics" dates', last' "dynamics" dates'))
......@@ -615,28 +617,28 @@ headsToAncestors nbDocs diago proximity step heads acc =
toHorizon :: Phylo -> Phylo
toHorizon phylo =
let phyloAncestor = updatePhyloGroups
level
scale
(fromList $ map (\g -> (getGroupId g, g))
$ concat
$ tracePhyloAncestors newGroups) phylo
reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
in updatePhyloGroups level reBranched phylo
$ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
in updatePhyloGroups scale reBranched phylo
where
-- | 1) for each periods
periods :: [PhyloPeriodId]
periods :: [Period]
periods = getPeriodIds phylo
-- --
level :: Level
level = getLastLevel phylo
scale :: Scale
scale = getLastLevel phylo
-- --
frame :: Int
frame = getTimeFrame $ timeUnit $ getConfig phylo
-- | 2) find ancestors between groups without parents
mapGroups :: [[PhyloGroup]]
mapGroups = map (\prd ->
let groups = getGroupsFromLevelPeriods level [prd] phylo
childs = getPreviousChildIds level frame prd periods phylo
let groups = getGroupsFromScalePeriods scale [prd] phylo
childs = getPreviousChildIds scale frame prd periods phylo
-- maybe add a better filter for non isolated ancestors
heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
$ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
......@@ -646,7 +648,7 @@ toHorizon phylo =
proximity = (phyloProximity $ getConfig phylo)
step = case getSeaElevation phylo of
Constante _ s -> s
Adaptative _ -> undefined
Adaptative _ -> 0
-- in headsToAncestors nbDocs diago proximity heads groups []
in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
$ headsToAncestors nbDocs diago proximity step heads []
......@@ -656,10 +658,10 @@ toHorizon phylo =
newGroups = mapGroups `using` parList rdeepseq
--------------------------------------
getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
getPreviousChildIds lvl frame curr prds phylo =
concat $ map ((map fst) . _phylo_groupPeriodChilds)
$ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
$ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
---------------------
-- | phyloExport | --
......@@ -694,10 +696,10 @@ toPhyloExport phylo = exportToDot phylo
--------------------------------------
groups :: [PhyloGroup]
groups = traceExportGroups
-- necessaire ?
$ processDynamics
$ getGroupsFromLevel (phyloLevel $ getConfig phylo)
$ getGroupsFromScale (phyloScale $ getConfig phylo)
$ tracePhyloInfo phylo
-- \$ toHorizon phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
......
This diff is collapsed.
This diff is collapsed.
......@@ -60,19 +60,19 @@ mergeGroups coocs id mapIds childs =
mergeAncestors :: [Pointer] -> [Pointer]
mergeAncestors pointers = Map.toList $ fromListWith max pointers
addPhyloLevel :: Level -> Phylo -> Phylo
addPhyloLevel lvl phylo =
addPhyloScale :: Scale -> Phylo -> Phylo
addPhyloScale lvl phylo =
over ( phylo_periods . traverse )
(\phyloPrd -> phyloPrd & phylo_periodLevels
(\phyloPrd -> phyloPrd & phylo_periodScales
%~ (insert (phyloPrd ^. phylo_periodPeriod, lvl)
(PhyloLevel (phyloPrd ^. phylo_periodPeriod) (phyloPrd ^. phylo_periodPeriod') lvl empty))) phylo
(PhyloScale (phyloPrd ^. phylo_periodPeriod) (phyloPrd ^. phylo_periodPeriodStr) lvl empty))) phylo
toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo
toNextLevel' phylo groups =
toNextScale :: Phylo -> [PhyloGroup] -> Phylo
toNextScale phylo groups =
let curLvl = getLastLevel phylo
oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups
newGroups = concat $ groupsToBranches
newGroups = concat $ groupsToBranches'
$ fromList $ map (\g -> (getGroupId g, g))
$ foldlWithKey (\acc id groups' ->
-- 4) create the parent group
......@@ -83,17 +83,17 @@ toNextLevel' phylo groups =
newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups
in traceSynchronyEnd
$ over ( phylo_periods . traverse . phylo_periodLevels . traverse
$ over ( phylo_periods . traverse . phylo_periodScales . traverse
-- 6) update each period at curLvl + 1
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1)))
. filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == (curLvl + 1)))
-- 7) by adding the parents
(\phyloLvl ->
if member (phyloLvl ^. phylo_levelPeriod) newPeriods
then phyloLvl & phylo_levelGroups
.~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_levelPeriod))
if member (phyloLvl ^. phylo_scalePeriod) newPeriods
then phyloLvl & phylo_scaleGroups
.~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_scalePeriod))
else phyloLvl)
-- 2) add the curLvl + 1 phyloLevel to the phylo
$ addPhyloLevel (curLvl + 1)
-- 2) add the curLvl + 1 PhyloScale to the phylo
$ addPhyloScale (curLvl + 1)
-- 1) update the current groups (with level parent pointers) in the phylo
$ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
......@@ -140,17 +140,17 @@ groupsToEdges prox sync nbDocs diago groups =
toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)]
toEdges sens edges =
case prox of
WeightedLogJaccard _ -> map (\(g,g') ->
WeightedLogJaccard _ _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' (sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
WeightedLogSim _ -> map (\(g,g') ->
WeightedLogSim _ _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' (1 / sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
_ -> undefined
toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex)
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupScale + 1), child ^. phylo_groupIndex)
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
......@@ -159,6 +159,7 @@ reduceGroups prox sync docs diagos branch =
let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems)
-- TODO : ajouter un parallelisme
$ mapWithKey (\prd groups ->
-- 2) for each period, transform the groups as a proximity graph filtered by a threshold
let diago = reduceDiagos $ filterDiago diagos [prd]
......@@ -166,17 +167,17 @@ reduceGroups prox sync docs diagos branch =
in map (\comp ->
-- 4) add to each groups their futur level parent group
let parentId = toParentId (head' "parentId" comp)
in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp )
in map (\g -> g & phylo_groupScaleParents %~ (++ [(parentId,1)]) ) comp )
-- 3) reduce the graph a a set of related components
$ toRelatedComponents groups edges) periods
adjustClustering :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
adjustClustering sync branches = case sync of
chooseClusteringStrategy :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
chooseClusteringStrategy sync branches = case sync of
ByProximityThreshold _ _ scope _ -> case scope of
SingleBranch -> branches
SiblingBranches -> groupBy (\g g' -> (last' "adjustClustering" $ (g ^. phylo_groupMeta) ! "breaks")
== (last' "adjustClustering" $ (g' ^. phylo_groupMeta) ! "breaks"))
SiblingBranches -> groupBy (\g g' -> (last' "chooseClusteringStrategy" $ (g ^. phylo_groupMeta) ! "breaks")
== (last' "chooseClusteringStrategy" $ (g' ^. phylo_groupMeta) ! "breaks"))
$ sortOn _phylo_groupBranchId $ concat branches
AllBranches -> [concat branches]
ByProximityDistribution _ _ -> branches
......@@ -185,7 +186,7 @@ adjustClustering sync branches = case sync of
levelUpAncestors :: [PhyloGroup] -> [PhyloGroup]
levelUpAncestors groups =
-- 1) create an associative map of (old,new) ids
let ids' = fromList $ map (\g -> (getGroupId g, fst $ head' "levelUpAncestors" ( g ^. phylo_groupLevelParents))) groups
let ids' = fromList $ map (\g -> (getGroupId g, fst $ head' "levelUpAncestors" ( g ^. phylo_groupScaleParents))) groups
in map (\g ->
let id' = ids' ! (getGroupId g)
ancestors = g ^. phylo_groupAncestors
......@@ -202,11 +203,11 @@ synchronicClustering phylo =
diagos = map coocToDiago $ phylo ^. phylo_timeCooc
newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics
$ adjustClustering sync
$ phyloToLastBranches
$ chooseClusteringStrategy sync
$ phyloLastScale
$ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq
in toNextLevel' phylo $ levelUpAncestors $ concat newBranches'
in toNextScale phylo $ levelUpAncestors $ concat newBranches'
-- synchronicDistance :: Phylo -> Level -> String
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment