Commit 5f77499e authored by Alexandre Delanoë's avatar Alexandre Delanoë

[MERGE] dev-phylo

parent 5d9172ee
...@@ -38,7 +38,7 @@ import Gargantext.Core.Types.Main (ListType(..)) ...@@ -38,7 +38,7 @@ import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile) import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep) import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig) import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Database.Schema.Ngrams (NgramsType(..))
...@@ -50,7 +50,7 @@ import qualified Data.Text as T ...@@ -50,7 +50,7 @@ import qualified Data.Text as T
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show) data Backup = BackupPhyloWithoutLink | BackupPhylo deriving (Show)
--------------- ---------------
-- | Tools | -- -- | Tools | --
...@@ -153,9 +153,9 @@ seaToLabel config = case (seaElevation config) of ...@@ -153,9 +153,9 @@ seaToLabel config = case (seaElevation config) of
sensToLabel :: PhyloConfig -> [Char] sensToLabel :: PhyloConfig -> [Char]
sensToLabel config = case (phyloProximity config) of sensToLabel config = case (phyloProximity config) of
Hamming _ -> undefined Hamming _ _ -> undefined
WeightedLogJaccard s -> ("WeightedLogJaccard_" <> show s) WeightedLogJaccard s _ -> ("WeightedLogJaccard_" <> show s)
WeightedLogSim s -> ( "WeightedLogSim-sens_" <> show s) WeightedLogSim s _ -> ( "WeightedLogSim-sens_" <> show s)
cliqueToLabel :: PhyloConfig -> [Char] cliqueToLabel :: PhyloConfig -> [Char]
...@@ -179,7 +179,7 @@ configToLabel :: PhyloConfig -> [Char] ...@@ -179,7 +179,7 @@ configToLabel :: PhyloConfig -> [Char]
configToLabel config = outputPath config configToLabel config = outputPath config
<> (unpack $ phyloName config) <> (unpack $ phyloName config)
<> "-" <> (timeToLabel config) <> "-" <> (timeToLabel config)
<> "-scale_" <> (show (phyloLevel config)) <> "-scale_" <> (show (phyloScale config))
<> "-" <> (seaToLabel config) <> "-" <> (seaToLabel config)
<> "-" <> (sensToLabel config) <> "-" <> (sensToLabel config)
<> "-" <> (cliqueToLabel config) <> "-" <> (cliqueToLabel config)
...@@ -189,18 +189,18 @@ configToLabel config = outputPath config ...@@ -189,18 +189,18 @@ configToLabel config = outputPath config
-- To write a sha256 from a set of config's parameters -- To write a sha256 from a set of config's parameters
configToSha :: PhyloStage -> PhyloConfig -> [Char] configToSha :: Backup -> PhyloConfig -> [Char]
configToSha stage config = unpack configToSha stage config = unpack
$ replace "/" "-" $ replace "/" "-"
$ T.pack (show (hash $ C8.pack label)) $ T.pack (show (hash $ C8.pack label))
where where
label :: [Char] label :: [Char]
label = case stage of label = case stage of
PhyloWithCliques -> (corpusPath config) BackupPhyloWithoutLink -> (corpusPath config)
<> (listPath config) <> (listPath config)
<> (timeToLabel config) <> (timeToLabel config)
<> (cliqueToLabel config) <> (cliqueToLabel config)
PhyloWithLinks -> (corpusPath config) BackupPhylo -> (corpusPath config)
<> (listPath config) <> (listPath config)
<> (timeToLabel config) <> (timeToLabel config)
<> (cliqueToLabel config) <> (cliqueToLabel config)
...@@ -208,7 +208,7 @@ configToSha stage config = unpack ...@@ -208,7 +208,7 @@ configToSha stage config = unpack
<> (seaToLabel config) <> (seaToLabel config)
<> (syncToLabel config) <> (syncToLabel config)
<> (qualToConfig config) <> (qualToConfig config)
<> (show (phyloLevel config)) <> (show (phyloScale config))
readListV4 :: [Char] -> IO NgramsList readListV4 :: [Char] -> IO NgramsList
...@@ -255,55 +255,38 @@ main = do ...@@ -255,55 +255,38 @@ main = do
printIOMsg "Reconstruct the phylo" printIOMsg "Reconstruct the phylo"
let phyloWithCliquesFile = (outputPath config) <> "phyloWithCliques_" <> (configToSha PhyloWithCliques config) <> ".json" -- check the existing backup files
let phyloWithLinksFile = (outputPath config) <> "phyloWithLinks_" <> (configToSha PhyloWithLinks config) <> ".json"
phyloWithCliquesExists <- doesFileExist phyloWithCliquesFile let backupPhyloWithoutLink = (outputPath config) <> "backupPhyloWithoutLink_" <> (configToSha BackupPhyloWithoutLink config) <> ".json"
phyloWithLinksExists <- doesFileExist phyloWithLinksFile let backupPhylo = (outputPath config) <> "backupPhylo_" <> (configToSha BackupPhylo config) <> ".json"
-- phyloStep <- if phyloWithCliquesExists phyloWithoutLinkExists <- doesFileExist backupPhyloWithoutLink
-- then do phyloExists <- doesFileExist backupPhylo
-- printIOMsg "Reconstruct the phylo step from an existing file"
-- readPhylo phyloWithCliquesFile
-- else do
-- printIOMsg "Reconstruct the phylo step from scratch"
-- pure $ toPhyloStep corpus mapList config
-- writePhylo phyloWithCliquesFile phyloStep -- reconstruct the phylo
-- let phylo = toPhylo (setConfig config phyloStep) phylo <- if phyloExists
phyloWithLinks <- if phyloWithLinksExists
then do then do
printIOMsg "Reconstruct the phylo from an existing file with intertemporal links" printIOMsg "Reconstruct the phylo from an existing file"
readPhylo phyloWithLinksFile readPhylo backupPhylo
else do else do
if phyloWithCliquesExists if phyloWithoutLinkExists
then do then do
printIOMsg "Reconstruct the phylo from an existing file with cliques" printIOMsg "Reconstruct the phylo from an existing file without links"
phyloWithCliques <- readPhylo phyloWithCliquesFile phyloWithoutLink <- readPhylo backupPhyloWithoutLink
writePhylo phyloWithCliquesFile phyloWithCliques writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithCliques) pure $ toPhylo (setConfig config phyloWithoutLink)
else do else do
printIOMsg "Reconstruct the phylo from scratch" printIOMsg "Reconstruct the phylo from scratch"
phyloWithCliques <- pure $ toPhyloStep corpus mapList config phyloWithoutLink <- pure $ toPhyloWithoutLink corpus mapList config
writePhylo phyloWithCliquesFile phyloWithCliques writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithCliques) pure $ toPhylo (setConfig config phyloWithoutLink)
writePhylo phyloWithLinksFile phyloWithLinks
-- probes
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_synchronic_distance_cumu_jaccard.txt")
-- $ synchronicDistance' phylo 1
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_inflexion_points.txt") writePhylo backupPhylo phylo
-- $ inflexionPoints phylo 1
printIOMsg "End of reconstruction, start the export" printIOMsg "End of reconstruction, start the export"
let dot = toPhyloExport (setConfig config phyloWithLinks) let dot = toPhyloExport (setConfig config phylo)
let output = configToLabel config let output = configToLabel config
......
...@@ -72,7 +72,7 @@ type Neighbor = Node ...@@ -72,7 +72,7 @@ type Neighbor = Node
-- | getMaxCliques -- | getMaxCliques
-- TODO chose distance order -- TODO chose distance order
getMaxCliques :: Ord a => CliqueFilter -> Distance -> Threshold -> Map (a, a) Int -> [[a]] getMaxCliques :: Ord a => MaxCliqueFilter -> Distance -> Threshold -> Map (a, a) Int -> [[a]]
getMaxCliques f d t m = map fromIndices $ getMaxCliques' t m' getMaxCliques f d t m = map fromIndices $ getMaxCliques' t m'
where where
m' = toIndex to m m' = toIndex to m
......
...@@ -31,6 +31,7 @@ import Control.Lens (makeLenses) ...@@ -31,6 +31,7 @@ import Control.Lens (makeLenses)
import Data.Aeson import Data.Aeson
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Map (Map) import Data.Map (Map)
import Data.Set (Set)
import Data.Swagger import Data.Swagger
import Data.Text (Text, pack) import Data.Text (Text, pack)
import Data.Vector (Vector) import Data.Vector (Vector)
...@@ -63,9 +64,9 @@ instance ToSchema ListParser ...@@ -63,9 +64,9 @@ instance ToSchema ListParser
data SeaElevation = data SeaElevation =
Constante Constante
{ _cons_start :: Double { _cons_start :: Double
, _cons_step :: Double } , _cons_gap :: Double }
| Adaptative | Adaptative
{ _adap_granularity :: Double } { _adap_steps :: Double }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema SeaElevation instance ToSchema SeaElevation
...@@ -73,23 +74,13 @@ instance ToSchema SeaElevation ...@@ -73,23 +74,13 @@ instance ToSchema SeaElevation
data Proximity = data Proximity =
WeightedLogJaccard WeightedLogJaccard
{ _wlj_sensibility :: Double { _wlj_sensibility :: Double
{- , _wlj_minSharedNgrams :: Int }
-- , _wlj_thresholdInit :: Double
-- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double
-}
}
| WeightedLogSim | WeightedLogSim
{ _wlj_sensibility :: Double { _wls_sensibility :: Double
{- , _wls_minSharedNgrams :: Int }
-- , _wlj_thresholdInit :: Double | Hamming
-- , _wlj_thresholdStep :: Double { _hmg_sensibility :: Double
-- | max height for sea level in temporal matching , _hmg_minSharedNgrams :: Int}
-- , _wlj_elevation :: Double
-}
}
| Hamming { _wlj_sensibility :: Double }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
...@@ -150,24 +141,24 @@ instance ToSchema TimeUnit where ...@@ -150,24 +141,24 @@ instance ToSchema TimeUnit where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq) data MaxCliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
instance ToSchema CliqueFilter where instance ToSchema MaxCliqueFilter where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
data Clique = data Cluster =
Fis Fis
{ _fis_support :: Int { _fis_support :: Int
, _fis_size :: Int } , _fis_size :: Int }
| MaxClique | MaxClique
{ _mcl_size :: Int { _mcl_size :: Int
, _mcl_threshold :: Double , _mcl_threshold :: Double
, _mcl_filter :: CliqueFilter } , _mcl_filter :: MaxCliqueFilter }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema Clique where instance ToSchema Cluster where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
...@@ -187,14 +178,14 @@ data PhyloConfig = ...@@ -187,14 +178,14 @@ data PhyloConfig =
, corpusParser :: CorpusParser , corpusParser :: CorpusParser
, listParser :: ListParser , listParser :: ListParser
, phyloName :: Text , phyloName :: Text
, phyloLevel :: Int , phyloScale :: Int
, phyloProximity :: Proximity , phyloProximity :: Proximity
, seaElevation :: SeaElevation , seaElevation :: SeaElevation
, findAncestors :: Bool , findAncestors :: Bool
, phyloSynchrony :: Synchrony , phyloSynchrony :: Synchrony
, phyloQuality :: Quality , phyloQuality :: Quality
, timeUnit :: TimeUnit , timeUnit :: TimeUnit
, clique :: Clique , clique :: Cluster
, exportLabel :: [PhyloLabel] , exportLabel :: [PhyloLabel]
, exportSort :: Sort , exportSort :: Sort
, exportFilter :: [Filter] , exportFilter :: [Filter]
...@@ -207,14 +198,14 @@ data PhyloSubConfig = ...@@ -207,14 +198,14 @@ data PhyloSubConfig =
, _sc_phyloSynchrony :: Double , _sc_phyloSynchrony :: Double
, _sc_phyloQuality :: Double , _sc_phyloQuality :: Double
, _sc_timeUnit :: TimeUnit , _sc_timeUnit :: TimeUnit
, _sc_clique :: Clique , _sc_clique :: Cluster
, _sc_exportFilter :: Double , _sc_exportFilter :: Double
} }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
subConfig2config :: PhyloSubConfig -> PhyloConfig subConfig2config :: PhyloSubConfig -> PhyloConfig
subConfig2config subConfig = defaultConfig { phyloProximity = WeightedLogJaccard $ _sc_phyloProximity subConfig subConfig2config subConfig = defaultConfig { phyloProximity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1
, phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
, phyloQuality = Quality (_sc_phyloQuality subConfig) 1 , phyloQuality = Quality (_sc_phyloQuality subConfig) 1
, timeUnit = _sc_timeUnit subConfig , timeUnit = _sc_timeUnit subConfig
...@@ -231,8 +222,8 @@ defaultConfig = ...@@ -231,8 +222,8 @@ defaultConfig =
, corpusParser = Csv 100000 , corpusParser = Csv 100000
, listParser = V4 , listParser = V4
, phyloName = pack "Phylo Name" , phyloName = pack "Phylo Name"
, phyloLevel = 2 , phyloScale = 2
, phyloProximity = WeightedLogJaccard 0.5 , phyloProximity = WeightedLogJaccard 0.5 1
, seaElevation = Constante 0.1 0.1 , seaElevation = Constante 0.1 0.1
, findAncestors = False , findAncestors = False
, phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups
...@@ -269,11 +260,11 @@ instance ToJSON SeaElevation ...@@ -269,11 +260,11 @@ instance ToJSON SeaElevation
instance FromJSON TimeUnit instance FromJSON TimeUnit
instance ToJSON TimeUnit instance ToJSON TimeUnit
instance FromJSON CliqueFilter instance FromJSON MaxCliqueFilter
instance ToJSON CliqueFilter instance ToJSON MaxCliqueFilter
instance FromJSON Clique instance FromJSON Cluster
instance ToJSON Clique instance ToJSON Cluster
instance FromJSON PhyloLabel instance FromJSON PhyloLabel
instance ToJSON PhyloLabel instance ToJSON PhyloLabel
...@@ -316,8 +307,8 @@ instance ToSchema Software where ...@@ -316,8 +307,8 @@ instance ToSchema Software where
defaultSoftware :: Software defaultSoftware :: Software
defaultSoftware = defaultSoftware =
Software { _software_name = pack "Gargantext" Software { _software_name = pack "GarganText"
, _software_version = pack "v4" } , _software_version = pack "v5" }
-- | Global parameters of a Phylo -- | Global parameters of a Phylo
...@@ -334,7 +325,7 @@ instance ToSchema PhyloParam where ...@@ -334,7 +325,7 @@ instance ToSchema PhyloParam where
defaultPhyloParam :: PhyloParam defaultPhyloParam :: PhyloParam
defaultPhyloParam = defaultPhyloParam =
PhyloParam { _phyloParam_version = pack "v2.adaptative" PhyloParam { _phyloParam_version = pack "v3"
, _phyloParam_software = defaultSoftware , _phyloParam_software = defaultSoftware
, _phyloParam_config = defaultConfig } , _phyloParam_config = defaultConfig }
...@@ -346,6 +337,9 @@ defaultPhyloParam = ...@@ -346,6 +337,9 @@ defaultPhyloParam =
-- | Date : a simple Integer -- | Date : a simple Integer
type Date = Int type Date = Int
-- | DateStr : the string version of a Date
type DateStr = Text
-- | Ngrams : a contiguous sequence of n terms -- | Ngrams : a contiguous sequence of n terms
type Ngrams = Text type Ngrams = Text
...@@ -354,7 +348,7 @@ type Ngrams = Text ...@@ -354,7 +348,7 @@ type Ngrams = Text
-- Export Database to Document -- Export Database to Document
data Document = Document data Document = Document
{ date :: Date -- datatype Date {unDate :: Int} { date :: Date -- datatype Date {unDate :: Int}
, date' :: Text -- show date , date' :: DateStr -- show date
, text :: [Ngrams] , text :: [Ngrams]
, weight :: Maybe Double , weight :: Maybe Double
, sources :: [Text] , sources :: [Text]
...@@ -396,6 +390,12 @@ type Cooc = Map (Int,Int) Double ...@@ -396,6 +390,12 @@ type Cooc = Map (Int,Int) Double
-- | Phylomemy | -- -- | Phylomemy | --
------------------- -------------------
-- | Period : a tuple of Dates
type Period = (Date,Date)
-- | PeriodStr : a tuple of DateStr
type PeriodStr = (DateStr,DateStr)
-- | Phylo datatype of a phylomemy -- | Phylo datatype of a phylomemy
-- foundations : the foundations of the phylo -- foundations : the foundations of the phylo
...@@ -410,10 +410,10 @@ data Phylo = ...@@ -410,10 +410,10 @@ data Phylo =
, _phylo_timeDocs :: !(Map Date Double) , _phylo_timeDocs :: !(Map Date Double)
, _phylo_termFreq :: !(Map Int Double) , _phylo_termFreq :: !(Map Int Double)
, _phylo_lastTermFreq :: !(Map Int Double) , _phylo_lastTermFreq :: !(Map Int Double)
, _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double) , _phylo_diaSimScan :: Set Double
, _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
, _phylo_param :: PhyloParam , _phylo_param :: PhyloParam
, _phylo_periods :: Map PhyloPeriodId PhyloPeriod , _phylo_periods :: Map Period PhyloPeriod
, _phylo_quality :: Double
} }
deriving (Generic, Show, Eq) deriving (Generic, Show, Eq)
...@@ -421,57 +421,56 @@ instance ToSchema Phylo where ...@@ -421,57 +421,56 @@ instance ToSchema Phylo where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
-- | PhyloPeriodId : the id of a given period ----------------
type PhyloPeriodId = (Date,Date) -- | Period | --
----------------
-- | PhyloPeriod : steps of a phylomemy on a temporal axis -- | PhyloPeriod : steps of a phylomemy on a temporal axis
-- id: tuple (start date, end date) of the temporal step of the phylomemy -- id: tuple (start date, end date) of the temporal step of the phylomemy
-- levels: levels of granularity -- scales: scales of synchronic description
data PhyloPeriod = data PhyloPeriod =
PhyloPeriod { _phylo_periodPeriod :: (Date,Date) PhyloPeriod { _phylo_periodPeriod :: Period
, _phylo_periodPeriod' :: (Text,Text) , _phylo_periodPeriodStr :: PeriodStr
, _phylo_periodLevels :: Map PhyloLevelId PhyloLevel , _phylo_periodScales :: Map PhyloScaleId PhyloScale
} deriving (Generic, Show, Eq) } deriving (Generic, Show, Eq)
instance ToSchema PhyloPeriod where instance ToSchema PhyloPeriod where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
---------------
-- | Scale | --
---------------
-- | Scale : a scale of synchronic description
type Scale = Int
-- | Level : a level of clustering -- | PhyloScaleId : the id of a scale of synchronic description
type Level = Int type PhyloScaleId = (Period,Scale)
-- | PhyloLevelId : the id of a level of clustering in a given period
type PhyloLevelId = (PhyloPeriodId,Level)
-- | PhyloLevel : levels of phylomemy on a synchronic axis -- | PhyloScale : sub-structure of the phylomemy in scale of synchronic description
-- Levels description: data PhyloScale =
-- Level 0: The foundations and the base of the phylo PhyloScale { _phylo_scalePeriod :: Period
-- Level 1: First level of clustering (the Fis) , _phylo_scalePeriodStr :: PeriodStr
-- Level [2..N]: Nth level of synchronic clustering (cluster of Fis) , _phylo_scaleScale :: Scale
data PhyloLevel = , _phylo_scaleGroups :: Map PhyloGroupId PhyloGroup
PhyloLevel { _phylo_levelPeriod :: (Date,Date)
, _phylo_levelPeriod' :: (Text,Text)
, _phylo_levelLevel :: Level
, _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
} }
deriving (Generic, Show, Eq) deriving (Generic, Show, Eq)
instance ToSchema PhyloLevel where instance ToSchema PhyloScale where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
type PhyloGroupId = (PhyloLevelId, Int) type PhyloGroupId = (PhyloScaleId, Int)
-- | BranchId : (a level, a sequence of branch index) -- | BranchId : (a scale, a sequence of branch index)
-- the sequence is a path of heritage from the most to the less specific branch -- the sequence is a path of heritage from the most to the less specific branch
type PhyloBranchId = (Level, [Int]) type PhyloBranchId = (Scale, [Int])
-- | PhyloGroup : group of ngrams at each level and period -- | PhyloGroup : group of ngrams at each scale and period
data PhyloGroup = data PhyloGroup =
PhyloGroup { _phylo_groupPeriod :: (Date,Date) PhyloGroup { _phylo_groupPeriod :: Period
, _phylo_groupPeriod' :: (Text,Text) , _phylo_groupPeriod' :: (Text,Text)
, _phylo_groupLevel :: Level , _phylo_groupScale :: Scale
, _phylo_groupIndex :: Int , _phylo_groupIndex :: Int
, _phylo_groupLabel :: Text , _phylo_groupLabel :: Text
, _phylo_groupSupport :: Support , _phylo_groupSupport :: Support
...@@ -481,8 +480,8 @@ data PhyloGroup = ...@@ -481,8 +480,8 @@ data PhyloGroup =
, _phylo_groupCooc :: !(Cooc) , _phylo_groupCooc :: !(Cooc)
, _phylo_groupBranchId :: PhyloBranchId , _phylo_groupBranchId :: PhyloBranchId
, _phylo_groupMeta :: Map Text [Double] , _phylo_groupMeta :: Map Text [Double]
, _phylo_groupLevelParents :: [Pointer] , _phylo_groupScaleParents :: [Pointer]
, _phylo_groupLevelChilds :: [Pointer] , _phylo_groupScaleChilds :: [Pointer]
, _phylo_groupPeriodParents :: [Pointer] , _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer] , _phylo_groupPeriodChilds :: [Pointer]
, _phylo_groupAncestors :: [Pointer] , _phylo_groupAncestors :: [Pointer]
...@@ -505,22 +504,23 @@ type Pointer = (PhyloGroupId, Weight) ...@@ -505,22 +504,23 @@ type Pointer = (PhyloGroupId, Weight)
type Pointer' = (PhyloGroupId, (Thr,Weight)) type Pointer' = (PhyloGroupId, (Thr,Weight))
data Filiation = ToParents | ToChilds | ToParentsMemory | ToChildsMemory deriving (Generic, Show) data Filiation = ToParents | ToChilds | ToParentsMemory | ToChildsMemory deriving (Generic, Show)
data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show) data PointerType = TemporalPointer | ScalePointer deriving (Generic, Show)
---------------------- --------------------------
-- | Phylo Clique | -- -- | Phylo Clustering | --
---------------------- --------------------------
-- | Support : Number of Documents where a Clique occurs -- | Support : Number of Documents where a Cluster occurs
type Support = Int type Support = Int
data PhyloClique = PhyloClique data Clustering = Clustering
{ _phyloClique_nodes :: [Int] { _clustering_roots :: [Int]
, _phyloClique_support :: Support , _clustering_support :: Support
, _phyloClique_period :: (Date,Date) , _clustering_period :: Period
, _phyloClique_weight :: Maybe Double -- additional materials for visualization
, _phyloClique_sources :: [Int] , _clustering_visWeighting :: Maybe Double
, _clustering_visFiltering :: [Int]
} deriving (Generic,NFData,Show,Eq) } deriving (Generic,NFData,Show,Eq)
---------------- ----------------
...@@ -595,14 +595,14 @@ makeLenses ''PhyloSubConfig ...@@ -595,14 +595,14 @@ makeLenses ''PhyloSubConfig
makeLenses ''Proximity makeLenses ''Proximity
makeLenses ''SeaElevation makeLenses ''SeaElevation
makeLenses ''Quality makeLenses ''Quality
makeLenses ''Clique makeLenses ''Cluster
makeLenses ''PhyloLabel makeLenses ''PhyloLabel
makeLenses ''TimeUnit makeLenses ''TimeUnit
makeLenses ''PhyloFoundations makeLenses ''PhyloFoundations
makeLenses ''PhyloClique makeLenses ''Clustering
makeLenses ''Phylo makeLenses ''Phylo
makeLenses ''PhyloPeriod makeLenses ''PhyloPeriod
makeLenses ''PhyloLevel makeLenses ''PhyloScale
makeLenses ''PhyloGroup makeLenses ''PhyloGroup
makeLenses ''PhyloParam makeLenses ''PhyloParam
makeLenses ''PhyloExport makeLenses ''PhyloExport
...@@ -624,8 +624,8 @@ instance ToJSON PhyloParam ...@@ -624,8 +624,8 @@ instance ToJSON PhyloParam
instance FromJSON PhyloPeriod instance FromJSON PhyloPeriod
instance ToJSON PhyloPeriod instance ToJSON PhyloPeriod
instance FromJSON PhyloLevel instance FromJSON PhyloScale
instance ToJSON PhyloLevel instance ToJSON PhyloScale
instance FromJSON Software instance FromJSON Software
instance ToJSON Software instance ToJSON Software
......
...@@ -27,7 +27,7 @@ import Gargantext.Core.Types (TODO(..)) ...@@ -27,7 +27,7 @@ import Gargantext.Core.Types (TODO(..))
import Gargantext.Core.Viz.LegacyPhylo import Gargantext.Core.Viz.LegacyPhylo
import Gargantext.Core.Viz.Phylo (defaultConfig) import Gargantext.Core.Viz.Phylo (defaultConfig)
import Gargantext.Core.Viz.Phylo.API.Tools import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.Example (phyloExample) import Gargantext.Core.Viz.Phylo.Example (phyloCleopatre)
import Gargantext.Core.Viz.Phylo.Legacy.LegacyMain import Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
import Gargantext.Database.Admin.Types.Hyperdata import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (PhyloId, ListId, CorpusId, UserId, NodeId(..)) import Gargantext.Database.Admin.Types.Node -- (PhyloId, ListId, CorpusId, UserId, NodeId(..))
...@@ -118,7 +118,8 @@ getPhylo phyloId lId _level _minSizeBranch = do ...@@ -118,7 +118,8 @@ getPhylo phyloId lId _level _minSizeBranch = do
getPhyloDataJson :: PhyloId -> GargNoServer Value getPhyloDataJson :: PhyloId -> GargNoServer Value
getPhyloDataJson phyloId = do getPhyloDataJson phyloId = do
phyloData <- fromMaybe phyloExample <$> getPhyloData phyloId maybePhyloData <- getPhyloData phyloId
let phyloData = fromMaybe phyloCleopatre maybePhyloData
phyloJson <- liftBase $ phylo2dot2json phyloData phyloJson <- liftBase $ phylo2dot2json phyloData
pure phyloJson pure phyloJson
......
...@@ -30,7 +30,7 @@ import Gargantext.Core.Types (Context) ...@@ -30,7 +30,7 @@ import Gargantext.Core.Types (Context)
import Gargantext.Core.Types.Main (ListType(MapTerm)) import Gargantext.Core.Types.Main (ListType(MapTerm))
import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo) import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo)
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile) import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep) import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig) import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..))
...@@ -87,9 +87,9 @@ phylo2dot2json phylo = do ...@@ -87,9 +87,9 @@ phylo2dot2json phylo = do
flowPhyloAPI :: PhyloConfig -> CorpusId -> GargNoServer Phylo flowPhyloAPI :: PhyloConfig -> CorpusId -> GargNoServer Phylo
flowPhyloAPI config cId = do flowPhyloAPI config cId = do
(mapList, corpus) <- corpusIdtoDocuments (timeUnit config) cId (mapList, corpus) <- corpusIdtoDocuments (timeUnit config) cId
temporalSeries <- pure $ toPhyloStep corpus mapList config phyloWithCliques <- pure $ toPhyloWithoutLink corpus mapList config
-- writePhylo phyloWithCliquesFile phyloWithCliques -- writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config temporalSeries) pure $ toPhylo (setConfig config phyloWithCliques)
-------------------------------------------------------------------- --------------------------------------------------------------------
corpusIdtoDocuments :: TimeUnit -> CorpusId -> GargNoServer (TermList, [Document]) corpusIdtoDocuments :: TimeUnit -> CorpusId -> GargNoServer (TermList, [Document])
......
...@@ -27,9 +27,10 @@ import Gargantext.Core.Viz.Phylo.PhyloExport ...@@ -27,9 +27,10 @@ import Gargantext.Core.Viz.Phylo.PhyloExport
import Gargantext.Core.Viz.Phylo.PhyloMaker import Gargantext.Core.Viz.Phylo.PhyloMaker
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching) import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import qualified Data.Set as Set
--------------------------------- ---------------------------------
-- | STEP 5 | -- Export the phylo -- | STEP 5 | -- Export the phylo
...@@ -39,49 +40,50 @@ phyloExport :: IO () ...@@ -39,49 +40,50 @@ phyloExport :: IO ()
phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot" phyloDot phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot" phyloDot
phyloDot :: DotGraph DotId phyloDot :: DotGraph DotId
phyloDot = toPhyloExport phyloExample phyloDot = toPhyloExport phyloCleopatre
-------------------------------------------------- --------------------------------------------------
-- | STEP 4 | -- Process the synchronic clustering -- | STEP 4 | -- Process the synchronic clustering
-------------------------------------------------- --------------------------------------------------
phyloExample :: Phylo phyloCleopatre :: Phylo
phyloExample = synchronicClustering $ toHorizon phylo1 phyloCleopatre = synchronicClustering $ toHorizon flatPhylo
----------------------------------------------- -----------------------------------------------
-- | STEP 3 | -- Build the Level 1 of the Phylo -- | STEP 3 | -- Build the Level 1 of the Phylo
----------------------------------------------- -----------------------------------------------
phylo1 :: Phylo flatPhylo :: Phylo
phylo1 = case (getSeaElevation phyloBase) of flatPhylo = case (getSeaElevation emptyPhylo) of
Constante s g -> constanteTemporalMatching s g Constante s g -> temporalMatching (constDiachronicLadder s g Set.empty)
$ toGroupsProxi 1 $ scanSimilarity 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase $ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
Adaptative s -> adaptativeTemporalMatching s Adaptative s -> temporalMatching (adaptDiachronicLadder s (emptyPhylo' ^. phylo_diaSimScan) Set.empty) emptyPhylo'
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
emptyPhylo' :: Phylo
emptyPhylo' = scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
--------------------------------------------- ---------------------------------------------
-- | STEP 2 | -- Build the cliques -- | STEP 2 | -- Build the cliques
--------------------------------------------- ---------------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique] seriesOfClustering :: Map (Date,Date) [Clustering]
phyloClique = toPhyloClique phyloBase docsByPeriods seriesOfClustering = toSeriesOfClustering emptyPhylo docsByPeriods
docsByPeriods :: Map (Date,Date) [Document] docsByPeriods :: Map (Date,Date) [Document]
docsByPeriods = groupDocsByPeriod date periods docs docsByPeriods = groupDocsByPeriod date periods docs
-------------------------------------------- ---------------------------------
-- | STEP 1 | -- Init the Base of the Phylo -- | STEP 1 | -- Init the Phylo
-------------------------------------------- ---------------------------------
phyloBase :: Phylo emptyPhylo :: Phylo
phyloBase = toPhyloBase docs mapList config emptyPhylo = initPhylo docs mapList config
phyloCooc :: Map Date Cooc phyloCooc :: Map Date Cooc
...@@ -101,7 +103,8 @@ nbDocsByYear = docsToTimeScaleNb docs ...@@ -101,7 +103,8 @@ nbDocsByYear = docsToTimeScaleNb docs
config :: PhyloConfig config :: PhyloConfig
config = config =
defaultConfig { phyloName = "Cesar et Cleopatre" defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloLevel = 2 , phyloScale = 2
, seaElevation = Adaptative 4
, exportFilter = [ByBranchSize 0] , exportFilter = [ByBranchSize 0]
, clique = MaxClique 0 15 ByNeighbours } , clique = MaxClique 0 15 ByNeighbours }
......
...@@ -26,7 +26,7 @@ import Debug.Trace (trace) ...@@ -26,7 +26,7 @@ import Debug.Trace (trace)
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods) import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
import Gargantext.Prelude import Gargantext.Prelude hiding (scale)
import Prelude (writeFile) import Prelude (writeFile)
import System.FilePath import System.FilePath
import qualified Data.GraphViz.Attributes.HTML as H import qualified Data.GraphViz.Attributes.HTML as H
...@@ -73,7 +73,7 @@ groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show ...@@ -73,7 +73,7 @@ groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show
branchIdToDotId :: PhyloBranchId -> DotId branchIdToDotId :: PhyloBranchId -> DotId
branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId)) branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
periodIdToDotId :: PhyloPeriodId -> DotId periodIdToDotId :: Period -> DotId
periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd)) periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
...@@ -220,9 +220,10 @@ exportToDot phylo export = ...@@ -220,9 +220,10 @@ exportToDot phylo export =
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)) ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo)) ,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo) ,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
,(toAttr (fromStrict "phyloLevel") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo)) ,(toAttr (fromStrict "PhyloScale") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
,(toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (_cons_start $ getSeaElevation phylo)) ,(toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality))
,(toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (_cons_step $ getSeaElevation phylo)) ,(toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo))
,(toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo))
-- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo)) -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
]) ])
...@@ -249,7 +250,7 @@ exportToDot phylo export = ...@@ -249,7 +250,7 @@ exportToDot phylo export =
_ <- mapM (\period -> _ <- mapM (\period ->
subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
graphAttrs [Rank SameRank] graphAttrs [Rank SameRank]
periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriod') periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
{-- 6) create a node for each group -} {-- 6) create a node for each group -}
mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups) mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
...@@ -372,9 +373,9 @@ sortByBirthDate order export = ...@@ -372,9 +373,9 @@ sortByBirthDate order export =
processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
processSort sort' elev export = case sort' of processSort sort' elev export = case sort' of
ByBirthDate o -> sortByBirthDate o export ByBirthDate o -> sortByBirthDate o export
ByHierarchy _ -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev) ByHierarchy _ -> case elev of
$ sortByHierarchy 0 (export ^. export_branches)) Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
----------------- -----------------
-- | Metrics | -- -- | Metrics | --
...@@ -545,9 +546,10 @@ processLabels labels foundations freq export = ...@@ -545,9 +546,10 @@ processLabels labels foundations freq export =
-- | Dynamics | -- -- | Dynamics | --
------------------ ------------------
-- utiliser & creer une Map FdtId [PhyloGroup]
toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double -- n = index of the current term
toDynamics n parents g m = toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
toDynamics n elders g m =
let prd = g ^. phylo_groupPeriod let prd = g ^. phylo_groupPeriod
end = last' "dynamics" (sort $ map snd $ elems m) end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end)) in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
...@@ -563,18 +565,18 @@ toDynamics n parents g m = ...@@ -563,18 +565,18 @@ toDynamics n parents g m =
where where
-------------------------------------- --------------------------------------
isNew :: Bool isNew :: Bool
isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
type FdtId = Int
processDynamics :: [PhyloGroup] -> [PhyloGroup] processDynamics :: [PhyloGroup] -> [PhyloGroup]
processDynamics groups = processDynamics groups =
map (\g -> map (\g ->
let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId) let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
&& ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
where where
-------------------------------------- --------------------------------------
mapNgrams :: Map Int (Date,Date) mapNgrams :: Map FdtId (Date,Date)
mapNgrams = map (\dates -> mapNgrams = map (\dates ->
let dates' = sort dates let dates' = sort dates
in (head' "dynamics" dates', last' "dynamics" dates')) in (head' "dynamics" dates', last' "dynamics" dates'))
...@@ -615,28 +617,28 @@ headsToAncestors nbDocs diago proximity step heads acc = ...@@ -615,28 +617,28 @@ headsToAncestors nbDocs diago proximity step heads acc =
toHorizon :: Phylo -> Phylo toHorizon :: Phylo -> Phylo
toHorizon phylo = toHorizon phylo =
let phyloAncestor = updatePhyloGroups let phyloAncestor = updatePhyloGroups
level scale
(fromList $ map (\g -> (getGroupId g, g)) (fromList $ map (\g -> (getGroupId g, g))
$ concat $ concat
$ tracePhyloAncestors newGroups) phylo $ tracePhyloAncestors newGroups) phylo
reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
in updatePhyloGroups level reBranched phylo in updatePhyloGroups scale reBranched phylo
where where
-- | 1) for each periods -- | 1) for each periods
periods :: [PhyloPeriodId] periods :: [Period]
periods = getPeriodIds phylo periods = getPeriodIds phylo
-- -- -- --
level :: Level scale :: Scale
level = getLastLevel phylo scale = getLastLevel phylo
-- -- -- --
frame :: Int frame :: Int
frame = getTimeFrame $ timeUnit $ getConfig phylo frame = getTimeFrame $ timeUnit $ getConfig phylo
-- | 2) find ancestors between groups without parents -- | 2) find ancestors between groups without parents
mapGroups :: [[PhyloGroup]] mapGroups :: [[PhyloGroup]]
mapGroups = map (\prd -> mapGroups = map (\prd ->
let groups = getGroupsFromLevelPeriods level [prd] phylo let groups = getGroupsFromScalePeriods scale [prd] phylo
childs = getPreviousChildIds level frame prd periods phylo childs = getPreviousChildIds scale frame prd periods phylo
-- maybe add a better filter for non isolated ancestors -- maybe add a better filter for non isolated ancestors
heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds)) heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
$ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
...@@ -646,7 +648,7 @@ toHorizon phylo = ...@@ -646,7 +648,7 @@ toHorizon phylo =
proximity = (phyloProximity $ getConfig phylo) proximity = (phyloProximity $ getConfig phylo)
step = case getSeaElevation phylo of step = case getSeaElevation phylo of
Constante _ s -> s Constante _ s -> s
Adaptative _ -> undefined Adaptative _ -> 0
-- in headsToAncestors nbDocs diago proximity heads groups [] -- in headsToAncestors nbDocs diago proximity heads groups []
in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego) in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
$ headsToAncestors nbDocs diago proximity step heads [] $ headsToAncestors nbDocs diago proximity step heads []
...@@ -656,10 +658,10 @@ toHorizon phylo = ...@@ -656,10 +658,10 @@ toHorizon phylo =
newGroups = mapGroups `using` parList rdeepseq newGroups = mapGroups `using` parList rdeepseq
-------------------------------------- --------------------------------------
getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId] getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
getPreviousChildIds lvl frame curr prds phylo = getPreviousChildIds lvl frame curr prds phylo =
concat $ map ((map fst) . _phylo_groupPeriodChilds) concat $ map ((map fst) . _phylo_groupPeriodChilds)
$ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo $ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
--------------------- ---------------------
-- | phyloExport | -- -- | phyloExport | --
...@@ -694,10 +696,10 @@ toPhyloExport phylo = exportToDot phylo ...@@ -694,10 +696,10 @@ toPhyloExport phylo = exportToDot phylo
-------------------------------------- --------------------------------------
groups :: [PhyloGroup] groups :: [PhyloGroup]
groups = traceExportGroups groups = traceExportGroups
-- necessaire ?
$ processDynamics $ processDynamics
$ getGroupsFromLevel (phyloLevel $ getConfig phylo) $ getGroupsFromScale (phyloScale $ getConfig phylo)
$ tracePhyloInfo phylo $ tracePhyloInfo phylo
-- \$ toHorizon phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch] traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
......
...@@ -15,9 +15,11 @@ import Control.Lens hiding (Level) ...@@ -15,9 +15,11 @@ import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail) import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert) import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import Data.Vector (Vector) import Data.Vector (Vector)
import Debug.Trace (trace) import Debug.Trace (trace)
import Prelude (floor)
import Gargantext.Core.Methods.Distances (Distance(Conditional)) import Gargantext.Core.Methods.Distances (Distance(Conditional))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques) import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
...@@ -27,7 +29,7 @@ import Gargantext.Core.Viz.Phylo ...@@ -27,7 +29,7 @@ import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon) import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity) import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Set as Set import qualified Data.Set as Set
...@@ -40,7 +42,7 @@ import qualified Data.Vector as Vector ...@@ -40,7 +42,7 @@ import qualified Data.Vector as Vector
{- {-
-- TODO AD -- TODO AD
data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo} data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
| PhyloN { _phylo'_phylo1 :: Phylo} | PhyloN { _phylo'_flatPhylo :: Phylo}
toPhylo' :: Phylo' -> [Document] -> TermList -> PhyloConfig -> Phylo toPhylo' :: Phylo' -> [Document] -> TermList -> PhyloConfig -> Phylo
...@@ -48,42 +50,70 @@ toPhylo' (PhyloN phylo) = toPhylo' ...@@ -48,42 +50,70 @@ toPhylo' (PhyloN phylo) = toPhylo'
toPhylo' (PhyloBase phylo) = toPhylo toPhylo' (PhyloBase phylo) = toPhylo
-} -}
-- TODO an adaptative synchronic clustering with a slider
toPhylo :: Phylo -> Phylo toPhylo :: Phylo -> Phylo
toPhylo phyloStep = trace ("# phylo1 groups " <> show(length $ getGroupsFromLevel 1 phylo1)) toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGroupsFromScale 1 flatPhylo))
$ traceToPhylo (phyloLevel $ getConfig phyloStep) $ $ traceToPhylo (phyloScale $ getConfig phylowithoutLink) $
if (phyloLevel $ getConfig phyloStep) > 1 if (phyloScale $ getConfig phylowithoutLink) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloLevel $ getConfig phyloStep)] then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloScale $ getConfig phylowithoutLink)]
else phylo1 else phyloAncestors
where where
-------------------------------------- --------------------------------------
phyloAncestors :: Phylo phyloAncestors :: Phylo
phyloAncestors = phyloAncestors =
if (findAncestors $ getConfig phyloStep) if (findAncestors $ getConfig phylowithoutLink)
then toHorizon phylo1 then toHorizon flatPhylo
else phylo1 else flatPhylo
-------------------------------------- --------------------------------------
phylo1 :: Phylo flatPhylo :: Phylo
phylo1 = toPhylo1 phyloStep flatPhylo = addTemporalLinksToPhylo phylowithoutLink
-------------------------------------- --------------------------------------
-------------------- -----------------------------
-- | To Phylo 1 | -- -- | Create a flat Phylo | --
-------------------- -----------------------------
{-
-- create an adaptative diachronic 'sea elevation' ladder
-}
adaptDiachronicLadder :: Double -> Set Double -> Set Double -> [Double]
adaptDiachronicLadder curr similarities ladder =
if curr <= 0 || Set.null similarities
then Set.toList ladder
else
let idx = ((Set.size similarities) `div` (floor curr)) - 1
thr = Set.elemAt idx similarities
-- we use a sliding methods 1/10, then 1/9, then ... 1/2
in adaptDiachronicLadder (curr -1) (Set.filter (> thr) similarities) (Set.insert thr ladder)
{-
-- create a constante diachronic 'sea elevation' ladder
-}
constDiachronicLadder :: Double -> Double -> Set Double -> [Double]
constDiachronicLadder curr step ladder =
if curr > 1
then Set.toList ladder
else constDiachronicLadder (curr + step) step (Set.insert curr ladder)
toGroupsProxi :: Level -> Phylo -> Phylo {-
toGroupsProxi lvl phylo = -- process an initial scanning of the kinship links
-}
scanSimilarity :: Scale -> Phylo -> Phylo
scanSimilarity lvl phylo =
let proximity = phyloProximity $ getConfig phylo let proximity = phyloProximity $ getConfig phylo
groupsProxi = foldlWithKey (\acc pId pds -> scanning = foldlWithKey (\acc pId pds ->
-- 1) process period by period -- 1) process period by period
let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams))
$ elems $ elems
$ view ( phylo_periodLevels $ view ( phylo_periodScales
. traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl) . traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
. phylo_levelGroups ) pds . phylo_scaleGroups ) pds
next = getNextPeriods ToParents (getTimeFrame $ timeUnit $ getConfig phylo) pId (keys $ phylo ^. phylo_periods) next = getNextPeriods ToParents (getTimeFrame $ timeUnit $ getConfig phylo) pId (keys $ phylo ^. phylo_periods)
targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromLevelPeriods lvl next phylo targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromScalePeriods lvl next phylo
docs = filterDocs (phylo ^. phylo_timeDocs) ([pId] ++ next) docs = filterDocs (phylo ^. phylo_timeDocs) ([pId] ++ next)
diagos = filterDiago (phylo ^. phylo_timeCooc) ([pId] ++ next) diagos = filterDiago (phylo ^. phylo_timeCooc) ([pId] ++ next)
-- 2) compute the pairs in parallel -- 2) compute the pairs in parallel
...@@ -97,22 +127,23 @@ toGroupsProxi lvl phylo = ...@@ -97,22 +127,23 @@ toGroupsProxi lvl phylo =
pairs' = pairs `using` parList rdeepseq pairs' = pairs `using` parList rdeepseq
in acc ++ (concat pairs') in acc ++ (concat pairs')
) [] $ phylo ^. phylo_periods ) [] $ phylo ^. phylo_periods
in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi) in phylo & phylo_diaSimScan .~ Set.fromList (traceGroupsProxi $ map snd scanning)
appendGroups :: (a -> PhyloPeriodId -> (Text,Text) -> Level -> Int -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n") appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods $ over ( phylo_periods
. traverse . traverse
. phylo_periodLevels . phylo_periodScales
. traverse) . traverse)
(\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel) (\phyloLvl -> if lvl == (phyloLvl ^. phylo_scaleScale)
then then
let pId = phyloLvl ^. phylo_levelPeriod let pId = phyloLvl ^. phylo_scalePeriod
pId' = phyloLvl ^. phylo_levelPeriod' pId' = phyloLvl ^. phylo_scalePeriodStr
phyloCUnit = m ! pId phyloCUnit = m ! pId
in phyloLvl in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj -> & phylo_scaleGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups) groups ++ [ (((pId,lvl),length groups)
, f obj pId pId' lvl (length groups) , f obj pId pId' lvl (length groups)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId])) (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
...@@ -122,22 +153,27 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -122,22 +153,27 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo phylo
cliqueToGroup :: PhyloClique -> PhyloPeriodId -> (Text,Text) -> Level -> Int -> [Cooc] -> PhyloGroup clusterToGroup :: Clustering -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup
cliqueToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx "" clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
(fis ^. phyloClique_support) (fis ^. clustering_support )
(fis ^. phyloClique_weight) (fis ^. clustering_visWeighting)
(fis ^. phyloClique_sources) (fis ^. clustering_visFiltering)
(fis ^. phyloClique_nodes) (fis ^. clustering_roots)
(ngramsToCooc (fis ^. phyloClique_nodes) coocs) (ngramsToCooc (fis ^. clustering_roots) coocs)
(1,[0]) -- branchid (lvl,[path in the branching tree]) (1,[0]) -- branchid (lvl,[path in the branching tree])
(fromList [("breaks",[0]),("seaLevels",[0])]) (fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] [] [] [] [] [] [] [] []
{-
toPhylo1 :: Phylo -> Phylo -- enhance the phylo with temporal links
toPhylo1 phyloStep = case (getSeaElevation phyloStep) of -}
Constante start gap -> constanteTemporalMatching start gap phyloStep addTemporalLinksToPhylo :: Phylo -> Phylo
Adaptative steps -> adaptativeTemporalMatching steps phyloStep addTemporalLinksToPhylo phylowithoutLink = case strategy of
Constante start gap -> temporalMatching (constDiachronicLadder start gap Set.empty) phylowithoutLink
Adaptative steps -> temporalMatching (adaptDiachronicLadder steps (phylowithoutLink ^. phylo_diaSimScan) Set.empty) phylowithoutLink
where
strategy :: SeaElevation
strategy = getSeaElevation phylowithoutLink
----------------------- -----------------------
-- | To Phylo Step | -- -- | To Phylo Step | --
...@@ -157,23 +193,24 @@ indexDates' m = map (\docs -> ...@@ -157,23 +193,24 @@ indexDates' m = map (\docs ->
-- To build the first phylo step from docs and terms -- To build the first phylo step from docs and terms
-- QL: backend entre phyloBase et phyloClique -- QL: backend entre phyloBase et Clustering
toPhyloStep :: [Document] -> TermList -> PhyloConfig -> Phylo -- tophylowithoutLink
toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of toPhyloWithoutLink :: [Document] -> TermList -> PhyloConfig -> Phylo
Constante _ _ -> appendGroups cliqueToGroup 1 phyloClique (updatePeriods (indexDates' docs') phyloBase) toPhyloWithoutLink docs lst conf = case (getSeaElevation phyloBase) of
Adaptative _ -> toGroupsProxi 1 Constante _ _ -> appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
$ appendGroups cliqueToGroup 1 phyloClique (updatePeriods (indexDates' docs') phyloBase) Adaptative _ -> scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
where where
-------------------------------------- --------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique] seriesOfClustering :: Map (Date,Date) [Clustering]
phyloClique = toPhyloClique phyloBase docs' seriesOfClustering = toSeriesOfClustering phyloBase docs'
-------------------------------------- --------------------------------------
docs' :: Map (Date,Date) [Document] docs' :: Map (Date,Date) [Document]
-- QL: Time Consuming here -- QL: Time Consuming here
docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty
-------------------------------------- --------------------------------------
phyloBase :: Phylo phyloBase :: Phylo
phyloBase = toPhyloBase docs lst conf phyloBase = initPhylo docs lst conf
-------------------------------------- --------------------------------------
--------------------------- ---------------------------
...@@ -182,30 +219,30 @@ toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of ...@@ -182,30 +219,30 @@ toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of
-- To apply a filter with the possibility of keeping some periods non empty (keep : True|False) -- To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterClique :: Bool -> Int -> (Int -> [PhyloClique] -> [PhyloClique]) -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique] filterClique :: Bool -> Int -> (Int -> [Clustering] -> [Clustering]) -> Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
filterClique keep thr f m = case keep of filterClique keep thr f m = case keep of
False -> map (\l -> f thr l) m False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m True -> map (\l -> keepFilled (f) thr l) m
-- To filter Fis with small Support -- To filter Fis with small Support
filterCliqueBySupport :: Int -> [PhyloClique] -> [PhyloClique] filterCliqueBySupport :: Int -> [Clustering] -> [Clustering]
filterCliqueBySupport thr l = filter (\clq -> (clq ^. phyloClique_support) >= thr) l filterCliqueBySupport thr l = filter (\clq -> (clq ^. clustering_support ) >= thr) l
-- To filter Fis with small Clique size -- To filter Fis with small Clique size
filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique] filterCliqueBySize :: Int -> [Clustering] -> [Clustering]
filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. phyloClique_nodes) >= thr) l filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >= thr) l
-- To filter nested Fis -- To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique] filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
filterCliqueByNested m = filterCliqueByNested m =
let clq = map (\l -> let clq = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (f' ^. phyloClique_nodes) (f ^. phyloClique_nodes)) mem) foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem)
then mem then mem
else else
let fMax = filter (\f' -> not $ isNested (f ^. phyloClique_nodes) (f' ^. phyloClique_nodes)) mem let fMax = filter (\f' -> not $ isNested (f ^. clustering_roots) (f' ^. clustering_roots)) mem
in fMax ++ [f] ) [] l) in fMax ++ [f] ) [] l)
$ elems m $ elems m
clq' = clq `using` parList rdeepseq clq' = clq `using` parList rdeepseq
...@@ -213,8 +250,8 @@ filterCliqueByNested m = ...@@ -213,8 +250,8 @@ filterCliqueByNested m =
-- | To transform a time map of docs into a time map of Fis with some filters -- | To transform a time map of docs into a time map of Fis with some filters
toPhyloClique :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [PhyloClique] toSeriesOfClustering :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [Clustering]
toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
Fis s s' -> -- traceFis "Filtered Fis" Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested filterCliqueByNested
{- \$ traceFis "Filtered by clique size" -} {- \$ traceFis "Filtered by clique size" -}
...@@ -222,22 +259,22 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -222,22 +259,22 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
{- \$ traceFis "Filtered by support" -} {- \$ traceFis "Filtered by support" -}
$ filterClique True s (filterCliqueBySupport) $ filterClique True s (filterCliqueBySupport)
{- \$ traceFis "Unfiltered Fis" -} {- \$ traceFis "Unfiltered Fis" -}
phyloClique seriesOfClustering
MaxClique s _ _ -> filterClique True s (filterCliqueBySize) MaxClique s _ _ -> filterClique True s (filterCliqueBySize)
phyloClique seriesOfClustering
where where
-------------------------------------- --------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique] seriesOfClustering :: Map (Date,Date) [Clustering]
phyloClique = case (clique $ getConfig phylo) of seriesOfClustering = case (clique $ getConfig phylo) of
Fis _ _ -> Fis _ _ ->
let fis = map (\(prd,docs) -> let fis = map (\(prd,docs) ->
case (corpusParser $ getConfig phylo) of case (corpusParser $ getConfig phylo) of
Csv' _ -> let lst = toList Csv' _ -> let lst = toList
$ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs) $ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
in (prd, map (\f -> PhyloClique (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst) in (prd, map (\f -> Clustering (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst)
_ -> let lst = toList _ -> let lst = toList
$ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs) $ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs)
in (prd, map (\f -> PhyloClique (Set.toList $ fst f) (snd f) prd (Just $ fromIntegral $ snd f) []) lst) in (prd, map (\f -> Clustering (Set.toList $ fst f) (snd f) prd (Just $ fromIntegral $ snd f) []) lst)
) )
$ toList phyloDocs $ toList phyloDocs
fis' = fis `using` parList rdeepseq fis' = fis `using` parList rdeepseq
...@@ -248,7 +285,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -248,7 +285,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$ foldl sumCooc empty $ foldl sumCooc empty
$ map listToMatrix $ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs $ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> PhyloClique cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc)) in (prd, map (\cl -> Clustering cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc))
$ toList phyloDocs $ toList phyloDocs
mcl' = mcl `using` parList rdeepseq mcl' = mcl `using` parList rdeepseq
in fromList mcl' in fromList mcl'
...@@ -353,27 +390,28 @@ docsToTimeScaleNb docs = ...@@ -353,27 +390,28 @@ docsToTimeScaleNb docs =
$ unionWith (+) time docs' $ unionWith (+) time docs'
initPhyloLevels :: Int -> PhyloPeriodId -> Map PhyloLevelId PhyloLevel initPhyloScales :: Int -> Period -> Map PhyloScaleId PhyloScale
initPhyloLevels lvlMax pId = initPhyloScales lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId ("","") lvl empty)) [1..lvlMax] fromList $ map (\lvl -> ((pId,lvl),PhyloScale pId ("","") lvl empty)) [1..lvlMax]
-- To init the basic elements of a Phylo -- Init the basic elements of a Phylo
toPhyloBase :: [Document] -> TermList -> PhyloConfig -> Phylo --
toPhyloBase docs lst conf = initPhylo :: [Document] -> TermList -> PhyloConfig -> Phylo
initPhylo docs lst conf =
let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs) docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs)
params = defaultPhyloParam { _phyloParam_config = conf } params = defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf) periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf)
in trace ("\n" <> "-- | Create PhyloBase out of " <> show(length docs) <> " docs \n") in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n")
$ Phylo foundations $ Phylo foundations
docsSources docsSources
(docsToTimeScaleCooc docs (foundations ^. foundations_roots)) (docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs) (docsToTimeScaleNb docs)
(docsToTermFreq docs (foundations ^. foundations_roots)) (docsToTermFreq docs (foundations ^. foundations_roots))
(docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots)) (docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots))
empty Set.empty
empty
params params
(fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloLevels 1 prd))) periods) (fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods)
0
...@@ -25,7 +25,6 @@ import Gargantext.Prelude ...@@ -25,7 +25,6 @@ import Gargantext.Prelude
import Prelude (floor,read) import Prelude (floor,read)
import Text.Printf import Text.Printf
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
...@@ -232,41 +231,41 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l) ...@@ -232,41 +231,41 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l)
else f thr l else f thr l
traceClique :: Map (Date, Date) [PhyloClique] -> String traceClique :: Map (Date, Date) [Clustering] -> String
traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6] traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where where
-------------------------------------- --------------------------------------
cliques :: [Double] cliques :: [Double]
cliques = sort $ map (fromIntegral . length . _phyloClique_nodes) $ concat $ elems mFis cliques = sort $ map (fromIntegral . length . _clustering_roots) $ concat $ elems mFis
-------------------------------------- --------------------------------------
traceSupport :: Map (Date, Date) [PhyloClique] -> String traceSupport :: Map (Date, Date) [Clustering] -> String
traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6] traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where where
-------------------------------------- --------------------------------------
supports :: [Double] supports :: [Double]
supports = sort $ map (fromIntegral . _phyloClique_support) $ concat $ elems mFis supports = sort $ map (fromIntegral . _clustering_support) $ concat $ elems mFis
-------------------------------------- --------------------------------------
traceFis :: [Char] -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique] traceFis :: [Char] -> Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n" traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
<> "Support : " <> (traceSupport mFis) <> "\n" <> "Support : " <> (traceSupport mFis) <> "\n"
<> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis <> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis
--------------- ----------------
-- | Clique| -- -- | Cluster| --
--------------- ----------------
getCliqueSupport :: Clique -> Int getCliqueSupport :: Cluster -> Int
getCliqueSupport unit = case unit of getCliqueSupport unit = case unit of
Fis s _ -> s Fis s _ -> s
MaxClique _ _ _ -> 0 MaxClique _ _ _ -> 0
getCliqueSize :: Clique -> Int getCliqueSize :: Cluster -> Int
getCliqueSize unit = case unit of getCliqueSize unit = case unit of
Fis _ s -> s Fis _ s -> s
MaxClique s _ _ -> s MaxClique s _ _ -> s
...@@ -316,9 +315,9 @@ ngramsToCooc ngrams coocs = ...@@ -316,9 +315,9 @@ ngramsToCooc ngrams coocs =
-------------------- --------------------
getGroupId :: PhyloGroup -> PhyloGroupId getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupLevel), g ^. phylo_groupIndex) getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupScale), g ^. phylo_groupIndex)
idToPrd :: PhyloGroupId -> PhyloPeriodId idToPrd :: PhyloGroupId -> Period
idToPrd id = (fst . fst) id idToPrd id = (fst . fst) id
groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup] groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
...@@ -335,16 +334,16 @@ getPeriodPointers fil g = ...@@ -335,16 +334,16 @@ getPeriodPointers fil g =
filterProximity :: Proximity -> Double -> Double -> Bool filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity proximity thr local = filterProximity proximity thr local =
case proximity of case proximity of
WeightedLogJaccard _ -> local >= thr WeightedLogJaccard _ _ -> local >= thr
WeightedLogSim _ -> local >= thr WeightedLogSim _ _ -> local >= thr
Hamming _ -> undefined Hamming _ _ -> undefined
getProximityName :: Proximity -> String getProximityName :: Proximity -> String
getProximityName proximity = getProximityName proximity =
case proximity of case proximity of
WeightedLogJaccard _ -> "WLJaccard" WeightedLogJaccard _ _ -> "WLJaccard"
WeightedLogSim _ -> "WeightedLogSim" WeightedLogSim _ _ -> "WeightedLogSim"
Hamming _ -> "Hamming" Hamming _ _ -> "Hamming"
--------------- ---------------
-- | Phylo | -- -- | Phylo | --
...@@ -358,9 +357,9 @@ addPointers fil pty pointers g = ...@@ -358,9 +357,9 @@ addPointers fil pty pointers g =
ToParents -> g & phylo_groupPeriodParents .~ pointers ToParents -> g & phylo_groupPeriodParents .~ pointers
ToChildsMemory -> undefined ToChildsMemory -> undefined
ToParentsMemory -> undefined ToParentsMemory -> undefined
LevelPointer -> case fil of ScalePointer -> case fil of
ToChilds -> g & phylo_groupLevelChilds .~ pointers ToChilds -> g & phylo_groupScaleChilds .~ pointers
ToParents -> g & phylo_groupLevelParents .~ pointers ToParents -> g & phylo_groupScaleParents .~ pointers
ToChildsMemory -> undefined ToChildsMemory -> undefined
ToParentsMemory -> undefined ToParentsMemory -> undefined
...@@ -376,7 +375,7 @@ addMemoryPointers fil pty thr pointers g = ...@@ -376,7 +375,7 @@ addMemoryPointers fil pty thr pointers g =
ToParents -> undefined ToParents -> undefined
ToChildsMemory -> g & phylo_groupPeriodMemoryChilds .~ (concat [(g ^. phylo_groupPeriodMemoryChilds),(map (\pt -> toPointer' thr pt) pointers)]) ToChildsMemory -> g & phylo_groupPeriodMemoryChilds .~ (concat [(g ^. phylo_groupPeriodMemoryChilds),(map (\pt -> toPointer' thr pt) pointers)])
ToParentsMemory -> g & phylo_groupPeriodMemoryParents .~ (concat [(g ^. phylo_groupPeriodMemoryParents),(map (\pt -> toPointer' thr pt) pointers)]) ToParentsMemory -> g & phylo_groupPeriodMemoryParents .~ (concat [(g ^. phylo_groupPeriodMemoryParents),(map (\pt -> toPointer' thr pt) pointers)])
LevelPointer -> undefined ScalePointer -> undefined
getPeriodIds :: Phylo -> [(Date,Date)] getPeriodIds :: Phylo -> [(Date,Date)]
...@@ -385,22 +384,33 @@ getPeriodIds phylo = sortOn fst ...@@ -385,22 +384,33 @@ getPeriodIds phylo = sortOn fst
$ phylo ^. phylo_periods $ phylo ^. phylo_periods
getLevelParentId :: PhyloGroup -> PhyloGroupId getLevelParentId :: PhyloGroup -> PhyloGroupId
getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupLevelParents getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents
getLastLevel :: Phylo -> Level getLastLevel :: Phylo -> Scale
getLastLevel phylo = last' "lastLevel" $ getLevels phylo getLastLevel phylo = last' "lastLevel" $ getScales phylo
getLevels :: Phylo -> [Level] getScales :: Phylo -> [Scale]
getLevels phylo = nub getScales phylo = nub
$ map snd $ map snd
$ keys $ view ( phylo_periods $ keys $ view ( phylo_periods
. traverse . traverse
. phylo_periodLevels ) phylo . phylo_periodScales ) phylo
getSeaElevation :: Phylo -> SeaElevation getSeaElevation :: Phylo -> SeaElevation
getSeaElevation phylo = seaElevation (getConfig phylo) getSeaElevation phylo = seaElevation (getConfig phylo)
getPhyloSeaRiseStart :: Phylo -> Double
getPhyloSeaRiseStart phylo = case (getSeaElevation phylo) of
Constante s _ -> s
Adaptative _ -> 0
getPhyloSeaRiseSteps :: Phylo -> Double
getPhyloSeaRiseSteps phylo = case (getSeaElevation phylo) of
Constante _ s -> s
Adaptative s -> s
getConfig :: Phylo -> PhyloConfig getConfig :: Phylo -> PhyloConfig
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
...@@ -421,50 +431,52 @@ getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots ...@@ -421,50 +431,52 @@ getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
getSources :: Phylo -> Vector Text getSources :: Phylo -> Vector Text
getSources phylo = _sources (phylo ^. phylo_sources) getSources phylo = _sources (phylo ^. phylo_sources)
phyloToLastBranches :: Phylo -> [[PhyloGroup]]
phyloToLastBranches phylo = elems -- get the groups distributed by branches at the last scale
phyloLastScale :: Phylo -> [[PhyloGroup]]
phyloLastScale phylo = elems
$ fromListWith (++) $ fromListWith (++)
$ map (\g -> (g ^. phylo_groupBranchId, [g])) $ map (\g -> (g ^. phylo_groupBranchId, [g]))
$ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo $ getGroupsFromScale (last' "byBranches" $ getScales phylo) phylo
getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup] getGroupsFromScale :: Scale -> Phylo -> [PhyloGroup]
getGroupsFromLevel lvl phylo = getGroupsFromScale lvl phylo =
elems $ view ( phylo_periods elems $ view ( phylo_periods
. traverse . traverse
. phylo_periodLevels . phylo_periodScales
. traverse . traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl) . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
. phylo_levelGroups ) phylo . phylo_scaleGroups ) phylo
getGroupsFromLevelPeriods :: Level -> [PhyloPeriodId] -> Phylo -> [PhyloGroup] getGroupsFromScalePeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup]
getGroupsFromLevelPeriods lvl periods phylo = getGroupsFromScalePeriods lvl periods phylo =
elems $ view ( phylo_periods elems $ view ( phylo_periods
. traverse . traverse
. filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods) . filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
. phylo_periodLevels . phylo_periodScales
. traverse . traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl) . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
. phylo_levelGroups ) phylo . phylo_scaleGroups ) phylo
getGroupsFromPeriods :: Level -> Map PhyloPeriodId PhyloPeriod -> [PhyloGroup] getGroupsFromPeriods :: Scale -> Map Period PhyloPeriod -> [PhyloGroup]
getGroupsFromPeriods lvl periods = getGroupsFromPeriods lvl periods =
elems $ view ( traverse elems $ view ( traverse
. phylo_periodLevels . phylo_periodScales
. traverse . traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl) . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
. phylo_levelGroups ) periods . phylo_scaleGroups ) periods
updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo updatePhyloGroups :: Scale -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
updatePhyloGroups lvl m phylo = updatePhyloGroups lvl m phylo =
over ( phylo_periods over ( phylo_periods
. traverse . traverse
. phylo_periodLevels . phylo_periodScales
. traverse . traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl) . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
. phylo_levelGroups . phylo_scaleGroups
. traverse . traverse
) (\g -> ) (\g ->
let id = getGroupId g let id = getGroupId g
...@@ -478,17 +490,20 @@ updatePeriods periods' phylo = ...@@ -478,17 +490,20 @@ updatePeriods periods' phylo =
over (phylo_periods . traverse) over (phylo_periods . traverse)
(\prd -> (\prd ->
let prd' = periods' ! (prd ^. phylo_periodPeriod) let prd' = periods' ! (prd ^. phylo_periodPeriod)
lvls = map (\lvl -> lvl & phylo_levelPeriod' .~ prd') $ prd ^. phylo_periodLevels lvls = map (\lvl -> lvl & phylo_scalePeriodStr .~ prd') $ prd ^. phylo_periodScales
in prd & phylo_periodPeriod' .~ prd' in prd & phylo_periodPeriodStr .~ prd'
& phylo_periodLevels .~ lvls & phylo_periodScales .~ lvls
) phylo ) phylo
updateQuality :: Double -> Phylo -> Phylo
updateQuality quality phylo = phylo { _phylo_quality = quality }
traceToPhylo :: Level -> Phylo -> Phylo traceToPhylo :: Scale -> Phylo -> Phylo
traceToPhylo lvl phylo = traceToPhylo lvl phylo =
trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with " trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
<> show (length $ getGroupsFromLevel lvl phylo) <> " groups and " <> show (length $ getGroupsFromScale lvl phylo) <> " groups and "
<> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel lvl phylo) <> " branches" <> "\n") phylo <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo) <> " branches" <> "\n") phylo
-------------------- --------------------
-- | Clustering | -- -- | Clustering | --
...@@ -517,8 +532,8 @@ mergeMeta bId groups = ...@@ -517,8 +532,8 @@ mergeMeta bId groups =
in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")] in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches groups = groupsToBranches' groups =
{- run the related component algorithm -} {- run the related component algorithm -}
let egos = map (\g -> [getGroupId g] let egos = map (\g -> [getGroupId g]
++ (map fst $ g ^. phylo_groupPeriodParents) ++ (map fst $ g ^. phylo_groupPeriodParents)
...@@ -531,13 +546,15 @@ groupsToBranches groups = ...@@ -531,13 +546,15 @@ groupsToBranches groups =
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups' bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph
relatedComponents :: Ord a => [[a]] -> [[a]] relatedComponents :: Ord a => [[a]] -> [[a]]
relatedComponents graph = foldl' (\acc groups -> relatedComponents graph = foldl' (\branches groups ->
if (null acc) if (null branches)
then acc ++ [groups] then branches ++ [groups]
else else
let acc' = partition (\groups' -> disjoint (Set.fromList groups') (Set.fromList groups)) acc let branchPart = partition (\branch -> disjoint (Set.fromList branch) (Set.fromList groups)) branches
in (fst acc') ++ [nub $ concat $ (snd acc') ++ [groups]]) [] graph in (fst branchPart) ++ [nub $ concat $ (snd branchPart) ++ [groups]]) [] graph
toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]] toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
toRelatedComponents nodes edges = toRelatedComponents nodes edges =
...@@ -549,15 +566,15 @@ toRelatedComponents nodes edges = ...@@ -549,15 +566,15 @@ toRelatedComponents nodes edges =
traceSynchronyEnd :: Phylo -> Phylo traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo = traceSynchronyEnd phylo =
trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo) trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups" <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches" <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo <> "\n" ) phylo
traceSynchronyStart :: Phylo -> Phylo traceSynchronyStart :: Phylo -> Phylo
traceSynchronyStart phylo = traceSynchronyStart phylo =
trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo) trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups" <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches" <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo <> "\n" ) phylo
...@@ -567,9 +584,15 @@ traceSynchronyStart phylo = ...@@ -567,9 +584,15 @@ traceSynchronyStart phylo =
getSensibility :: Proximity -> Double getSensibility :: Proximity -> Double
getSensibility proxi = case proxi of getSensibility proxi = case proxi of
WeightedLogJaccard s -> s WeightedLogJaccard s _ -> s
WeightedLogSim s -> s WeightedLogSim s _ -> s
Hamming _ -> undefined Hamming _ _ -> undefined
getMinSharedNgrams :: Proximity -> Int
getMinSharedNgrams proxi = case proxi of
WeightedLogJaccard _ m -> m
WeightedLogSim _ m -> m
Hamming _ _ -> undefined
---------------- ----------------
-- | Branch | -- -- | Branch | --
...@@ -638,6 +661,6 @@ traceTemporalMatching groups = ...@@ -638,6 +661,6 @@ traceTemporalMatching groups =
trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double traceGroupsProxi :: [Double] -> [Double]
traceGroupsProxi m = traceGroupsProxi l =
trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups proximity" <> "\n") l
...@@ -60,19 +60,19 @@ mergeGroups coocs id mapIds childs = ...@@ -60,19 +60,19 @@ mergeGroups coocs id mapIds childs =
mergeAncestors :: [Pointer] -> [Pointer] mergeAncestors :: [Pointer] -> [Pointer]
mergeAncestors pointers = Map.toList $ fromListWith max pointers mergeAncestors pointers = Map.toList $ fromListWith max pointers
addPhyloLevel :: Level -> Phylo -> Phylo addPhyloScale :: Scale -> Phylo -> Phylo
addPhyloLevel lvl phylo = addPhyloScale lvl phylo =
over ( phylo_periods . traverse ) over ( phylo_periods . traverse )
(\phyloPrd -> phyloPrd & phylo_periodLevels (\phyloPrd -> phyloPrd & phylo_periodScales
%~ (insert (phyloPrd ^. phylo_periodPeriod, lvl) %~ (insert (phyloPrd ^. phylo_periodPeriod, lvl)
(PhyloLevel (phyloPrd ^. phylo_periodPeriod) (phyloPrd ^. phylo_periodPeriod') lvl empty))) phylo (PhyloScale (phyloPrd ^. phylo_periodPeriod) (phyloPrd ^. phylo_periodPeriodStr) lvl empty))) phylo
toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo toNextScale :: Phylo -> [PhyloGroup] -> Phylo
toNextLevel' phylo groups = toNextScale phylo groups =
let curLvl = getLastLevel phylo let curLvl = getLastLevel phylo
oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups
newGroups = concat $ groupsToBranches newGroups = concat $ groupsToBranches'
$ fromList $ map (\g -> (getGroupId g, g)) $ fromList $ map (\g -> (getGroupId g, g))
$ foldlWithKey (\acc id groups' -> $ foldlWithKey (\acc id groups' ->
-- 4) create the parent group -- 4) create the parent group
...@@ -83,17 +83,17 @@ toNextLevel' phylo groups = ...@@ -83,17 +83,17 @@ toNextLevel' phylo groups =
newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups
in traceSynchronyEnd in traceSynchronyEnd
$ over ( phylo_periods . traverse . phylo_periodLevels . traverse $ over ( phylo_periods . traverse . phylo_periodScales . traverse
-- 6) update each period at curLvl + 1 -- 6) update each period at curLvl + 1
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1))) . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == (curLvl + 1)))
-- 7) by adding the parents -- 7) by adding the parents
(\phyloLvl -> (\phyloLvl ->
if member (phyloLvl ^. phylo_levelPeriod) newPeriods if member (phyloLvl ^. phylo_scalePeriod) newPeriods
then phyloLvl & phylo_levelGroups then phyloLvl & phylo_scaleGroups
.~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_levelPeriod)) .~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_scalePeriod))
else phyloLvl) else phyloLvl)
-- 2) add the curLvl + 1 phyloLevel to the phylo -- 2) add the curLvl + 1 PhyloScale to the phylo
$ addPhyloLevel (curLvl + 1) $ addPhyloScale (curLvl + 1)
-- 1) update the current groups (with level parent pointers) in the phylo -- 1) update the current groups (with level parent pointers) in the phylo
$ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo $ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
...@@ -140,17 +140,17 @@ groupsToEdges prox sync nbDocs diago groups = ...@@ -140,17 +140,17 @@ groupsToEdges prox sync nbDocs diago groups =
toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)] toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)]
toEdges sens edges = toEdges sens edges =
case prox of case prox of
WeightedLogJaccard _ -> map (\(g,g') -> WeightedLogJaccard _ _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' (sens) nbDocs diago ((g,g'), weightedLogJaccard' (sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
WeightedLogSim _ -> map (\(g,g') -> WeightedLogSim _ _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' (1 / sens) nbDocs diago ((g,g'), weightedLogJaccard' (1 / sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
_ -> undefined _ -> undefined
toParentId :: PhyloGroup -> PhyloGroupId toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex) toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupScale + 1), child ^. phylo_groupIndex)
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup] reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
...@@ -159,6 +159,7 @@ reduceGroups prox sync docs diagos branch = ...@@ -159,6 +159,7 @@ reduceGroups prox sync docs diagos branch =
let periods = fromListWith (++) let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems) in (concat . concat . elems)
-- TODO : ajouter un parallelisme
$ mapWithKey (\prd groups -> $ mapWithKey (\prd groups ->
-- 2) for each period, transform the groups as a proximity graph filtered by a threshold -- 2) for each period, transform the groups as a proximity graph filtered by a threshold
let diago = reduceDiagos $ filterDiago diagos [prd] let diago = reduceDiagos $ filterDiago diagos [prd]
...@@ -166,17 +167,17 @@ reduceGroups prox sync docs diagos branch = ...@@ -166,17 +167,17 @@ reduceGroups prox sync docs diagos branch =
in map (\comp -> in map (\comp ->
-- 4) add to each groups their futur level parent group -- 4) add to each groups their futur level parent group
let parentId = toParentId (head' "parentId" comp) let parentId = toParentId (head' "parentId" comp)
in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp ) in map (\g -> g & phylo_groupScaleParents %~ (++ [(parentId,1)]) ) comp )
-- 3) reduce the graph a a set of related components -- 3) reduce the graph a a set of related components
$ toRelatedComponents groups edges) periods $ toRelatedComponents groups edges) periods
adjustClustering :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]] chooseClusteringStrategy :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
adjustClustering sync branches = case sync of chooseClusteringStrategy sync branches = case sync of
ByProximityThreshold _ _ scope _ -> case scope of ByProximityThreshold _ _ scope _ -> case scope of
SingleBranch -> branches SingleBranch -> branches
SiblingBranches -> groupBy (\g g' -> (last' "adjustClustering" $ (g ^. phylo_groupMeta) ! "breaks") SiblingBranches -> groupBy (\g g' -> (last' "chooseClusteringStrategy" $ (g ^. phylo_groupMeta) ! "breaks")
== (last' "adjustClustering" $ (g' ^. phylo_groupMeta) ! "breaks")) == (last' "chooseClusteringStrategy" $ (g' ^. phylo_groupMeta) ! "breaks"))
$ sortOn _phylo_groupBranchId $ concat branches $ sortOn _phylo_groupBranchId $ concat branches
AllBranches -> [concat branches] AllBranches -> [concat branches]
ByProximityDistribution _ _ -> branches ByProximityDistribution _ _ -> branches
...@@ -185,7 +186,7 @@ adjustClustering sync branches = case sync of ...@@ -185,7 +186,7 @@ adjustClustering sync branches = case sync of
levelUpAncestors :: [PhyloGroup] -> [PhyloGroup] levelUpAncestors :: [PhyloGroup] -> [PhyloGroup]
levelUpAncestors groups = levelUpAncestors groups =
-- 1) create an associative map of (old,new) ids -- 1) create an associative map of (old,new) ids
let ids' = fromList $ map (\g -> (getGroupId g, fst $ head' "levelUpAncestors" ( g ^. phylo_groupLevelParents))) groups let ids' = fromList $ map (\g -> (getGroupId g, fst $ head' "levelUpAncestors" ( g ^. phylo_groupScaleParents))) groups
in map (\g -> in map (\g ->
let id' = ids' ! (getGroupId g) let id' = ids' ! (getGroupId g)
ancestors = g ^. phylo_groupAncestors ancestors = g ^. phylo_groupAncestors
...@@ -202,11 +203,11 @@ synchronicClustering phylo = ...@@ -202,11 +203,11 @@ synchronicClustering phylo =
diagos = map coocToDiago $ phylo ^. phylo_timeCooc diagos = map coocToDiago $ phylo ^. phylo_timeCooc
newBranches = map (\branch -> reduceGroups prox sync docs diagos branch) newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics $ map processDynamics
$ adjustClustering sync $ chooseClusteringStrategy sync
$ phyloToLastBranches $ phyloLastScale
$ traceSynchronyStart phylo $ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq newBranches' = newBranches `using` parList rdeepseq
in toNextLevel' phylo $ levelUpAncestors $ concat newBranches' in toNextScale phylo $ levelUpAncestors $ concat newBranches'
-- synchronicDistance :: Phylo -> Level -> String -- synchronicDistance :: Phylo -> Level -> String
......
...@@ -6,46 +6,62 @@ License : AGPL + CECILL v3 ...@@ -6,46 +6,62 @@ License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
Reference : Chavalarias, D., Lobbé, Q. & Delanoë, A. Draw me Science. Scientometrics 127, 545–575 (2022). https://doi.org/10.1007/s11192-021-04186-5
-} -}
module Gargantext.Core.Viz.Phylo.TemporalMatching where module Gargantext.Core.Viz.Phylo.TemporalMatching where
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or, sort, (!!)) import Data.Ord
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust) import Data.List (concat, splitAt, tail, sortOn, sortBy, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or)
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), empty, mapKeys, adjust)
import Debug.Trace (trace) import Debug.Trace (trace)
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Prelude import Gargantext.Prelude
import Prelude (floor,tan,pi) import Prelude (tan,pi)
import Text.Printf import Text.Printf
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
type Branch = [PhyloGroup]
type FinalQuality = Double
type LocalQuality = Double
type ShouldTry = Bool
------------------- ----------------------------
-- | Proximity | -- -- | Similarity Measure | --
------------------- ----------------------------
-- | To compute a jaccard similarity between two lists {-
-- compute a jaccard similarity between two lists
-}
jaccard :: [Int] -> [Int] -> Double jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union') jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
-- | Process the inverse sumLog {-
-- process the inverse sumLog
-}
sumInvLog' :: Double -> Double -> [Double] -> Double sumInvLog' :: Double -> Double -> [Double] -> Double
sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2))))) 0 diago sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2))))) 0 diago
-- | Process the sumLog {-
-- process the sumLog
-}
sumLog' :: Double -> Double -> [Double] -> Double sumLog' :: Double -> Double -> [Double] -> Double
sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2)))) 0 diago sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2)))) 0 diago
{-
-- compute the weightedLogJaccard
-}
weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
weightedLogJaccard' sens nbDocs diago ngrams ngrams' weightedLogJaccard' sens nbDocs diago ngrams ngrams'
| null ngramsInter = 0 | null ngramsInter = 0
...@@ -68,8 +84,12 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams' ...@@ -68,8 +84,12 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams'
diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion) diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
-------------------------------------- --------------------------------------
-- | Process the weighted similarity between clusters. Adapted from Wang, X., Cheng, Q., Lu, W., 2014. Analyzing evolution of research topics with NEViewer: a new method based on dynamic co-word networks. Scientometrics 101, 1253–1271. https://doi.org/10.1007/s11192-014-1347-y (log added in the formula + pair comparison)
{-
-- compute the weightedLogSim
-- Adapted from Wang, X., Cheng, Q., Lu, W., 2014. Analyzing evolution of research topics with NEViewer: a new method based on dynamic co-word networks. Scientometrics 101, 1253–1271. https://doi.org/10.1007/s11192-014-1347-y (log added in the formula + pair comparison)
-- tests not conclusive -- tests not conclusive
-}
weightedLogSim' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double weightedLogSim' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams
| null ngramsInter = 0 | null ngramsInter = 0
...@@ -95,36 +115,39 @@ weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams ...@@ -95,36 +115,39 @@ weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams
diagoTarget = elems $ restrictKeys diago (Set.fromList target_ngrams) diagoTarget = elems $ restrictKeys diago (Set.fromList target_ngrams)
-------------------------------------- --------------------------------------
{-
-- perform a seamilarity measure between a given group and a pair of targeted groups
-}
toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
-- | To process the proximity between a current group and a pair of targets group using the adapted Wang et al. Similarity
toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' = toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
case proximity of case proximity of
WeightedLogJaccard sens -> WeightedLogJaccard sens _ ->
let pairNgrams = if targetNgrams == targetNgrams' let pairNgrams = if targetNgrams == targetNgrams'
then targetNgrams then targetNgrams
else union targetNgrams targetNgrams' else union targetNgrams targetNgrams'
in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
WeightedLogSim sens -> WeightedLogSim sens _ ->
let pairNgrams = if targetNgrams == targetNgrams' let pairNgrams = if targetNgrams == targetNgrams'
then targetNgrams then targetNgrams
else union targetNgrams targetNgrams' else union targetNgrams targetNgrams'
in weightedLogSim' sens nbDocs diago egoNgrams pairNgrams in weightedLogSim' sens nbDocs diago egoNgrams pairNgrams
Hamming _ -> undefined Hamming _ _ -> undefined
------------------------
-- | Local Matching | --
------------------------
findLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId -----------------------------
-- | Pointers & Matrices | --
-----------------------------
findLastPeriod :: Filiation -> [Period] -> Period
findLastPeriod fil periods = case fil of findLastPeriod fil periods = case fil of
ToParents -> head' "findLastPeriod" (sortOn fst periods) ToParents -> head' "findLastPeriod" (sortOn fst periods)
ToChilds -> last' "findLastPeriod" (sortOn fst periods) ToChilds -> last' "findLastPeriod" (sortOn fst periods)
ToChildsMemory -> undefined ToChildsMemory -> undefined
ToParentsMemory -> undefined ToParentsMemory -> undefined
removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> Period
-- | To filter pairs of candidates related to old pointers periods
removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId
-> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))] -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
-> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))] -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
removeOldPointers oldPointers fil thr prox prd pairs removeOldPointers oldPointers fil thr prox prd pairs
...@@ -143,26 +166,6 @@ removeOldPointers oldPointers fil thr prox prd pairs ...@@ -143,26 +166,6 @@ removeOldPointers oldPointers fil thr prox prd pairs
|| (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs || (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
| otherwise = [] | otherwise = []
makePairs' :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity
-> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
if (null periods)
then []
else removeOldPointers oldPointers fil thr prox lastPrd
{- at least on of the pair candidates should be from the last added period -}
$ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
$ listToKeys
$ filter (\(id,ngrams) ->
let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
) candidates
where
lastPrd :: PhyloPeriodId
lastPrd = findLastPeriod fil periods
filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer] filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
...@@ -188,56 +191,121 @@ filterPointersByPeriod fil pts = ...@@ -188,56 +191,121 @@ filterPointersByPeriod fil pts =
ToChildsMemory -> undefined ToChildsMemory -> undefined
ToParentsMemory -> undefined ToParentsMemory -> undefined
filterDocs :: Map Date Double -> [Period] -> Map Date Double
filterDocs d pds = restrictKeys d $ periodsToYears pds
filterDiago :: Map Date Cooc -> [Period] -> Map Date Cooc
filterDiago diago pds = restrictKeys diago $ periodsToYears pds
---------------------------------
-- | Inter-temporal matching | --
---------------------------------
{-
-- perform the related component algorithm, construct the resulting branch id and update the corresponding group's branch id
-}
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [Branch]
groupsToBranches groups =
{- run the related component algorithm -}
let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
$ sortOn (\gs -> fst $ fst $ head' "egos" gs)
$ map (\group -> [getGroupId group]
++ (map fst $ group ^. phylo_groupPeriodParents)
++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
-- first find the related components by inside each ego's period
-- a supprimer
graph' = map relatedComponents egos
-- then run it for the all the periods
branches = zip [1..]
$ relatedComponents $ concat (graph' `using` parList rdeepseq)
-- update each group's branch id
in map (\(bId,branch) ->
let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
$ elems $ restrictKeys groups (Set.fromList branch)
in groups' `using` parList rdeepseq
) branches `using` parList rdeepseq
{-
-- find the best pair/singleton of parents/childs for a given group
-}
makePairs :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [Period] -> [Pointer] -> Filiation -> Double -> Proximity
-> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
if (null periods)
then []
else removeOldPointers oldPointers fil thr prox lastPrd
{- at least on of the pair candidates should be from the last added period -}
$ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
$ filter (\((id,_),(id',_)) -> (elem id inPairs) || (elem id' inPairs))
$ listToCombi' candidates
where
--------------------------------------
inPairs :: [PhyloGroupId]
inPairs = map fst
$ filter (\(id,ngrams) ->
let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
) candidates
--------------------------------------
lastPrd :: Period
lastPrd = findLastPeriod fil periods
--------------------------------------
{-
-- find the best temporal links between a given group and its parents/childs
-}
phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
-> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer] -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) = phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ngrams) =
if (null $ filterPointers proxi thr oldPointers) if (null $ filterPointers proxi thr oldPointers)
{- let's find new pointers -} -- if no previous pointers satisfy the current threshold then let's find new pointers
then if null nextPointers then if null nextPointers
then [] then []
else filterPointersByPeriod fil else filterPointersByPeriod filiation
-- 2) keep only the best set of pointers grouped by proximity
$ head' "phyloGroupMatching" $ head' "phyloGroupMatching"
-- Keep only the best set of pointers grouped by proximity
$ groupBy (\pt pt' -> (snd . fst) pt == (snd . fst) pt') $ groupBy (\pt pt' -> (snd . fst) pt == (snd . fst) pt')
$ reverse $ sortOn (snd . fst) $ head' "pointers" nextPointers -- 1) find the first time frame where at leats one pointer satisfies the proximity threshold
-- Find the first time frame where at leats one pointer satisfies the proximity threshold $ sortBy (comparing (Down . snd . fst)) $ head' "pointers" nextPointers
else oldPointers else oldPointers
where where
nextPointers :: [[(Pointer,[Int])]] nextPointers :: [[(Pointer,[Int])]]
nextPointers = take 1 nextPointers = take 1
$ dropWhile null -- stop as soon as we find a time frame where at least one singleton / pair satisfies the threshold
{- for each time frame, process the proximity on relevant pairs of targeted groups -} $ dropWhile (null)
$ scanl (\acc groups -> -- for each time frame, process the proximity on relevant pairs of targeted groups
let periods = nub $ map (fst . fst . fst) $ concat groups $ scanl (\acc targets ->
let periods = nub $ map (fst . fst . fst) targets
lastPrd = findLastPeriod filiation periods
nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods)) nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
diago = reduceDiagos diago = reduceDiagos
$ filterDiago diagos ([(fst . fst) id] ++ periods) $ filterDiago diagos ([(fst . fst) id] ++ periods)
{- important resize nbdocs et diago dans le make pairs -} singletons = processProximity nbdocs diago $ map (\g -> (g,g)) $ filter (\g -> (fst . fst . fst) g == lastPrd) targets
pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos pairs = makePairs (id,ngrams) targets periods oldPointers filiation thr proxi docs diagos
in acc ++ ( filterPointers' proxi thr in
if (null singletons)
then acc ++ ( processProximity nbdocs diago pairs )
else acc ++ singletons
) [] $ map concat $ inits candidates -- groups from [[1900],[1900,1901],[1900,1901,1902],...]
-----------------------------
processProximity :: Double -> Map Int Double -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))] -> [(Pointer,[Int])]
processProximity nbdocs diago targets = filterPointers' proxi thr
$ concat $ concat
$ map (\(c,c') -> $ map (\(c,c') ->
{- process the proximity between the current group and a pair of candidates -}
let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c') let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
in if ((c == c') || (snd c == snd c')) in if ((c == c') || (snd c == snd c'))
then [((fst c,proximity),snd c)] then [((fst c,proximity),snd c)]
else [((fst c,proximity),snd c),((fst c',proximity),snd c')] ) pairs )) [] else [((fst c,proximity),snd c),((fst c',proximity),snd c')] ) targets
$ inits candidates -- groups from [[1900],[1900,1901],[1900,1901,1902],...]
filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
filterDocs d pds = restrictKeys d $ periodsToYears pds
filterDiago :: Map Date Cooc -> [PhyloPeriodId] -> Map Date Cooc
filterDiago diago pds = restrictKeys diago $ periodsToYears pds
-----------------------------
-- | Matching Processing | --
-----------------------------
getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId] {-
-- get the upstream/downstream timescale of a given period
-}
getNextPeriods :: Filiation -> Int -> Period -> [Period] -> [Period]
getNextPeriods fil max' pId pIds = getNextPeriods fil max' pId pIds =
case fil of case fil of
ToChilds -> take max' $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds ToChilds -> take max' $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
...@@ -246,17 +314,23 @@ getNextPeriods fil max' pId pIds = ...@@ -246,17 +314,23 @@ getNextPeriods fil max' pId pIds =
ToParentsMemory -> undefined ToParentsMemory -> undefined
getCandidates :: PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]] {-
getCandidates ego targets = -- find all the candidates parents/childs of ego
-}
getCandidates :: Int -> PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
getCandidates minNgrams ego targets =
if (length (ego ^. phylo_groupNgrams)) > 1 if (length (ego ^. phylo_groupNgrams)) > 1
then then
map (\groups' -> filter (\g' -> (> 1) $ length $ intersect (ego ^. phylo_groupNgrams) (snd g')) groups') targets map (\groups' -> filter (\g' -> (> minNgrams) $ length $ intersect (ego ^. phylo_groupNgrams) (snd g')) groups') targets
else else
map (\groups' -> filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')) groups') targets map (\groups' -> filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')) groups') targets
matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup] {-
matchGroupsToGroups frame periods proximity thr docs coocs groups = -- set up and start performing the upstream/downstream inter‐temporal matching period by period
-}
reconstructTemporalLinks :: Int -> [Period] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reconstructTemporalLinks frame periods proximity thr docs coocs groups =
let groups' = groupByField _phylo_groupPeriod groups let groups' = groupByField _phylo_groupPeriod groups
in foldl' (\acc prd -> in foldl' (\acc prd ->
let -- 1) find the parents/childs matching periods let -- 1) find the parents/childs matching periods
...@@ -265,7 +339,7 @@ matchGroupsToGroups frame periods proximity thr docs coocs groups = ...@@ -265,7 +339,7 @@ matchGroupsToGroups frame periods proximity thr docs coocs groups =
-- 2) find the parents/childs matching candidates -- 2) find the parents/childs matching candidates
candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
-- 3) find the parents/child number of docs by years -- 3) find the parents/childs number of docs by years
docsPar = filterDocs docs ([prd] ++ periodsPar) docsPar = filterDocs docs ([prd] ++ periodsPar)
docsChi = filterDocs docs ([prd] ++ periodsChi) docsChi = filterDocs docs ([prd] ++ periodsChi)
-- 4) find the parents/child diago by years -- 4) find the parents/child diago by years
...@@ -273,9 +347,9 @@ matchGroupsToGroups frame periods proximity thr docs coocs groups = ...@@ -273,9 +347,9 @@ matchGroupsToGroups frame periods proximity thr docs coocs groups =
diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar) diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
-- 5) match in parallel all the groups (egos) to their possible candidates -- 5) match in parallel all the groups (egos) to their possible candidates
egos = map (\ego -> egos = map (\ego ->
let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar let pointersPar = phyloGroupMatching (getCandidates (getMinSharedNgrams proximity) ego candidatesPar) ToParents proximity docsPar diagoPar
thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams) thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
pointersChi = phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi diagoChi pointersChi = phyloGroupMatching (getCandidates (getMinSharedNgrams proximity) ego candidatesChi) ToChilds proximity docsChi diagoChi
thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams) thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
in addPointers ToChilds TemporalPointer pointersChi in addPointers ToChilds TemporalPointer pointersChi
$ addPointers ToParents TemporalPointer pointersPar $ addPointers ToParents TemporalPointer pointersPar
...@@ -287,27 +361,51 @@ matchGroupsToGroups frame periods proximity thr docs coocs groups = ...@@ -287,27 +361,51 @@ matchGroupsToGroups frame periods proximity thr docs coocs groups =
) [] periods ) [] periods
----------------------- {-
-- | Phylo Quality | -- -- reconstruct a phylomemetic network from a list of groups and from a given threshold
----------------------- -}
toPhylomemeticNetwork :: Int -> [Period] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [Branch]
toPhylomemeticNetwork timescale periods similarity thr docs coocs groups =
groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ reconstructTemporalLinks timescale periods similarity thr docs coocs groups
----------------------------
-- | Quality Assessment | --
----------------------------
relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]] {-
relevantBranches term branches = -- filter the branches containing x
filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches -}
relevantBranches :: Int -> [Branch] -> [Branch]
relevantBranches x branches =
filter (\groups -> (any (\group -> elem x $ group ^. phylo_groupNgrams) groups)) branches
accuracy :: Int -> [(Date,Date)] -> [PhyloGroup] -> Double {-
-- The accuracy of a branch relatively to a term x is computed only over the periods there exist some cluster mentionning x in the phylomemy -- compute the accuracy ξ
accuracy x periods bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk') -- the accuracy of a branch relatively to a root x is computed only over the periods where clusters mentionning x in the phylo do exist
/ (fromIntegral $ length bk')) -}
accuracy :: Int -> [(Date,Date)] -> Branch -> Double
accuracy x periods bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk') / (fromIntegral $ length bk'))
where where
---
bk' :: [PhyloGroup] bk' :: [PhyloGroup]
bk' = filter (\g -> elem (g ^. phylo_groupPeriod) periods) bk bk' = filter (\g -> elem (g ^. phylo_groupPeriod) periods) bk
recall :: Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
{-
-- compute the recall ρ
-}
recall :: Int -> Branch -> [Branch] -> Double
recall x bk bx = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk) recall x bk bx = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
/ (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx)) / (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx))
{-
-- compute the F-score function
-}
fScore :: Double -> Int -> [(Date,Date)] -> [PhyloGroup] -> [[PhyloGroup]] -> Double fScore :: Double -> Int -> [(Date,Date)] -> [PhyloGroup] -> [[PhyloGroup]] -> Double
fScore lambda x periods bk bx = fScore lambda x periods bk bx =
let rec = recall x bk bx let rec = recall x bk bx
...@@ -316,23 +414,18 @@ fScore lambda x periods bk bx = ...@@ -316,23 +414,18 @@ fScore lambda x periods bk bx =
/ (((lambda ** 2) * acc + rec)) / (((lambda ** 2) * acc + rec))
{-
-- compute the number of groups
-}
wk :: [PhyloGroup] -> Double wk :: [PhyloGroup] -> Double
wk bk = fromIntegral $ length bk wk bk = fromIntegral $ length bk
toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double {-
toPhyloQuality' lambda freq branches = -- compute the recall ρ for all the branches
if (null branches) -}
then 0 globalRecall :: Map Int Double -> [Branch] -> Double
else sum globalRecall freq branches =
$ map (\i ->
let bks = relevantBranches i branches
periods = nub $ map _phylo_groupPeriod $ filter (\g -> elem i $ g ^. phylo_groupNgrams) $ concat bks
in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore lambda i periods bk bks)) bks))
$ keys freq
toRecall :: Map Int Double -> [[PhyloGroup]] -> Double
toRecall freq branches =
if (null branches) if (null branches)
then 0 then 0
else sum else sum
...@@ -347,8 +440,11 @@ toRecall freq branches = ...@@ -347,8 +440,11 @@ toRecall freq branches =
pys = sum (elems freq) pys = sum (elems freq)
toAccuracy :: Map Int Double -> [[PhyloGroup]] -> Double {-
toAccuracy freq branches = -- compute the accuracy ξ for all the branches
-}
globalAccuracy :: Map Int Double -> [Branch] -> Double
globalAccuracy freq branches =
if (null branches) if (null branches)
then 0 then 0
else sum else sum
...@@ -365,7 +461,9 @@ toAccuracy freq branches = ...@@ -365,7 +461,9 @@ toAccuracy freq branches =
pys = sum (elems freq) pys = sum (elems freq)
-- | here we do the average of all the local f_scores {-
-- compute the quality score F(λ)
-}
toPhyloQuality :: Double -> Double -> Map Int Double -> [[PhyloGroup]] -> Double toPhyloQuality :: Double -> Double -> Map Int Double -> [[PhyloGroup]] -> Double
toPhyloQuality fdt lambda freq branches = toPhyloQuality fdt lambda freq branches =
if (null branches) if (null branches)
...@@ -385,304 +483,166 @@ toPhyloQuality fdt lambda freq branches = ...@@ -385,304 +483,166 @@ toPhyloQuality fdt lambda freq branches =
-- pys :: Double -- pys :: Double
-- pys = sum (elems freq) -- pys = sum (elems freq)
-- 1 / nb de foundation
------------------------------------ -------------------------
-- | Constant Temporal Matching | -- -- | Sea-level Rise | --
------------------------------------ -------------------------
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] {-
groupsToBranches' groups = -- attach a rise value to branches & groups metadata
{- run the related component algorithm -} -}
let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs')) riseToMeta :: Double -> [Branch] -> [Branch]
$ sortOn (\gs -> fst $ fst $ head' "egos" gs) riseToMeta rise branches =
$ map (\group -> [getGroupId group] let break = length branches > 1
++ (map fst $ group ^. phylo_groupPeriodParents) in map (\b ->
++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups map (\g ->
-- first find the related components by inside each ego's period if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [rise]) "breaks"(g ^. phylo_groupMeta))
-- a supprimer else g) b) branches
graph' = map relatedComponents egos
-- then run it for the all the periods
graph = zip [1..]
$ relatedComponents $ concat (graph' `using` parList rdeepseq)
-- update each group's branch id
in map (\(bId,ids) ->
let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
$ elems $ restrictKeys groups (Set.fromList ids)
in groups' `using` parList rdeepseq ) graph
reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double {-
reduceFrequency frequency branches = -- attach a thr value to branches & groups metadata
restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches) -}
thrToMeta :: Double -> [Branch] -> [Branch]
thrToMeta thr branches =
map (\b ->
map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]] {-
updateThr thr branches = map (\b -> map (\g -> -- TODO
g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches -- 1) try the zipper structure https://wiki.haskell.org/Zipper to performe the sea-level rise algorithme
-- 2) investigate how the branches order influences the 'separateBranches' function
-}
-- Sequentially break each branch of a phylo where {-
-- done = all the allready broken branches -- sequentially separate each branch for a given threshold and check if it locally increases the quality score
-- ego = the current branch we want to break -- sequence = [done] | currentBranch | [rest]
-- rest = the branches we still have to break -- done = all the already separated branches
breakBranches :: Double -> Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double -- rest = all the branches we still have to separate
-> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)] -}
breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods done ego rest = separateBranches :: Double -> Proximity -> Double -> Map Int Double -> Int -> Double -> Double
-- 1) keep or not the new division of ego -> Int -> Map Date Double -> Map Date Cooc -> [Period]
let done' = done ++ (if snd ego -> [(Branch,ShouldTry)] -> (Branch,ShouldTry) -> [(Branch,ShouldTry)]
-> [(Branch,ShouldTry)]
separateBranches fdt similarity lambda frequency minBranch thr rise timescale docs coocs periods done currentBranch rest =
let done' = done ++ (if snd currentBranch
then then
(if ((null (fst ego')) || (quality > quality')) (if ((null (fst branches')) || (quality > quality'))
---- 5) if the quality is not increased by the new branches or if the new branches are all small
---- then undo the separation and localy stop the sea rise
---- else validate the separation and authorise next sea rise in the long new branches
then then
-- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality') -- trace (" ✗ F(λ) = " <> show(quality) <> " (vs) " <> show(quality')
-- <> " | " <> show(length $ fst ego) <> " groups : " -- <> " | " <> show(length $ fst ego) <> " groups : "
-- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego') -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
-- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]") -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
[(fst ego,False)] [(fst currentBranch,False)]
else else
-- trace (" ✓ level = " <> printf "%.1f" thr <> "") -- trace (" ✓ F(λ) = " <> show(quality) <> " (vs) " <> show(quality')
-- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
-- <> " | " <> show(length $ fst ego) <> " groups : " -- <> " | " <> show(length $ fst ego) <> " groups : "
-- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego') -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
-- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]") -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego')))) ((map (\e -> (e,True)) (fst branches')) ++ (map (\e -> (e,False)) (snd branches'))))
else [ego]) else [currentBranch])
in in
-- 2) if there is no more branches in rest then return else continue -- 6) if there is no more branch to separate tne return [done'] else continue with [rest]
if null rest if null rest
then done' then done'
else breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods else separateBranches fdt similarity lambda frequency minBranch thr rise timescale docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest) done' (List.head rest) (List.tail rest)
where where
-------------------------------------- ------- 1) compute the quality before splitting any branch
quality :: Double quality :: LocalQuality
quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest)) quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst currentBranch] ++ (map fst rest))
--------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]]) ------------------- 2) split the current branch and create a new phylomemetic network
ego' = phylomemeticNetwork :: [Branch]
let branches = groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) phylomemeticNetwork = toPhylomemeticNetwork timescale periods similarity thr docs coocs (fst currentBranch)
$ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
branches' = branches `using` parList rdeepseq --------- 3) change the new phylomemetic network into a tuple of new branches
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch) --------- on the left : the long branches, on the right : the small ones
branches' :: ([Branch],[Branch])
branches' = partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
$ thrToMeta thr $ thrToMeta thr
$ depthToMeta (elevation - depth) branches' $ riseToMeta rise phylomemeticNetwork
--------------------------------------
quality' :: Double -------- 4) compute again the quality by considering the new branches
quality' :: LocalQuality
quality' = toPhyloQuality fdt lambda frequency quality' = toPhyloQuality fdt lambda frequency
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest)) ((map fst done) ++ (fst branches') ++ (snd branches') ++ (map fst rest))
seaLevelMatching :: Double -> Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double {-
-> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)] -- perform the sea-level rise algorithm, browse the similarity ladder and check that we can try out the next step
seaLevelMatching fdt proximity lambda minBranch frequency thr step depth elevation frame periods docs coocs branches = -}
-- if there is no branch to break or if seaLvl level > 1 then end seaLevelRise :: Double -> Proximity -> Double -> Int -> Map Int Double
if (thr >= 1) || ((not . or) $ map snd branches) -> [Double] -> Double
then branches -> Int -> [Period]
-> Map Date Double -> Map Date Cooc
-> [(Branch,ShouldTry)]
-> ([(Branch,ShouldTry)],FinalQuality)
seaLevelRise fdt proximity lambda minBranch frequency ladder rise frame periods docs coocs branches =
-- if the ladder is empty or thr > 1 or there is no branch to break then stop
if (null ladder) || ((List.head ladder) > 1) || (stopRise branches)
then (branches, toPhyloQuality fdt lambda frequency (map fst branches))
else else
-- break all the possible branches at the current seaLvl level -- start breaking up all the possible branches for the current similarity threshold
let quality = toPhyloQuality fdt lambda frequency (map fst branches) let thr = List.head ladder
acc = toAccuracy frequency (map fst branches) branches' = trace ("threshold = " <> printf "%.3f" thr
rec = toRecall frequency (map fst branches) <> " F(λ) = " <> printf "%.5f" (toPhyloQuality fdt lambda frequency (map fst branches))
branches' = trace ("↑ level = " <> printf "%.3f" thr <> " F(λ) = " <> printf "%.5f" quality <> " ξ = " <> printf "%.5f" (globalAccuracy frequency (map fst branches))
<> " ξ = " <> printf "%.5f" acc <> " ρ = " <> printf "%.5f" (globalRecall frequency (map fst branches))
<> " ρ = " <> printf "%.5f" rec <> " branches = " <> show(length branches))
<> " branches = " <> show(length branches) <> " ↴") $ separateBranches fdt proximity lambda frequency minBranch thr rise frame docs coocs periods
$ breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods [] (List.head branches) (List.tail branches)
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches) in seaLevelRise fdt proximity lambda minBranch frequency (List.tail ladder) (rise + 1) frame periods docs coocs branches'
frequency' = reduceFrequency frequency (map fst branches')
in seaLevelMatching fdt proximity lambda minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
constanteTemporalMatching start step phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
(toPhyloHorizon phylo)
where where
-- 2) process the temporal matching by elevating seaLvl level --------
branches :: [[PhyloGroup]] stopRise :: [(Branch,ShouldTry)] -> Bool
branches = map fst stopRise bs = ((not . or) $ map snd bs)
$ seaLevelMatching (fromIntegral $ Vector.length $ getRoots phylo)
(phyloProximity $ getConfig phylo)
(_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq)
start step
((((1 - start) / step) - 1))
(((1 - start) / step))
(getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo)
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
(reverse $ sortOn (length . fst) groups)
-- 1) for each group process an initial temporal Matching
-- here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],Bool)]
groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
$ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (phyloProximity $ getConfig phylo)
start
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo)
-----------------
-- | Horizon | --
-----------------
toPhyloHorizon :: Phylo -> Phylo
toPhyloHorizon phylo =
let t0 = take 1 (getPeriodIds phylo)
groups = getGroupsFromLevelPeriods 1 t0 phylo
sens = getSensibility (phyloProximity $ getConfig phylo)
nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) t0
diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) t0
in phylo & phylo_horizon .~ (fromList $ map (\(g,g') ->
((getGroupId g,getGroupId g'),weightedLogJaccard' sens nbDocs diago (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) $ listToCombi' groups)
--------------------------------------
-- | Adaptative Temporal Matching | --
--------------------------------------
thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
thrToMeta thr branches =
map (\b ->
map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
depthToMeta depth branches =
let break = length branches > 1
in map (\b ->
map (\g ->
if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
else g) b) branches
reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double {-
getInTupleMap m k k' -- start the temporal matching process up, recover the resulting branches and update the groups (at scale 1) consequently
| isJust (m !? ( k ,k')) = m ! ( k ,k') -}
| isJust (m !? ( k',k )) = m ! ( k',k ) temporalMatching :: [Double] -> Phylo -> Phylo
| otherwise = 0 temporalMatching ladder phylo = updatePhyloGroups 1
(Map.fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
(updateQuality quality phylo)
toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
toThreshold lvl proxiGroups =
let idx = ((Map.size proxiGroups) `div` (floor lvl)) - 1
in if idx >= 0
then (sort $ elems proxiGroups) !! idx
else 1
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
adaptativeBreakBranches :: Double -> Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
-> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
-> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
-> [([PhyloGroup],(Bool,[Double]))]
adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods done ego rest =
-- 1) keep or not the new division of ego
let done' = done ++ (if (fst . snd) ego
then (if ((null (fst ego')) || (quality > quality'))
then
[(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
else
( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
in
-- uncomment let .. in for debugging
-- let part1 = partition (snd) done'
-- part2 = partition (snd) rest
-- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
-- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
-- ) $
-- 2) if there is no more branches in rest then return else continue
if null rest
then done'
else adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where where
-------------------------------------- -------
thr :: Double quality :: FinalQuality
thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi quality = snd sea
--------------------------------------
quality :: Double
quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
--------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' =
let branches = groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
$ thrToMeta thr
$ depthToMeta (elevation - depth) branches'
--------------------------------------
quality' :: Double
quality' = toPhyloQuality fdt lambda frequency
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
--------
branches :: [Branch]
branches = map fst $ fst sea
adaptativeSeaLevelMatching :: Double -> Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double --- 2) process the temporal matching by elevating the similarity ladder
-> Double -> Int -> Map Int Double sea :: ([(Branch,ShouldTry)],FinalQuality)
-> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc sea = seaLevelRise (fromIntegral $ Vector.length $ getRoots phylo)
-> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
adaptativeSeaLevelMatching fdt proxiConf depth elevation groupsProxi lambda minBranch frequency frame periods docs coocs branches =
-- if there is no branch to break or if seaLvl level >= depth then end
if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
then branches
else
-- break all the possible branches at the current seaLvl level
let branches' = adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches')
groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
-- thr = toThreshold depth groupsProxi
in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
<> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
<> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
<> " thr = ")
$ adaptativeSeaLevelMatching fdt proxiConf (depth - 1) elevation groupsProxi' lambda minBranch frequency' frame periods docs coocs branches'
adaptativeTemporalMatching :: Double -> Phylo -> Phylo
adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
(toPhyloHorizon phylo)
where
-- 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]]
branches = map fst
$ adaptativeSeaLevelMatching (fromIntegral $ Vector.length $ getRoots phylo)
(phyloProximity $ getConfig phylo) (phyloProximity $ getConfig phylo)
(elevation - 1)
elevation
(phylo ^. phylo_groupsProxi)
(_qua_granularity $ phyloQuality $ getConfig phylo) (_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo) (_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq) (phylo ^. phylo_termFreq)
ladder 1
(getTimeFrame $ timeUnit $ getConfig phylo) (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (getPeriodIds phylo)
(phylo ^. phylo_timeDocs) (phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc) (phylo ^. phylo_timeCooc)
groups (reverse $ sortOn (length . fst) seabed)
-- 1) for each group process an initial temporal Matching
-- here we suppose that all the groups of level 1 are part of the same big branch ------ 1) for each group, process an initial temporal Matching and create a 'seabed'
groups :: [([PhyloGroup],(Bool,[Double]))] ------ ShouldTry determines if you should apply the seaLevelRise function again within each branch
groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr]))) seabed :: [(Branch,ShouldTry)]
$ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) seabed = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
$ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo) $ toPhylomemeticNetwork (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (phyloProximity $ getConfig phylo) (getPeriodIds phylo)
thr (phyloProximity $ getConfig phylo)
(List.head ladder)
(phylo ^. phylo_timeDocs) (phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc) (phylo ^. phylo_timeCooc)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo) (traceTemporalMatching $ getGroupsFromScale 1 phylo)
--------------------------------------
thr :: Double
thr = toThreshold elevation (phylo ^. phylo_groupsProxi)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment