Commit 70a2339e authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev-phylo' into dev

parents 01d762cb ddeafce7
...@@ -167,7 +167,7 @@ main = do ...@@ -167,7 +167,7 @@ main = do
printIOMsg "End of reconstruction, start the export" printIOMsg "End of reconstruction, start the export"
let dot = toPhyloExport phylo let dot = toPhyloExport phylo
let output = (outputPath config) let output = (outputPath config)
<> (unpack $ phyloName config) <> (unpack $ phyloName config)
......
...@@ -57,22 +57,39 @@ data CorpusParser = ...@@ -57,22 +57,39 @@ data CorpusParser =
| Csv {_csv_limit :: Int} | Csv {_csv_limit :: Int}
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data SeaElevation =
Constante
{ _cons_start :: Double
, _cons_step :: Double }
| Adaptative
{ _adap_granularity :: Double }
deriving (Show,Generic,Eq)
data Proximity = data Proximity =
WeightedLogJaccard WeightedLogJaccard
{ _wlj_sensibility :: Double { _wlj_sensibility :: Double
, _wlj_thresholdInit :: Double -- , _wlj_thresholdInit :: Double
, _wlj_thresholdStep :: Double } -- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double
}
| Hamming | Hamming
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq)
data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
data Synchrony = data Synchrony =
ByProximityThreshold ByProximityThreshold
{ _bpt_threshold :: Double { _bpt_threshold :: Double
, _bpt_sensibility :: Double} , _bpt_sensibility :: Double
, _bpt_scope :: SynchronyScope
, _bpt_strategy :: SynchronyStrategy }
| ByProximityDistribution | ByProximityDistribution
{ _bpd_sensibility :: Double} { _bpd_sensibility :: Double
, _bpd_strategy :: SynchronyStrategy }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
...@@ -84,16 +101,18 @@ data TimeUnit = ...@@ -84,16 +101,18 @@ data TimeUnit =
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data ContextualUnit = data Clique =
Fis Fis
{ _fis_support :: Int { _fis_support :: Int
, _fis_size :: Int } , _fis_size :: Int }
| MaxClique
{ _mcl_size :: Int }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data Quality = data Quality =
Quality { _qua_relevance :: Double Quality { _qua_granularity :: Double
, _qua_minBranch :: Int } , _qua_minBranch :: Int }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
...@@ -105,10 +124,11 @@ data Config = ...@@ -105,10 +124,11 @@ data Config =
, phyloName :: Text , phyloName :: Text
, phyloLevel :: Int , phyloLevel :: Int
, phyloProximity :: Proximity , phyloProximity :: Proximity
, seaElevation :: SeaElevation
, phyloSynchrony :: Synchrony , phyloSynchrony :: Synchrony
, phyloQuality :: Quality , phyloQuality :: Quality
, timeUnit :: TimeUnit , timeUnit :: TimeUnit
, contextualUnit :: ContextualUnit , clique :: Clique
, exportLabel :: [PhyloLabel] , exportLabel :: [PhyloLabel]
, exportSort :: Sort , exportSort :: Sort
, exportFilter :: [Filter] , exportFilter :: [Filter]
...@@ -123,11 +143,12 @@ defaultConfig = ...@@ -123,11 +143,12 @@ defaultConfig =
, corpusParser = Csv 1000 , corpusParser = Csv 1000
, phyloName = pack "Default Phylo" , phyloName = pack "Default Phylo"
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 0 0.1 , phyloProximity = WeightedLogJaccard 10
, phyloSynchrony = ByProximityDistribution 0 , seaElevation = Constante 0 0.1
, phyloQuality = Quality 0.1 1 , phyloSynchrony = ByProximityThreshold 0.5 10 SiblingBranches MergeAllGroups
, phyloQuality = Quality 0.6 1
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, contextualUnit = Fis 1 5 , clique = Fis 1 5
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2] , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
, exportSort = ByHierarchy , exportSort = ByHierarchy
, exportFilter = [ByBranchSize 2] , exportFilter = [ByBranchSize 2]
...@@ -139,10 +160,12 @@ instance FromJSON CorpusParser ...@@ -139,10 +160,12 @@ instance FromJSON CorpusParser
instance ToJSON CorpusParser instance ToJSON CorpusParser
instance FromJSON Proximity instance FromJSON Proximity
instance ToJSON Proximity instance ToJSON Proximity
instance FromJSON SeaElevation
instance ToJSON SeaElevation
instance FromJSON TimeUnit instance FromJSON TimeUnit
instance ToJSON TimeUnit instance ToJSON TimeUnit
instance FromJSON ContextualUnit instance FromJSON Clique
instance ToJSON ContextualUnit instance ToJSON Clique
instance FromJSON PhyloLabel instance FromJSON PhyloLabel
instance ToJSON PhyloLabel instance ToJSON PhyloLabel
instance FromJSON Tagger instance FromJSON Tagger
...@@ -153,6 +176,10 @@ instance FromJSON Order ...@@ -153,6 +176,10 @@ instance FromJSON Order
instance ToJSON Order instance ToJSON Order
instance FromJSON Filter instance FromJSON Filter
instance ToJSON Filter instance ToJSON Filter
instance FromJSON SynchronyScope
instance ToJSON SynchronyScope
instance FromJSON SynchronyStrategy
instance ToJSON SynchronyStrategy
instance FromJSON Synchrony instance FromJSON Synchrony
instance ToJSON Synchrony instance ToJSON Synchrony
instance FromJSON Quality instance FromJSON Quality
...@@ -239,6 +266,8 @@ data Phylo = ...@@ -239,6 +266,8 @@ data Phylo =
Phylo { _phylo_foundations :: PhyloFoundations Phylo { _phylo_foundations :: PhyloFoundations
, _phylo_timeCooc :: !(Map Date Cooc) , _phylo_timeCooc :: !(Map Date Cooc)
, _phylo_timeDocs :: !(Map Date Double) , _phylo_timeDocs :: !(Map Date Double)
, _phylo_termFreq :: !(Map Int Double)
, _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
, _phylo_param :: PhyloParam , _phylo_param :: PhyloParam
, _phylo_periods :: Map PhyloPeriodId PhyloPeriod , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
} }
...@@ -310,21 +339,17 @@ data Filiation = ToParents | ToChilds deriving (Generic, Show) ...@@ -310,21 +339,17 @@ data Filiation = ToParents | ToChilds deriving (Generic, Show)
data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show) data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
--------------------------- ----------------------
-- | Frequent Item Set | -- -- | Phylo Clique | --
--------------------------- ----------------------
-- | Clique : Set of ngrams cooccurring in the same Document
type Clique = Set Ngrams
-- | Support : Number of Documents where a Clique occurs -- | Support : Number of Documents where a Clique occurs
type Support = Int type Support = Int
-- | Fis : Frequent Items Set (ie: the association between a Clique and a Support) data PhyloClique = PhyloClique
data PhyloFis = PhyloFis { _phyloClique_nodes :: Set Ngrams
{ _phyloFis_clique :: Clique , _phyloClique_support :: Support
, _phyloFis_support :: Support , _phyloClique_period :: (Date,Date)
, _phyloFis_period :: (Date,Date)
} deriving (Generic,NFData,Show,Eq) } deriving (Generic,NFData,Show,Eq)
...@@ -356,9 +381,15 @@ data PhyloLabel = ...@@ -356,9 +381,15 @@ data PhyloLabel =
data PhyloBranch = data PhyloBranch =
PhyloBranch PhyloBranch
{ _branch_id :: PhyloBranchId { _branch_id :: PhyloBranchId
, _branch_label :: Text , _branch_canonId :: [Int]
, _branch_meta :: Map Text [Double] , _branch_seaLevel :: [Double]
} deriving (Generic, Show) , _branch_x :: Double
, _branch_y :: Double
, _branch_w :: Double
, _branch_t :: Double
, _branch_label :: Text
, _branch_meta :: Map Text [Double]
} deriving (Generic, Show, Eq)
data PhyloExport = data PhyloExport =
PhyloExport PhyloExport
...@@ -372,12 +403,13 @@ data PhyloExport = ...@@ -372,12 +403,13 @@ data PhyloExport =
makeLenses ''Config makeLenses ''Config
makeLenses ''Proximity makeLenses ''Proximity
makeLenses ''SeaElevation
makeLenses ''Quality makeLenses ''Quality
makeLenses ''ContextualUnit makeLenses ''Clique
makeLenses ''PhyloLabel makeLenses ''PhyloLabel
makeLenses ''TimeUnit makeLenses ''TimeUnit
makeLenses ''PhyloFoundations makeLenses ''PhyloFoundations
makeLenses ''PhyloFis makeLenses ''PhyloClique
makeLenses ''Phylo makeLenses ''Phylo
makeLenses ''PhyloPeriod makeLenses ''PhyloPeriod
makeLenses ''PhyloLevel makeLenses ''PhyloLevel
......
...@@ -30,7 +30,7 @@ import Gargantext.Viz.AdaptativePhylo ...@@ -30,7 +30,7 @@ import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.PhyloMaker import Gargantext.Viz.Phylo.PhyloMaker
import Gargantext.Viz.Phylo.PhyloExport import Gargantext.Viz.Phylo.PhyloExport
import Gargantext.Viz.Phylo.TemporalMatching (temporalMatching) import Gargantext.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching)
import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Control.Lens import Control.Lens
...@@ -38,6 +38,9 @@ import Data.GraphViz.Types.Generalised (DotGraph) ...@@ -38,6 +38,9 @@ import Data.GraphViz.Types.Generalised (DotGraph)
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
---------------------------------
-- | STEP 5 | -- Export the phylo
---------------------------------
phyloExport :: IO () phyloExport :: IO ()
phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot" phyloDot phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot" phyloDot
...@@ -45,6 +48,10 @@ phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot" ...@@ -45,6 +48,10 @@ phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot"
phyloDot :: DotGraph DotId phyloDot :: DotGraph DotId
phyloDot = toPhyloExport phylo2 phyloDot = toPhyloExport phylo2
--------------------------------------------------
-- | STEP 4 | -- Process the synchronic clustering
--------------------------------------------------
phylo2 :: Phylo phylo2 :: Phylo
phylo2 = synchronicClustering phylo1 phylo2 = synchronicClustering phylo1
...@@ -53,17 +60,22 @@ phylo2 = synchronicClustering phylo1 ...@@ -53,17 +60,22 @@ phylo2 = synchronicClustering phylo1
----------------------------------------------- -----------------------------------------------
phylo1 :: Phylo phylo1 :: Phylo
phylo1 = temporalMatching phylo1 = case (getSeaElevation phyloBase) of
$ appendGroups fisToGroup 1 phyloFis phyloBase Constante s g -> constanteTemporalMatching s g
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
Adaptative s -> adaptativeTemporalMatching s
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
--------------------------------------------- ---------------------------------------------
-- | STEP 2 | -- Build the frequent items set -- | STEP 2 | -- Build the cliques
--------------------------------------------- ---------------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis] phyloClique :: Map (Date,Date) [PhyloClique]
phyloFis = toPhyloFis docsByPeriods (getFisSupport $ contextualUnit config) (getFisSize $ contextualUnit config) phyloClique = toPhyloClique phyloBase docsByPeriods
docsByPeriods :: Map (Date,Date) [Document] docsByPeriods :: Map (Date,Date) [Document]
...@@ -96,7 +108,7 @@ config = ...@@ -96,7 +108,7 @@ config =
defaultConfig { phyloName = "Cesar et Cleopatre" defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloLevel = 2 , phyloLevel = 2
, exportFilter = [ByBranchSize 0] , exportFilter = [ByBranchSize 0]
, contextualUnit = Fis 0 0 } , clique = Fis 0 0 }
docs :: [Document] docs :: [Document]
......
...@@ -18,7 +18,7 @@ Portability : POSIX ...@@ -18,7 +18,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloExport where module Gargantext.Viz.Phylo.PhyloExport where
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList) import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy) import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy, inits, elemIndex)
import Data.Vector (Vector) import Data.Vector (Vector)
import Prelude (writeFile) import Prelude (writeFile)
...@@ -36,6 +36,7 @@ import System.FilePath ...@@ -36,6 +36,7 @@ import System.FilePath
import Debug.Trace (trace) import Debug.Trace (trace)
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Vector as Vector
import qualified Data.Text.Lazy as Lazy import qualified Data.Text.Lazy as Lazy
import qualified Data.GraphViz.Attributes.HTML as H import qualified Data.GraphViz.Attributes.HTML as H
...@@ -105,44 +106,51 @@ groupToTable fdt g = H.Table H.HTable ...@@ -105,44 +106,51 @@ groupToTable fdt g = H.Table H.HTable
<> (pack $ show (fst $ g ^. phylo_groupPeriod)) <> (pack $ show (fst $ g ^. phylo_groupPeriod))
<> (fromStrict " , ") <> (fromStrict " , ")
<> (pack $ show (snd $ g ^. phylo_groupPeriod)) <> (pack $ show (snd $ g ^. phylo_groupPeriod))
<> (fromStrict " ) "))]] <> (fromStrict " ) ")
<> (pack $ show (getGroupId g)))]]
-------------------------------------- --------------------------------------
branchToDotNode :: PhyloBranch -> Dot DotId branchToDotNode :: PhyloBranch -> Int -> Dot DotId
branchToDotNode b = branchToDotNode b bId =
node (branchIdToDotId $ b ^. branch_id) node (branchIdToDotId $ b ^. branch_id)
([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)] ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
<> (metaToAttr $ b ^. branch_meta) <> (metaToAttr $ b ^. branch_meta)
<> [ toAttr "nodeType" "branch" <> [ toAttr "nodeType" "branch"
, toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id)) ]) , toAttr "bId" (pack $ show bId)
, toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
, toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
, toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
, toAttr "label" (pack $ show $ b ^. branch_label)
])
periodToDotNode :: (Date,Date) -> Dot DotId periodToDotNode :: (Date,Date) -> Dot DotId
periodToDotNode prd = periodToDotNode prd =
node (periodIdToDotId prd) node (periodIdToDotId prd)
([Shape Square, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))] ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
<> [ toAttr "nodeType" "period" <> [ toAttr "nodeType" "period"
, toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd)) , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
, toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))]) , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
groupToDotNode fdt g = groupToDotNode fdt g bId =
node (groupIdToDotId $ getGroupId g) node (groupIdToDotId $ getGroupId g)
([FontName "Arial", Shape Square, toLabel (groupToTable fdt g)] ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
<> [ toAttr "nodeType" "group" <> [ toAttr "nodeType" "group"
, toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod)) , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
, toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod)) , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
, toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId)) , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
, toAttr "bId" (pack $ show bId)
, toAttr "support" (pack $ show (g ^. phylo_groupSupport))]) , toAttr "support" (pack $ show (g ^. phylo_groupSupport))])
toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
toDotEdge source target lbl edgeType = edge source target toDotEdge source target lbl edgeType = edge source target
(case edgeType of (case edgeType of
GroupToGroup -> [ Width 10, Color [toWColor Black], Constraint True GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True
, Label (StrLabel $ fromStrict lbl)] , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "link" ]
BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)]) BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
, Label (StrLabel $ fromStrict lbl)] , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "branchLink" ]
BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)]) BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
, Label (StrLabel $ fromStrict lbl)] , Label (StrLabel $ fromStrict lbl)]
PeriodToPeriod -> [ Width 5, Color [toWColor Black]]) PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
...@@ -155,10 +163,17 @@ mergePointers groups = ...@@ -155,10 +163,17 @@ mergePointers groups =
in unionWith (\w w' -> max w w') toChilds toParents in unionWith (\w w' -> max w w') toChilds toParents
toBid :: PhyloGroup -> [PhyloBranch] -> Int
toBid g bs =
let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
in fromJust $ elemIndex b' bs
exportToDot :: Phylo -> PhyloExport -> DotGraph DotId exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
exportToDot phylo export = exportToDot phylo export =
trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and " trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
<> show(length $ export ^. export_groups) <> " groups to a dot file\n") $ <> show(length $ export ^. export_groups) <> " groups "
<> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
<> "##########################") $
digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
-- | 1) init the dot graph -- | 1) init the dot graph
...@@ -167,11 +182,12 @@ exportToDot phylo export = ...@@ -167,11 +182,12 @@ exportToDot phylo export =
, Ratio FillRatio , Ratio FillRatio
, Style [SItem Filled []],Color [toWColor White]] , Style [SItem Filled []],Color [toWColor White]]
-- | home made attributes -- | home made attributes
<> [(toAttr (fromStrict "nbDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs)) <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo)) ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo)) ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo)) ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_relevance $ phyloQuality $ getConfig phylo)) ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
]) ])
...@@ -191,7 +207,7 @@ exportToDot phylo export = ...@@ -191,7 +207,7 @@ exportToDot phylo export =
-- mapM branchToDotNode branches -- mapM branchToDotNode branches
-- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
mapM branchToDotNode $ export ^. export_branches mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
-- | 5) create a layer for each period -- | 5) create a layer for each period
_ <- mapM (\period -> _ <- mapM (\period ->
...@@ -200,7 +216,7 @@ exportToDot phylo export = ...@@ -200,7 +216,7 @@ exportToDot phylo export =
periodToDotNode period periodToDotNode period
-- | 6) create a node for each group -- | 6) create a node for each group
mapM (\g -> groupToDotNode (getRoots phylo) g) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups) mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
) $ getPeriodIds phylo ) $ getPeriodIds phylo
-- | 7) create the edges between a branch and its first groups -- | 7) create the edges between a branch and its first groups
...@@ -224,12 +240,12 @@ exportToDot phylo export = ...@@ -224,12 +240,12 @@ exportToDot phylo export =
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
-- | 8) create the edges between the branches -- | 8) create the edges between the branches
_ <- mapM (\(bId,bId') -> -- _ <- mapM (\(bId,bId') ->
toDotEdge (branchIdToDotId bId) (branchIdToDotId bId') -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
(Text.pack $ show(branchIdsToProximity bId bId' -- (Text.pack $ show(branchIdsToProximity bId bId'
(getThresholdInit $ phyloProximity $ getConfig phylo) -- (getThresholdInit $ phyloProximity $ getConfig phylo)
(getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
graphAttrs [Rank SameRank] graphAttrs [Rank SameRank]
...@@ -261,11 +277,25 @@ processFilters filters qua export = ...@@ -261,11 +277,25 @@ processFilters filters qua export =
-- | Sort | -- -- | Sort | --
-------------- --------------
branchToIso :: [PhyloBranch] -> [PhyloBranch]
branchToIso branches =
let steps = map sum
$ inits
$ map (\(b,x) -> b ^. branch_y + 0.05 - x)
$ zip branches
$ ([0] ++ (map (\(b,b') ->
let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
in (b' ^. branch_seaLevel) !! (idx - 1)
) $ listToSeq branches))
in map (\(x,b) -> b & branch_x .~ x)
$ zip steps branches
sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch] sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
sortByHierarchy depth branches = sortByHierarchy depth branches =
if (length branches == 1) if (length branches == 1)
then branches then branchToIso branches
else concat else branchToIso $ concat
$ map (\branches' -> $ map (\branches' ->
let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches' let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions)) in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
...@@ -338,11 +368,12 @@ branchDating export = ...@@ -338,11 +368,12 @@ branchDating export =
$ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id) $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
then acc ++ [g ^. phylo_groupPeriod] then acc ++ [g ^. phylo_groupPeriod]
else acc ) [] $ export ^. export_groups else acc ) [] $ export ^. export_groups
periods = nub groups
birth = fst $ head' "birth" groups birth = fst $ head' "birth" groups
age = (snd $ last' "age" groups) - birth age = (snd $ last' "age" groups) - birth
in b & branch_meta %~ insert "birth" [fromIntegral birth] in b & branch_meta %~ insert "birth" [fromIntegral birth]
& branch_meta %~ insert "age" [fromIntegral age] & branch_meta %~ insert "age" [fromIntegral age]
& branch_meta %~ insert "size" [fromIntegral $ length groups] ) export & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
processMetrics :: PhyloExport -> PhyloExport processMetrics :: PhyloExport -> PhyloExport
processMetrics export = ngramsMetrics processMetrics export = ngramsMetrics
...@@ -409,8 +440,8 @@ processLabels labels foundations export = ...@@ -409,8 +440,8 @@ processLabels labels foundations export =
toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
toDynamics n parents group m = toDynamics n parents g m =
let prd = group ^. phylo_groupPeriod let prd = g ^. phylo_groupPeriod
end = last' "dynamics" (sort $ map snd $ elems m) end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end)) in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-- | decrease -- | decrease
...@@ -429,7 +460,7 @@ toDynamics n parents group m = ...@@ -429,7 +460,7 @@ toDynamics n parents group m =
processDynamics :: [PhyloGroup] -> [PhyloGroup] processDynamics :: [PhyloGroup] -> [PhyloGroup]
processDynamics groups = processDynamics groups =
map (\g -> map (\g ->
let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId) let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
&& ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
...@@ -449,7 +480,6 @@ processDynamics groups = ...@@ -449,7 +480,6 @@ processDynamics groups =
-- | phyloExport | -- -- | phyloExport | --
--------------------- ---------------------
toPhyloExport :: Phylo -> DotGraph DotId toPhyloExport :: Phylo -> DotGraph DotId
toPhyloExport phylo = exportToDot phylo toPhyloExport phylo = exportToDot phylo
$ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo) $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
...@@ -458,15 +488,47 @@ toPhyloExport phylo = exportToDot phylo ...@@ -458,15 +488,47 @@ toPhyloExport phylo = exportToDot phylo
$ processMetrics export $ processMetrics export
where where
export :: PhyloExport export :: PhyloExport
export = PhyloExport groups branches export = PhyloExport groups branches
-------------------------------------- --------------------------------------
branches :: [PhyloBranch] branches :: [PhyloBranch]
branches = traceExportBranches $ map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups branches = map (\g ->
let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
breaks = (g ^. phylo_groupMeta) ! "breaks"
canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
in PhyloBranch (g ^. phylo_groupBranchId)
canonId
seaLvl
0
(last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
0
0
"" empty)
$ map (\gs -> head' "export" gs)
$ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
$ sortOn (\g -> g ^. phylo_groupBranchId) groups
-------------------------------------- --------------------------------------
groups :: [PhyloGroup] groups :: [PhyloGroup]
groups = processDynamics groups = traceExportGroups
$ getGroupsFromLevel (phyloLevel $ getConfig phylo) phylo $ processDynamics
$ getGroupsFromLevel (phyloLevel $ getConfig phylo)
$ tracePhyloInfo phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch] traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
traceExportBranches branches = trace ("\n" <> "-- | Export " <> show(length branches) <> " branches") branches traceExportBranches branches = trace ("\n"
<> "-- | Export " <> show(length branches) <> " branches") branches
tracePhyloInfo :: Phylo -> Phylo
tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
<> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
<> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
) phylo
traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
traceExportGroups groups = trace ("\n" <> "-- | Export "
<> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
<> show(length groups) <> " groups and "
<> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
) groups
...@@ -15,15 +15,15 @@ Portability : POSIX ...@@ -15,15 +15,15 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloMaker where module Gargantext.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++)) import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys) import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey)
import Data.Set (size) import Data.Set (size)
import Data.Vector (Vector) import Data.Vector (Vector)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (temporalMatching) import Gargantext.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Text.Context (TermList) import Gargantext.Text.Context (TermList)
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..)) import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
...@@ -43,7 +43,8 @@ import qualified Data.Set as Set ...@@ -43,7 +43,8 @@ import qualified Data.Set as Set
toPhylo :: [Document] -> TermList -> Config -> Phylo toPhylo :: [Document] -> TermList -> Config -> Phylo
toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $ toPhylo docs lst conf = trace ("# phylo1 groups " <> show(length $ getGroupsFromLevel 1 phylo1))
$ traceToPhylo (phyloLevel conf) $
if (phyloLevel conf) > 1 if (phyloLevel conf) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phylo1 [2..(phyloLevel conf)] then foldl' (\phylo' _ -> synchronicClustering phylo') phylo1 [2..(phyloLevel conf)]
else phylo1 else phylo1
...@@ -62,6 +63,33 @@ toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $ ...@@ -62,6 +63,33 @@ toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $
-- | To Phylo 1 | -- -- | To Phylo 1 | --
-------------------- --------------------
toGroupsProxi :: Level -> Phylo -> Phylo
toGroupsProxi lvl phylo =
let proximity = phyloProximity $ getConfig phylo
groupsProxi = foldlWithKey (\acc pId pds ->
-- 1) process period by period
let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams))
$ elems
$ view ( phylo_periodLevels
. traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) pds
next = getNextPeriods ToParents (getTimeFrame $ timeUnit $ getConfig phylo) pId (keys $ phylo ^. phylo_periods)
targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromLevelPeriods lvl next phylo
docs = filterDocs (phylo ^. phylo_timeDocs) ([pId] ++ next)
diagos = filterDiago (phylo ^. phylo_timeCooc) ([pId] ++ next)
-- 2) compute the pairs in parallel
pairs = map (\(id,ngrams) ->
map (\(id',ngrams') ->
let nbDocs = (sum . elems) $ filterDocs docs ([idToPrd id, idToPrd id'])
diago = reduceDiagos $ filterDiago diagos ([idToPrd id, idToPrd id'])
in ((id,id'),toProximity nbDocs diago proximity ngrams ngrams' ngrams')
) $ filter (\(_,ngrams') -> (not . null) $ intersect ngrams ngrams') targets
) egos
pairs' = pairs `using` parList rdeepseq
in acc ++ (concat pairs')
) [] $ phylo ^. phylo_periods
in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi)
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n") appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
...@@ -72,40 +100,44 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -72,40 +100,44 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
(\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel) (\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel)
then then
let pId = phyloLvl ^. phylo_levelPeriod let pId = phyloLvl ^. phylo_levelPeriod
phyloFis = m ! pId phyloCUnit = m ! pId
in phyloLvl in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj -> & phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups) groups ++ [ (((pId,lvl),length groups)
, f obj pId lvl (length groups) (getRoots phylo) , f obj pId lvl (length groups) (getRoots phylo)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId])) (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloFis) ] ) [] phyloCUnit)
else else
phyloLvl ) phyloLvl )
phylo phylo
fisToGroup :: PhyloFis -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup cliqueToGroup :: PhyloClique -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
fisToGroup fis pId lvl idx fdt coocs = cliqueToGroup fis pId lvl idx fdt coocs =
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloFis_clique) fdt let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloClique_nodes) fdt
in PhyloGroup pId lvl idx "" in PhyloGroup pId lvl idx ""
(fis ^. phyloFis_support) (fis ^. phyloClique_support)
ngrams ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
(1,[0]) (1,[0]) -- | branchid (lvl,[path in the branching tree])
empty (fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] [] []
toPhylo1 :: [Document] -> Phylo -> Phylo toPhylo1 :: [Document] -> Phylo -> Phylo
toPhylo1 docs phyloBase = temporalMatching toPhylo1 docs phyloBase = case (getSeaElevation phyloBase) of
$ appendGroups fisToGroup 1 phyloFis phyloBase Constante start gap -> constanteTemporalMatching start gap
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
Adaptative steps -> adaptativeTemporalMatching steps
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
where where
-------------------------------------- --------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis] phyloClique :: Map (Date,Date) [PhyloClique]
phyloFis = toPhyloFis docs' (getFisSupport $ contextualUnit $ getConfig phyloBase) (getFisSize $ contextualUnit $ getConfig phyloBase) phyloClique = toPhyloClique phyloBase docs'
-------------------------------------- --------------------------------------
docs' :: Map (Date,Date) [Document] docs' :: Map (Date,Date) [Document]
docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs docs' = groupDocsByPeriod' date (getPeriodIds phyloBase) docs
-------------------------------------- --------------------------------------
...@@ -115,54 +147,59 @@ toPhylo1 docs phyloBase = temporalMatching ...@@ -115,54 +147,59 @@ toPhylo1 docs phyloBase = temporalMatching
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False) -- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis :: Bool -> Int -> (Int -> [PhyloFis] -> [PhyloFis]) -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] filterClique :: Bool -> Int -> (Int -> [PhyloClique] -> [PhyloClique]) -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterFis keep thr f m = case keep of filterClique keep thr f m = case keep of
False -> map (\l -> f thr l) m False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m True -> map (\l -> keepFilled (f) thr l) m
-- | To filter Fis with small Support -- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis] filterCliqueBySupport :: Int -> [PhyloClique] -> [PhyloClique]
filterFisBySupport thr l = filter (\fis -> (fis ^. phyloFis_support) >= thr) l filterCliqueBySupport thr l = filter (\clq -> (clq ^. phyloClique_support) >= thr) l
-- | To filter Fis with small Clique size -- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis] filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique]
filterFisByClique thr l = filter (\fis -> (size $ fis ^. phyloFis_clique) >= thr) l filterCliqueBySize thr l = filter (\clq -> (size $ clq ^. phyloClique_nodes) >= thr) l
-- | To filter nested Fis -- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterFisByNested m = filterCliqueByNested m =
let fis = map (\l -> let clq = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloFis_clique) (Set.toList $ f ^. phyloFis_clique)) mem) foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloClique_nodes) (Set.toList $ f ^. phyloClique_nodes)) mem)
then mem then mem
else else
let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloFis_clique) (Set.toList $ f' ^. phyloFis_clique)) mem let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloClique_nodes) (Set.toList $ f' ^. phyloClique_nodes)) mem
in fMax ++ [f] ) [] l) in fMax ++ [f] ) [] l)
$ elems m $ elems m
fis' = fis `using` parList rdeepseq clq' = clq `using` parList rdeepseq
in fromList $ zip (keys m) fis' in fromList $ zip (keys m) clq'
-- | To transform a time map of docs innto a time map of Fis with some filters -- | To transform a time map of docs innto a time map of Fis with some filters
toPhyloFis :: Map (Date, Date) [Document] -> Int -> Int -> Map (Date,Date) [PhyloFis] toPhyloClique :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [PhyloClique]
toPhyloFis phyloDocs support clique = traceFis "Filtered Fis" toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$ filterFisByNested Fis s s' -> -- traceFis "Filtered Fis"
$ traceFis "Filtered by clique size" filterCliqueByNested
$ filterFis True clique (filterFisByClique) -- $ traceFis "Filtered by clique size"
$ traceFis "Filtered by support" $ filterClique True s' (filterCliqueBySize)
$ filterFis True support (filterFisBySupport) -- $ traceFis "Filtered by support"
$ traceFis "Unfiltered Fis" phyloFis $ filterClique True s (filterCliqueBySupport)
-- $ traceFis "Unfiltered Fis"
phyloClique
MaxClique _ -> undefined
where where
-------------------------------------- --------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis] phyloClique :: Map (Date,Date) [PhyloClique]
phyloFis = phyloClique = case (clique $ getConfig phylo) of
let fis = map (\(prd,docs) -> let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs) Fis _ _ -> let fis = map (\(prd,docs) ->
in (prd, map (\f -> PhyloFis (fst f) (snd f) prd) lst)) let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
$ toList phyloDocs in (prd, map (\f -> PhyloClique (fst f) (snd f) prd) lst))
fis' = fis `using` parList rdeepseq $ toList phyloDocs
in fromList fis' fis' = fis `using` parList rdeepseq
in fromList fis'
MaxClique _ -> undefined
-------------------------------------- --------------------------------------
...@@ -188,6 +225,21 @@ docsToTimeScaleCooc docs fdt = ...@@ -188,6 +225,21 @@ docsToTimeScaleCooc docs fdt =
-- | to Phylo Base | -- -- | to Phylo Base | --
----------------------- -----------------------
-- | To group a list of Documents by fixed periods
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = map (inPeriode f docs') pds
periods' = periods `using` parList rdeepseq
in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length pds) <> " periods" <> "\n")
$ fromList $ zip pds periods'
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [[t]] -> (b, b) -> [t]
inPeriode f' h (start,end) =
concat $ fst $ partition (\d -> f' (head' "inPeriode" d) >= start && f' (head' "inPeriode" d) <= end) h
-- | To group a list of Documents by fixed periods -- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
...@@ -206,6 +258,17 @@ groupDocsByPeriod f pds es = ...@@ -206,6 +258,17 @@ groupDocsByPeriod f pds es =
-------------------------------------- --------------------------------------
docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double
docsToTermFreq docs fdt =
let nbDocs = fromIntegral $ length docs
freqs = map (/(nbDocs))
$ fromList
$ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) docs
sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs
-- | To count the number of docs by unit of time -- | To count the number of docs by unit of time
docsToTimeScaleNb :: [Document] -> Map Date Double docsToTimeScaleNb :: [Document] -> Map Date Double
docsToTimeScaleNb docs = docsToTimeScaleNb docs =
...@@ -230,5 +293,7 @@ toPhyloBase docs lst conf = ...@@ -230,5 +293,7 @@ toPhyloBase docs lst conf =
$ Phylo foundations $ Phylo foundations
(docsToTimeScaleCooc docs (foundations ^. foundations_roots)) (docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs) (docsToTimeScaleNb docs)
(docsToTermFreq docs (foundations ^. foundations_roots))
empty
params params
(fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels 1 prd))) periods) (fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels 1 prd))) periods)
...@@ -34,6 +34,7 @@ import Control.Lens hiding (Level) ...@@ -34,6 +34,7 @@ import Control.Lens hiding (Level)
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Map as Map
------------ ------------
-- | Io | -- -- | Io | --
...@@ -66,6 +67,7 @@ roundToStr = printf "%0.*f" ...@@ -66,6 +67,7 @@ roundToStr = printf "%0.*f"
countSup :: Double -> [Double] -> Int countSup :: Double -> [Double] -> Int
countSup s l = length $ filter (>s) l countSup s l = length $ filter (>s) l
dropByIdx :: Int -> [a] -> [a] dropByIdx :: Int -> [a] -> [a]
dropByIdx k l = take k l ++ drop (k+1) l dropByIdx k l = take k l ++ drop (k+1) l
...@@ -76,6 +78,15 @@ elemIndex' e l = case (List.elemIndex e l) of ...@@ -76,6 +78,15 @@ elemIndex' e l = case (List.elemIndex e l) of
Just i -> i Just i -> i
commonPrefix :: Eq a => [a] -> [a] -> [a] -> [a]
commonPrefix lst lst' acc =
if (null lst || null lst')
then acc
else if (head' "commonPrefix" lst == head' "commonPrefix" lst')
then commonPrefix (tail lst) (tail lst') (acc ++ [head' "commonPrefix" lst])
else acc
--------------------- ---------------------
-- | Foundations | -- -- | Foundations | --
--------------------- ---------------------
...@@ -162,44 +173,44 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l) ...@@ -162,44 +173,44 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l)
else f thr l else f thr l
traceClique :: Map (Date, Date) [PhyloFis] -> String traceClique :: Map (Date, Date) [PhyloClique] -> String
traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6] traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where where
-------------------------------------- --------------------------------------
cliques :: [Double] cliques :: [Double]
cliques = sort $ map (fromIntegral . size . _phyloFis_clique) $ concat $ elems mFis cliques = sort $ map (fromIntegral . size . _phyloClique_nodes) $ concat $ elems mFis
-------------------------------------- --------------------------------------
traceSupport :: Map (Date, Date) [PhyloFis] -> String traceSupport :: Map (Date, Date) [PhyloClique] -> String
traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6] traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where where
-------------------------------------- --------------------------------------
supports :: [Double] supports :: [Double]
supports = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems mFis supports = sort $ map (fromIntegral . _phyloClique_support) $ concat $ elems mFis
-------------------------------------- --------------------------------------
traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] traceFis :: [Char] -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n" traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
<> "Support : " <> (traceSupport mFis) <> "\n" <> "Support : " <> (traceSupport mFis) <> "\n"
<> "Clique : " <> (traceClique mFis) <> "\n" ) mFis <> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis
------------------------- ---------------
-- | Contextual unit | -- -- | Clique| --
------------------------- ---------------
getFisSupport :: ContextualUnit -> Int getCliqueSupport :: Clique -> Int
getFisSupport unit = case unit of getCliqueSupport unit = case unit of
Fis s _ -> s Fis s _ -> s
-- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support") MaxClique _ -> 0
getFisSize :: ContextualUnit -> Int getCliqueSize :: Clique -> Int
getFisSize unit = case unit of getCliqueSize unit = case unit of
Fis _ s -> s Fis _ s -> s
-- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size") MaxClique s -> s
-------------- --------------
...@@ -227,6 +238,8 @@ sumCooc cooc cooc' = unionWith (+) cooc cooc' ...@@ -227,6 +238,8 @@ sumCooc cooc cooc' = unionWith (+) cooc cooc'
getTrace :: Cooc -> Double getTrace :: Cooc -> Double
getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
coocToDiago :: Cooc -> Cooc
coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc
-- | To build the local cooc matrix of each phylogroup -- | To build the local cooc matrix of each phylogroup
ngramsToCooc :: [Int] -> [Cooc] -> Cooc ngramsToCooc :: [Int] -> [Cooc] -> Cooc
...@@ -243,6 +256,12 @@ ngramsToCooc ngrams coocs = ...@@ -243,6 +256,12 @@ ngramsToCooc ngrams coocs =
getGroupId :: PhyloGroup -> PhyloGroupId getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex) getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
idToPrd :: PhyloGroupId -> PhyloPeriodId
idToPrd id = (fst . fst) id
getGroupThr :: PhyloGroup -> Double
getGroupThr group = last' "getGroupThr" ((group ^. phylo_groupMeta) ! "breaks")
groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup] groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
...@@ -255,34 +274,21 @@ getPeriodPointers fil group = ...@@ -255,34 +274,21 @@ getPeriodPointers fil group =
filterProximity :: Proximity -> Double -> Double -> Bool filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity proximity thr local = filterProximity proximity thr local =
case proximity of case proximity of
WeightedLogJaccard _ _ _ -> local >= thr WeightedLogJaccard _ -> local >= thr
Hamming -> undefined Hamming -> undefined
getProximityName :: Proximity -> String getProximityName :: Proximity -> String
getProximityName proximity = getProximityName proximity =
case proximity of case proximity of
WeightedLogJaccard _ _ _ -> "WLJaccard" WeightedLogJaccard _ -> "WLJaccard"
Hamming -> "Hamming" Hamming -> "Hamming"
getProximityInit :: Proximity -> Double
getProximityInit proximity =
case proximity of
WeightedLogJaccard _ i _ -> i
Hamming -> undefined
getProximityStep :: Proximity -> Double
getProximityStep proximity =
case proximity of
WeightedLogJaccard _ _ s -> s
Hamming -> undefined
--------------- ---------------
-- | Phylo | -- -- | Phylo | --
--------------- ---------------
addPointers :: PhyloGroup -> Filiation -> PointerType -> [Pointer] -> PhyloGroup addPointers :: Filiation -> PointerType -> [Pointer] -> PhyloGroup -> PhyloGroup
addPointers group fil pty pointers = addPointers fil pty pointers group =
case pty of case pty of
TemporalPointer -> case fil of TemporalPointer -> case fil of
ToChilds -> group & phylo_groupPeriodChilds .~ pointers ToChilds -> group & phylo_groupPeriodChilds .~ pointers
...@@ -310,6 +316,9 @@ getLevels phylo = nub ...@@ -310,6 +316,9 @@ getLevels phylo = nub
. traverse . traverse
. phylo_periodLevels ) phylo . phylo_periodLevels ) phylo
getSeaElevation :: Phylo -> SeaElevation
getSeaElevation phylo = seaElevation (getConfig phylo)
getConfig :: Phylo -> Config getConfig :: Phylo -> Config
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
...@@ -334,6 +343,26 @@ getGroupsFromLevel lvl phylo = ...@@ -334,6 +343,26 @@ getGroupsFromLevel lvl phylo =
. phylo_levelGroups ) phylo . phylo_levelGroups ) phylo
getGroupsFromLevelPeriods :: Level -> [PhyloPeriodId] -> Phylo -> [PhyloGroup]
getGroupsFromLevelPeriods lvl periods phylo =
elems $ view ( phylo_periods
. traverse
. filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) phylo
getGroupsFromPeriods :: Level -> Map PhyloPeriodId PhyloPeriod -> [PhyloGroup]
getGroupsFromPeriods lvl periods =
elems $ view ( traverse
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) periods
updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
updatePhyloGroups lvl m phylo = updatePhyloGroups lvl m phylo =
over ( phylo_periods over ( phylo_periods
...@@ -391,27 +420,7 @@ traceSynchronyStart phylo = ...@@ -391,27 +420,7 @@ traceSynchronyStart phylo =
getSensibility :: Proximity -> Double getSensibility :: Proximity -> Double
getSensibility proxi = case proxi of getSensibility proxi = case proxi of
WeightedLogJaccard s _ _ -> s WeightedLogJaccard s -> s
Hamming -> undefined
getThresholdInit :: Proximity -> Double
getThresholdInit proxi = case proxi of
WeightedLogJaccard _ t _ -> t
Hamming -> undefined
getThresholdStep :: Proximity -> Double
getThresholdStep proxi = case proxi of
WeightedLogJaccard _ _ s -> s
Hamming -> undefined
traceBranchMatching :: Proximity -> Double -> [PhyloGroup] -> [PhyloGroup]
traceBranchMatching proxi thr groups = case proxi of
WeightedLogJaccard _ i s -> trace (
roundToStr 2 thr <> " "
<> foldl (\acc _ -> acc <> ".") "." [(10*i),(10*i + 10*s)..(10*thr)]
<> " " <> show(length groups) <> " groups"
) groups
Hamming -> undefined Hamming -> undefined
---------------- ----------------
...@@ -478,4 +487,9 @@ traceMatchEnd groups = ...@@ -478,4 +487,9 @@ traceMatchEnd groups =
traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup] traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
traceTemporalMatching groups = traceTemporalMatching groups =
trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
\ No newline at end of file
traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double
traceGroupsProxi m =
trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m
\ No newline at end of file
...@@ -18,34 +18,77 @@ module Gargantext.Viz.Phylo.SynchronicClustering where ...@@ -18,34 +18,77 @@ module Gargantext.Viz.Phylo.SynchronicClustering where
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard) import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Viz.Phylo.PhyloExport (processDynamics)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn) import Data.List ((++), null, intersect, nub, concat, sort, sortOn, all, groupBy, group, maximum)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member) import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Data.Text (Text)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
-- import Debug.Trace (trace)
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set
------------------------- -------------------------
-- | New Level Maker | -- -- | New Level Maker | --
------------------------- -------------------------
toBranchId :: PhyloGroup -> PhyloBranchId mergeBranchIds :: [[Int]] -> [Int]
toBranchId child = ((child ^. phylo_groupLevel) + 1, snd (child ^. phylo_groupBranchId)) mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
where
-- | 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids
mostFreq' :: [[Int]] -> [[Int]]
mostFreq' ids' =
let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
sup = (fst . maximum) groupIds
in map snd $ filter (\gIds -> fst gIds == sup) groupIds
mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
mergeMeta bId groups =
let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups
in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches' groups =
-- | run the related component algorithm
let egos = map (\g -> [getGroupId g]
++ (map fst $ g ^. phylo_groupPeriodParents)
++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups
graph = relatedComponents egos
-- | update each group's branch id
in map (\ids ->
let groups' = elems $ restrictKeys groups (Set.fromList ids)
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs = mergeGroups coocs id mapIds childs =
let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) "" in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) ""
(sum $ map _phylo_groupSupport childs) ngrams (sum $ map _phylo_groupSupport childs) ngrams
(ngramsToCooc ngrams coocs) (toBranchId (head' "mergeGroups" childs)) (ngramsToCooc ngrams coocs)
empty [] (map (\g -> (getGroupId g, 1)) childs) ((snd $ fst id),bId)
(mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs) (updatePointers $ concat $ map _phylo_groupPeriodParents childs)
(updatePointers $ concat $ map _phylo_groupPeriodChilds childs) (updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
where where
--------------------
bId :: [Int]
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) childs
--------------------
updatePointers :: [Pointer] -> [Pointer] updatePointers :: [Pointer] -> [Pointer]
updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers
...@@ -57,42 +100,47 @@ addPhyloLevel lvl phylo = ...@@ -57,42 +100,47 @@ addPhyloLevel lvl phylo =
%~ (insert (phyloPrd ^. phylo_periodPeriod, lvl) (PhyloLevel (phyloPrd ^. phylo_periodPeriod) lvl empty))) phylo %~ (insert (phyloPrd ^. phylo_periodPeriod, lvl) (PhyloLevel (phyloPrd ^. phylo_periodPeriod) lvl empty))) phylo
toNextLevel :: Phylo -> [PhyloGroup] -> Phylo toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo
toNextLevel phylo groups = toNextLevel' phylo groups =
let curLvl = getLastLevel phylo let curLvl = getLastLevel phylo
oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups
newGroups = fromListWith (++) newGroups = concat $ groupsToBranches'
-- | 5) group the parents by periods $ fromList $ map (\g -> (getGroupId g, g))
$ foldlWithKey (\acc id groups' -> $ foldlWithKey (\acc id groups' ->
-- | 4) create the parent group -- | 4) create the parent group
let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups' let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups'
in acc ++ [(parent ^. phylo_groupPeriod, [parent])]) [] in acc ++ [parent]) []
-- | 3) group the current groups by parentId -- | 3) group the current groups by parentId
$ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups $ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups
newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups
in traceSynchronyEnd in traceSynchronyEnd
$ over ( phylo_periods . traverse . phylo_periodLevels . traverse $ over ( phylo_periods . traverse . phylo_periodLevels . traverse
-- | 6) update each period at curLvl + 1 -- | 6) update each period at curLvl + 1
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1))) . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1)))
-- | 7) by adding the parents -- | 7) by adding the parents
(\phyloLvl -> (\phyloLvl ->
if member (phyloLvl ^. phylo_levelPeriod) newGroups if member (phyloLvl ^. phylo_levelPeriod) newPeriods
then phyloLvl & phylo_levelGroups then phyloLvl & phylo_levelGroups
.~ fromList (map (\g -> (getGroupId g, g)) $ newGroups ! (phyloLvl ^. phylo_levelPeriod)) .~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_levelPeriod))
else phyloLvl) else phyloLvl)
-- | 2) add the curLvl + 1 phyloLevel to the phylo -- | 2) add the curLvl + 1 phyloLevel to the phylo
$ addPhyloLevel (curLvl + 1) $ addPhyloLevel (curLvl + 1)
-- | 1) update the current groups (with level parent pointers) in the phylo -- | 1) update the current groups (with level parent pointers) in the phylo
$ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo $ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
-------------------- --------------------
-- | Clustering | -- -- | Clustering | --
-------------------- --------------------
toPairs :: SynchronyStrategy -> [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
toPairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)] toPairs strategy groups = case strategy of
toPairs groups = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) MergeRegularGroups -> pairs
$ listToCombi' groups $ filter (\g -> all (== 3) $ (g ^. phylo_groupMeta) ! "dynamics") groups
MergeAllGroups -> pairs groups
where
pairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
pairs gs = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) (listToCombi' gs)
toDiamonds :: [PhyloGroup] -> [[PhyloGroup]] toDiamonds :: [PhyloGroup] -> [[PhyloGroup]]
...@@ -108,26 +156,25 @@ toDiamonds groups = foldl' (\acc groups' -> ...@@ -108,26 +156,25 @@ toDiamonds groups = foldl' (\acc groups' ->
$ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups $ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups
groupsToEdges :: Proximity -> Synchrony -> Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] groupsToEdges :: Proximity -> Synchrony -> Double -> Map Int Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox sync docs groups = groupsToEdges prox sync nbDocs diago groups =
case sync of case sync of
ByProximityThreshold t s -> filter (\(_,w) -> w >= t) ByProximityThreshold thr sens _ strat ->
$ toEdges s filter (\(_,w) -> w >= thr)
$ toPairs groups $ toEdges sens
$ toPairs strat groups
ByProximityDistribution s -> ByProximityDistribution sens strat ->
let diamonds = sortOn snd let diamonds = sortOn snd
$ toEdges s $ concat $ toEdges sens $ concat
$ map toPairs $ toDiamonds groups $ map (\gs -> toPairs strat gs) $ toDiamonds groups
in take (div (length diamonds) 2) diamonds in take (div (length diamonds) 2) diamonds
where where
toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)] toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)]
toEdges sens edges = toEdges sens edges =
case prox of case prox of
WeightedLogJaccard _ _ _ -> map (\(g,g') -> WeightedLogJaccard _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard sens docs ((g,g'), weightedLogJaccard' sens nbDocs diago
(g ^. phylo_groupCooc) (g' ^. phylo_groupCooc) (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
_ -> undefined _ -> undefined
...@@ -142,21 +189,34 @@ toParentId :: PhyloGroup -> PhyloGroupId ...@@ -142,21 +189,34 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex) toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex)
reduceBranch :: Proximity -> Synchrony -> Map Date Double -> [PhyloGroup] -> [PhyloGroup] reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reduceBranch prox sync docs branch = reduceGroups prox sync docs diagos branch =
-- | 1) reduce a branch as a set of periods & groups -- | 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++) let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems) in (concat . concat . elems)
$ mapWithKey (\prd groups -> $ mapWithKey (\prd groups ->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold -- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) groups let diago = reduceDiagos $ filterDiago diagos [prd]
edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups
in map (\comp -> in map (\comp ->
-- | 4) add to each groups their futur level parent group -- | 4) add to each groups their futur level parent group
let parentId = toParentId (head' "parentId" comp) let parentId = toParentId (head' "parentId" comp)
in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp ) in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp )
-- |3) reduce the graph a a set of related components -- |3) reduce the graph a a set of related components
$ toRelatedComponents groups edges) periods $ toRelatedComponents groups edges) periods
adjustClustering :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
adjustClustering sync branches = case sync of
ByProximityThreshold _ _ scope _ -> case scope of
SingleBranch -> branches
SiblingBranches -> groupBy (\g g' -> (last' "adjustClustering" $ (g ^. phylo_groupMeta) ! "breaks")
== (last' "adjustClustering" $ (g' ^. phylo_groupMeta) ! "breaks"))
$ sortOn _phylo_groupBranchId $ concat branches
AllBranches -> [concat branches]
ByProximityDistribution _ _ -> branches
synchronicClustering :: Phylo -> Phylo synchronicClustering :: Phylo -> Phylo
...@@ -164,11 +224,14 @@ synchronicClustering phylo = ...@@ -164,11 +224,14 @@ synchronicClustering phylo =
let prox = phyloProximity $ getConfig phylo let prox = phyloProximity $ getConfig phylo
sync = phyloSynchrony $ getConfig phylo sync = phyloSynchrony $ getConfig phylo
docs = phylo ^. phylo_timeDocs docs = phylo ^. phylo_timeDocs
branches = map (\branch -> reduceBranch prox sync docs branch) diagos = map coocToDiago $ phylo ^. phylo_timeCooc
$ phyloToLastBranches newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ traceSynchronyStart phylo $ map processDynamics
branches' = branches `using` parList rdeepseq $ adjustClustering sync
in toNextLevel phylo $ concat branches' $ phyloToLastBranches
$ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq
in toNextLevel' phylo $ concat newBranches'
---------------- ----------------
......
...@@ -15,18 +15,19 @@ Portability : POSIX ...@@ -15,18 +15,19 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, delete) import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or, sort, (!!))
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, intersectionWith, findWithDefault, keys, (!), filterWithKey) import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
-- import Prelude (logBase) import Prelude (floor)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Debug.Trace (trace) import Debug.Trace (trace)
import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
...@@ -35,30 +36,29 @@ import qualified Data.Set as Set ...@@ -35,30 +36,29 @@ import qualified Data.Set as Set
------------------- -------------------
-- | Process the inverse sumLog -- | To compute a jaccard similarity between two lists
sumInvLog :: Double -> [Double] -> Double jaccard :: [Int] -> [Int] -> Double
sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
-- | Process the sumLog -- | Process the inverse sumLog
sumLog :: Double -> [Double] -> Double sumInvLog' :: Double -> Double -> [Double] -> Double
sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + s) / log (nb + s)))) 0 diago
-- | To compute a jaccard similarity between two lists -- | Process the sumLog
jaccard :: [Int] -> [Int] -> Double sumLog' :: Double -> Double -> [Double] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union') sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + s) / log (nb + s))) 0 diago
-- | To process a WeighedLogJaccard distance between to coocurency matrix weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
weightedLogJaccard :: Double -> Double -> Cooc -> Cooc -> [Int] -> [Int] -> Double weightedLogJaccard' sens nbDocs diago ngrams ngrams'
weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
| null ngramsInter = 0 | null ngramsInter = 0
| ngramsInter == ngramsUnion = 1 | ngramsInter == ngramsUnion = 1
| sens == 0 = jaccard ngramsInter ngramsUnion | sens == 0 = jaccard ngramsInter ngramsUnion
| sens > 0 = (sumInvLog sens coocInter) / (sumInvLog sens coocUnion) | sens > 0 = (sumInvLog' sens nbDocs diagoInter) / (sumInvLog' sens nbDocs diagoUnion)
| otherwise = (sumLog sens coocInter) / (sumLog sens coocUnion) | otherwise = (sumLog' sens nbDocs diagoInter) / (sumLog' sens nbDocs diagoUnion)
where where
-------------------------------------- --------------------------------------
ngramsInter :: [Int] ngramsInter :: [Int]
ngramsInter = intersect ngrams ngrams' ngramsInter = intersect ngrams ngrams'
...@@ -66,122 +66,125 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams' ...@@ -66,122 +66,125 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
ngramsUnion :: [Int] ngramsUnion :: [Int]
ngramsUnion = union ngrams ngrams' ngramsUnion = union ngrams ngrams'
-------------------------------------- --------------------------------------
coocInter :: [Double] diagoInter :: [Double]
coocInter = elems $ map (/docs) $ filterWithKey (\(k,k') _ -> k == k') $ intersectionWith (+) cooc cooc' diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
-- coocInter = elems $ map (/docs) $ intersectionWith (+) cooc cooc' --------------------------------------
-------------------------------------- diagoUnion :: [Double]
coocUnion :: [Double] diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
coocUnion = elems $ map (/docs) $ filterWithKey (\(k,k') _ -> k == k') $ unionWith (+) cooc cooc' --------------------------------------
--------------------------------------
-- | To choose a proximity function
pickProximity :: Proximity -> Double -> Cooc -> Cooc -> [Int] -> [Int] -> Double
pickProximity proximity docs cooc cooc' ngrams ngrams' = case proximity of
WeightedLogJaccard sens _ _ -> weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
Hamming -> undefined
-- | To process the proximity between a current group and a pair of targets group -- | To process the proximity between a current group and a pair of targets group
toProximity :: Map Date Double -> Proximity -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
toProximity docs proximity ego target target' = toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
let docs' = sum $ elems docs case proximity of
cooc = if target == target' WeightedLogJaccard sens ->
then (target ^. phylo_groupCooc) let pairNgrams = if targetNgrams == targetNgrams'
else sumCooc (target ^. phylo_groupCooc) (target' ^. phylo_groupCooc) then targetNgrams
ngrams = if target == target' else union targetNgrams targetNgrams'
then (target ^. phylo_groupNgrams) in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
else union (target ^. phylo_groupNgrams) (target' ^. phylo_groupNgrams) Hamming -> undefined
in pickProximity proximity docs' (ego ^. phylo_groupCooc) cooc (ego ^. phylo_groupNgrams) ngrams
------------------------ ------------------------
-- | Local Matching | -- -- | Local Matching | --
------------------------ ------------------------
toLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId findLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId
toLastPeriod fil periods = case fil of findLastPeriod fil periods = case fil of
ToParents -> head' "toLastPeriod" (sortOn fst periods) ToParents -> head' "findLastPeriod" (sortOn fst periods)
ToChilds -> last' "toLastPeriod" (sortOn fst periods) ToChilds -> last' "findLastPeriod" (sortOn fst periods)
toLazyPairs :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId -> [(PhyloGroup,PhyloGroup)] -> [(PhyloGroup,PhyloGroup)] -- | To filter pairs of candidates related to old pointers periods
toLazyPairs pointers fil thr prox prd pairs = removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId
if null pointers then pairs -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
else let rest = filterPointers prox thr pointers -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
in if null rest removeOldPointers oldPointers fil thr prox prd pairs
then let prd' = toLastPeriod fil (map (fst . fst . fst) pointers) | null oldPointers = pairs
in if prd' == prd | null (filterPointers prox thr oldPointers) =
then [] let lastMatchedPrd = findLastPeriod fil (map (fst . fst . fst) oldPointers)
else filter (\(g,g') -> in if lastMatchedPrd == prd
case fil of then []
ToParents -> ((fst $ g ^. phylo_groupPeriod) < (fst prd')) else filter (\((id,_),(id',_)) ->
|| ((fst $ g' ^. phylo_groupPeriod) < (fst prd')) case fil of
ToChilds -> ((fst $ g ^. phylo_groupPeriod) > (fst prd')) ToParents -> (((fst . fst . fst) id ) < (fst lastMatchedPrd))
|| ((fst $ g' ^. phylo_groupPeriod) > (fst prd'))) pairs || (((fst . fst . fst) id') < (fst lastMatchedPrd))
else [] ToChilds -> (((fst . fst . fst) id ) > (fst lastMatchedPrd))
|| (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
| otherwise = []
-- | Find pairs of valuable candidates to be matched
makePairs' :: PhyloGroup -> [PhyloGroup] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity -> Map Date Double -> [(PhyloGroup,PhyloGroup)]
makePairs' ego candidates periods pointers fil thr prox docs = makePairs' :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity
case null periods of -> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
True -> [] makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
False -> toLazyPairs pointers fil thr prox lastPrd if (null periods)
-- | at least on of the pair candidates should be from the last added period then []
$ filter (\(g,g') -> ((g ^. phylo_groupPeriod) == lastPrd) else removeOldPointers oldPointers fil thr prox lastPrd
|| ((g' ^. phylo_groupPeriod) == lastPrd)) -- | at least on of the pair candidates should be from the last added period
$ listToKeys $ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
$ filter (\g -> (g ^. phylo_groupPeriod == lastPrd) $ listToKeys
|| ((toProximity docs prox ego ego g) >= thr)) candidates $ filter (\(id,ngrams) ->
let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
) candidates
where where
lastPrd :: PhyloPeriodId lastPrd :: PhyloPeriodId
lastPrd = toLastPeriod fil periods lastPrd = findLastPeriod fil periods
filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer] filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Proximity -> Map Date Double -> Double -> PhyloGroup -> PhyloGroup reduceDiagos :: Map Date Cooc -> Map Int Double
phyloGroupMatching candidates fil proxi docs thr ego = reduceDiagos diagos = mapKeys (\(k,_) -> k)
case null nextPointers of $ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
-- | let's find new pointers
True -> if null $ filterPointers proxi thr $ getPeriodPointers fil ego
then addPointers ego fil TemporalPointer [] phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
-- | or keep the old ones -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
else addPointers ego fil TemporalPointer phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) =
$ filterPointers proxi thr $ getPeriodPointers fil ego if (null $ filterPointers proxi thr oldPointers)
False -> addPointers ego fil TemporalPointer -- | let's find new pointers
$ head' "phyloGroupMatching" then if null nextPointers
-- | Keep only the best set of pointers grouped by proximity then []
$ groupBy (\pt pt' -> snd pt == snd pt') else head' "phyloGroupMatching"
$ reverse $ sortOn snd $ head' "pointers" -- | Keep only the best set of pointers grouped by proximity
$ nextPointers $ groupBy (\pt pt' -> snd pt == snd pt')
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold $ reverse $ sortOn snd $ head' "pointers" nextPointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
else oldPointers
where where
nextPointers :: [[Pointer]] nextPointers :: [[Pointer]]
nextPointers = take 1 nextPointers = take 1
$ dropWhile (null) $ dropWhile (null)
-- | for each time frame, process the proximity on relevant pairs of targeted groups -- | for each time frame, process the proximity on relevant pairs of targeted groups
$ scanl (\acc groups -> $ scanl (\acc groups ->
let periods = nub $ map _phylo_groupPeriod $ concat groups let periods = nub $ map (fst . fst . fst) $ concat groups
docs' = (filterDocs docs ([ego ^. phylo_groupPeriod] ++ periods)) nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
pairs = makePairs' ego (concat groups) periods (getPeriodPointers fil ego) fil thr proxi docs diago = reduceDiagos
$ filterDiago diagos ([(fst . fst) id] ++ periods)
-- | important resize nbdocs et diago dans le make pairs
pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos
in acc ++ ( filterPointers proxi thr in acc ++ ( filterPointers proxi thr
$ concat $ concat
$ map (\(c,c') -> $ map (\(c,c') ->
-- | process the proximity between the current group and a pair of candidates -- | process the proximity between the current group and a pair of candidates
let proximity = toProximity docs' proxi ego c c' let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
in if (c == c') in if (c == c')
then [(getGroupId c,proximity)] then [(fst c,proximity)]
else [(getGroupId c,proximity),(getGroupId c',proximity)] ) pairs )) [] else [(fst c,proximity),(fst c',proximity)] ) pairs )) []
$ inits candidates -- | groups from [[1900],[1900,1901],[1900,1901,1902],...] $ inits candidates -- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
filterDocs d pds = restrictKeys d $ periodsToYears pds filterDocs d pds = restrictKeys d $ periodsToYears pds
filterDiago :: Map Date Cooc -> [PhyloPeriodId] -> Map Date Cooc
filterDiago diago pds = restrictKeys diago $ periodsToYears pds
----------------------------- -----------------------------
-- | Matching Processing | -- -- | Matching Processing | --
...@@ -195,40 +198,41 @@ getNextPeriods fil max' pId pIds = ...@@ -195,40 +198,41 @@ getNextPeriods fil max' pId pIds =
ToParents -> take max' $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds ToParents -> take max' $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
getCandidates :: Filiation -> PhyloGroup -> [[PhyloGroup]] -> [[PhyloGroup]] getCandidates :: PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
getCandidates fil ego targets = getCandidates ego targets =
case fil of map (\groups' ->
ToChilds -> targets' filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')
ToParents -> reverse targets' ) groups') targets
where
targets' :: [[PhyloGroup]]
targets' = matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
map (\groups' -> matchGroupsToGroups frame periods proximity thr docs coocs groups =
filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams) let groups' = groupByField _phylo_groupPeriod groups
) groups') targets in foldl' (\acc prd ->
let -- | 1) find the parents/childs matching periods
periodsPar = getNextPeriods ToParents frame prd periods
phyloBranchMatching :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> [PhyloGroup] -> [PhyloGroup] periodsChi = getNextPeriods ToChilds frame prd periods
phyloBranchMatching frame periods proximity thr docs branch = traceBranchMatching proximity thr -- | 2) find the parents/childs matching candidates
-- $ matchByPeriods ToParents candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
-- $ groupByField _phylo_groupPeriod candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
$ matchByPeriods -- | 3) find the parents/child number of docs by years
$ groupByField _phylo_groupPeriod branch docsPar = filterDocs docs ([prd] ++ periodsPar)
where docsChi = filterDocs docs ([prd] ++ periodsChi)
-------------------------------------- -- | 4) find the parents/child diago by years
matchByPeriods :: Map PhyloPeriodId [PhyloGroup] -> [PhyloGroup] diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
matchByPeriods branch' = foldl' (\acc prd -> diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
let periodsPar = getNextPeriods ToParents frame prd periods -- | 5) match in parallel all the groups (egos) to their possible candidates
periodsChi = getNextPeriods ToChilds frame prd periods egos = map (\ego ->
candidatesPar = map (\prd' -> findWithDefault [] prd' branch') periodsPar let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar
candidatesChi = map (\prd' -> findWithDefault [] prd' branch') periodsChi thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
docsPar = filterDocs docs ([prd] ++ periodsPar) pointersChi = phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi diagoChi
docsChi = filterDocs docs ([prd] ++ periodsChi) thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
egos = map (\ego -> phyloGroupMatching (getCandidates ToParents ego candidatesPar) ToParents proximity docsPar thr in addPointers ToChilds TemporalPointer pointersChi
$ phyloGroupMatching (getCandidates ToChilds ego candidatesChi) ToChilds proximity docsChi thr ego) $ addPointers ToParents TemporalPointer pointersPar ego)
$ findWithDefault [] prd branch' $ findWithDefault [] prd groups'
egos' = egos `using` parList rdeepseq egos' = egos `using` parList rdeepseq
in acc ++ egos' ) [] periods in acc ++ egos'
) [] periods
----------------------- -----------------------
...@@ -236,85 +240,38 @@ phyloBranchMatching frame periods proximity thr docs branch = traceBranchMatchin ...@@ -236,85 +240,38 @@ phyloBranchMatching frame periods proximity thr docs branch = traceBranchMatchin
----------------------- -----------------------
count :: Eq a => a -> [a] -> Int
count x = length . filter (== x)
termFreq' :: Int -> [PhyloGroup] -> Double
termFreq' term groups =
let ngrams = concat $ map _phylo_groupNgrams groups
in log ((fromIntegral $ count term ngrams)
/ (fromIntegral $ length ngrams))
relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]] relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
relevantBranches term branches = relevantBranches term branches =
filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
branchCov' :: [PhyloGroup] -> [[PhyloGroup]] -> Double fScore :: Double -> Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
branchCov' branch branches = fScore beta i bk bks =
(fromIntegral $ length branch) / (fromIntegral $ length $ concat branches) let recall = ( (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) bk)
/ (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) $ concat bks))
accuracy = ( (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) bk)
toRecall :: Double -> Int -> Int -> [[PhyloGroup]] -> Double / (fromIntegral $ length bk))
toRecall freq term border branches = in ((1 + beta ** 2) * accuracy * recall)
-- | given a random term in a phylo / (((beta ** 2) * accuracy + recall))
freq
-- | for each relevant branches
* (sum $ map (\branch ->
-- | given its local coverage
((branchCov' branch branches') / (sum $ map (\b -> branchCov' b branches') branches'))
-- | compute the local recall
* ( (fromIntegral $ length $ filter (\group -> elem term $ group ^. phylo_groupNgrams) branch)
/ ( (fromIntegral $ length $ filter (\group -> elem term $ group ^. phylo_groupNgrams) $ concat branches')
-- | with a ponderation from border branches
+ (fromIntegral border)) )) branches')
where
branches' :: [[PhyloGroup]]
branches' = relevantBranches term branches
toAccuracy :: Double -> Int -> [[PhyloGroup]] -> Double
toAccuracy freq term branches =
if (null branches)
then 0
else
-- | given a random term in a phylo
freq
-- | for each relevant branches
* (sum $ map (\branch ->
-- | given its local coverage
((branchCov' branch branches') / (sum $ map (\b -> branchCov' b branches') branches'))
-- | compute the local accuracy
* ( (fromIntegral $ length $ filter (\group -> elem term $ group ^. phylo_groupNgrams) branch)
/ (fromIntegral $ length branch))) branches')
where
branches' :: [[PhyloGroup]]
branches' = relevantBranches term branches
toPhyloQuality :: Double -> Map Int Double -> Int -> Double -> [[PhyloGroup]] -> Double
toPhyloQuality beta frequency border oldAcc branches =
-- trace (" rec : " <> show(recall)) $
-- trace (" acc : " <> show(accuracy)) $
if (null branches)
then 0
else ((1 + beta ** 2) * accuracy * recall)
/ (((beta ** 2) * accuracy + recall))
where
-- | for each term compute the global accuracy
accuracy :: Double
accuracy = oldAcc + (sum $ map (\term -> toAccuracy (frequency ! term) term branches) $ keys frequency)
-- | for each term compute the global recall
recall :: Double
recall = sum $ map (\term -> toRecall (frequency ! term) term border branches) $ keys frequency
toBorderAccuracy :: Map Int Double -> [[PhyloGroup]] -> Double wk :: [PhyloGroup] -> Double
toBorderAccuracy freq branches = sum $ map (\t -> toAccuracy (freq ! t) t branches) $ keys freq wk bk = fromIntegral $ length bk
----------------------------- toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
-- | Adaptative Matching | -- toPhyloQuality' beta freq branches =
----------------------------- if (null branches)
then 0
else sum
$ map (\i ->
let bks = relevantBranches i branches
in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore beta i bk bks)) bks))
$ keys freq
------------------------------------
-- | Constant Temporal Matching | --
------------------------------------
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
...@@ -326,6 +283,7 @@ groupsToBranches groups = ...@@ -326,6 +283,7 @@ groupsToBranches groups =
++ (map fst $ group ^. phylo_groupPeriodParents) ++ (map fst $ group ^. phylo_groupPeriodParents)
++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups ++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
-- | first find the related components by inside each ego's period -- | first find the related components by inside each ego's period
-- | a supprimer
graph' = map relatedComponents egos graph' = map relatedComponents egos
-- | then run it for the all the periods -- | then run it for the all the periods
graph = zip [1..] graph = zip [1..]
...@@ -341,81 +299,248 @@ reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double ...@@ -341,81 +299,248 @@ reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
reduceFrequency frequency branches = reduceFrequency frequency branches =
restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches) restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
alterBorder :: Int -> [[PhyloGroup]] -> [PhyloGroup] -> Int updateThr thr branches = map (\b -> map (\g ->
alterBorder border branches branch = border + (length $ concat branches) - (length branch) g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
recursiveMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> Double -> Int -> Double -> [PhyloGroup] -> [PhyloGroup] -- | Sequentially break each branch of a phylo where
recursiveMatching proximity beta minBranch frequency egoThr frame periods docs quality border oldAcc groups = -- done = all the allready broken branches
if ((egoThr >= 1) || (quality > quality') || ((length $ concat $ snd branches') == (length groups))) -- ego = the current branch we want to break
then -- rest = the branches we still have to break
trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality') <> "\n" breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
<> " |✓ " <> show(length $ fst branches') <> show(map length $ fst branches') -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
<> " |✗ " <> show(length $ snd branches') <> "[" <> show(length $ concat $ snd branches') <> "]") $ breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
groups -- | 1) keep or not the new division of ego
else let done' = done ++ (if snd ego
let next = map (\b -> recursiveMatching proximity beta minBranch then
(reduceFrequency frequency (fst branches')) (if ((null (fst ego')) || (quality > quality'))
(egoThr + (getThresholdStep proximity)) then
frame periods docs quality' -- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
(alterBorder border (fst branches') b) -- <> " | " <> show(length $ fst ego) <> " groups : "
(oldAcc + (toBorderAccuracy frequency (delete b ((fst branches') ++ (snd branches'))))) -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
b ) (fst branches') -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
in trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality') <> "\n" [(fst ego,False)]
<> " |✓ " <> show(length $ fst branches') <> show(map length $ fst branches') else
<> " |✗ " <> show(length $ snd branches') <> "[" <> show(length $ concat $ snd branches') <> "]") $ -- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
concat (next ++ (snd branches')) -- <> " | " <> show(length $ fst ego) <> " groups : "
-- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
-- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
else [ego])
in
-- | 2) if there is no more branches in rest then return else continue
if null rest
then done'
else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where
--------------------------------------
quality :: Double
quality = toPhyloQuality' beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
--------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' =
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
$ thrToMeta thr
$ depthToMeta (elevation - depth) branches'
--------------------------------------
quality' :: Double
quality' = toPhyloQuality' beta frequency
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
-> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches =
-- | if there is no branch to break or if seaLvl level > 1 then end
if (thr >= 1) || ((not . or) $ map snd branches)
then branches
else
-- | break all the possible branches at the current seaLvl level
let branches' = breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches')
in seaLevelMatching proximity beta minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
constanteTemporalMatching start step phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
phylo
where
-- | 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]]
branches = map fst
$ seaLevelMatching (phyloProximity $ getConfig phylo)
(_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq)
start step
((((1 - start) / step) - 1))
(((1 - start) / step))
(getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo)
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
groups
-- | 1) for each group process an initial temporal Matching
-- | here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],Bool)]
groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (phyloProximity $ getConfig phylo)
start
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo)
--------------------------------------
-- | Adaptative Temporal Matching | --
--------------------------------------
thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
thrToMeta thr branches =
map (\b ->
map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
depthToMeta depth branches =
let break = length branches > 1
in map (\b ->
map (\g ->
if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
else g) b) branches
reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double
getInTupleMap m k k'
| isJust (m !? ( k ,k')) = m ! ( k ,k')
| isJust (m !? ( k',k )) = m ! ( k',k )
| otherwise = 0
toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
toThreshold lvl proxiGroups =
let idx = ((Map.size proxiGroups) `div` (floor lvl)) - 1
in if idx >= 0
then (sort $ elems proxiGroups) !! idx
else 1
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
-> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
-> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
-> [([PhyloGroup],(Bool,[Double]))]
adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest =
-- | 1) keep or not the new division of ego
let done' = done ++ (if (fst . snd) ego
then (if ((null (fst ego')) || (quality > quality'))
then
[(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
else
( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
in
-- | uncomment let .. in for debugging
-- let part1 = partition (snd) done'
-- part2 = partition (snd) rest
-- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
-- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
-- ) $
-- | 2) if there is no more branches in rest then return else continue
if null rest
then done'
else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where where
-- | 2) for each of the possible next branches process the phyloQuality score --------------------------------------
thr :: Double
thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
--------------------------------------
quality :: Double
quality = toPhyloQuality' beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
--------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' =
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
$ thrToMeta thr
$ depthToMeta (elevation - depth) branches'
--------------------------------------
quality' :: Double quality' :: Double
quality' = toPhyloQuality beta frequency border oldAcc ((fst branches') ++ (snd branches')) quality' = toPhyloQuality' beta frequency
-- | 1) for each local branch process a temporal matching then find the resulting branches ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
branches' :: ([[PhyloGroup]],[[PhyloGroup]])
branches' =
let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
$ phyloBranchMatching frame periods proximity egoThr docs groups -> Double -> Int -> Map Int Double
in partition (\b -> length b >= minBranch) (branches `using` parList rdeepseq) -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
-> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches =
temporalMatching :: Phylo -> Phylo -- | if there is no branch to break or if seaLvl level >= depth then end
temporalMatching phylo = updatePhyloGroups 1 branches' phylo if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
then branches
else
-- | break all the possible branches at the current seaLvl level
let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches')
groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
-- thr = toThreshold depth groupsProxi
in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
<> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
<> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
<> " thr = ")
$ adaptativeSeaLevelMatching proxiConf (depth - 1) elevation groupsProxi' beta minBranch frequency' frame periods docs coocs branches'
adaptativeTemporalMatching :: Double -> Phylo -> Phylo
adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
phylo
where where
-- | 5) apply the recursive matching -- | 2) process the temporal matching by elevating seaLvl level
branches' :: Map PhyloGroupId PhyloGroup branches :: [[PhyloGroup]]
branches' = branches = map fst
let next = trace (" ✓ F(β) = " <> show(quality) $ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)
<> " |✓ " <> show(length $ fst branches) <> show(map length $ fst branches) (elevation - 1)
<> " |✗ " <> show(length $ snd branches) <> "[" <> show(length $ concat $ snd branches) <> "]") elevation
$ map (\branch -> recursiveMatching (phyloProximity $ getConfig phylo) (phylo ^. phylo_groupsProxi)
(_qua_relevance $ phyloQuality $ getConfig phylo) (_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo) (_qua_minBranch $ phyloQuality $ getConfig phylo)
(reduceFrequency frequency (fst branches)) (phylo ^. phylo_termFreq)
( (getThresholdInit $ phyloProximity $ getConfig phylo) (getTimeFrame $ timeUnit $ getConfig phylo)
+ (getThresholdStep $ phyloProximity $ getConfig phylo)) (getPeriodIds phylo)
(getTimeFrame $ timeUnit $ getConfig phylo) (phylo ^. phylo_timeDocs)
(getPeriodIds phylo) (phylo ^. phylo_timeCooc)
(phylo ^. phylo_timeDocs) quality (alterBorder 0 (fst branches) branch) groups
(toBorderAccuracy frequency (delete branch ((fst branches) ++ (snd branches))))
branch
) (fst branches)
in fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd (concat (next ++ (snd branches)))
-- | 4) process the quality score
quality :: Double
quality = toPhyloQuality (_qua_relevance $ phyloQuality $ getConfig phylo) frequency 0 0 ((fst branches) ++ (snd branches))
-- | 3) process the constants of the quality score
frequency :: Map Int Double
frequency =
let terms = ngramsInBranches ((fst branches) ++ (snd branches))
freqs = map (\t -> termFreq' t $ concat ((fst branches) ++ (snd branches))) terms
in fromList $ map (\(t,freq) -> (t,freq/(sum freqs))) $ zip terms freqs
-- | 2) group into branches
branches :: ([[PhyloGroup]],[[PhyloGroup]])
branches = partition (\b -> length b >= (_qua_minBranch $ phyloQuality $ getConfig phylo))
$ groupsToBranches $ fromList $ map (\group -> (getGroupId group, group)) groups'
-- | 1) for each group process an initial temporal Matching -- | 1) for each group process an initial temporal Matching
groups' :: [PhyloGroup] -- | here we suppose that all the groups of level 1 are part of the same big branch
groups' = phyloBranchMatching (getTimeFrame $ timeUnit $ getConfig phylo) (getPeriodIds phylo) groups :: [([PhyloGroup],(Bool,[Double]))]
(phyloProximity $ getConfig phylo) (getThresholdInit $ phyloProximity $ getConfig phylo) groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
(phylo ^. phylo_timeDocs) $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
(traceTemporalMatching $ getGroupsFromLevel 1 phylo) $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (phyloProximity $ getConfig phylo)
thr
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo)
--------------------------------------
thr :: Double
thr = toThreshold elevation (phylo ^. phylo_groupsProxi)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment