Commit e2b8b663 authored by qlobbe's avatar qlobbe

temporal is close to be ok, start export

parent 5d618e53
...@@ -44,6 +44,8 @@ import GHC.IO (FilePath) ...@@ -44,6 +44,8 @@ import GHC.IO (FilePath)
import Control.DeepSeq (NFData) import Control.DeepSeq (NFData)
import Control.Lens (makeLenses) import Control.Lens (makeLenses)
import qualified Data.Text.Lazy as TextLazy
---------------- ----------------
-- | Config | -- -- | Config | --
...@@ -102,7 +104,7 @@ defaultConfig = ...@@ -102,7 +104,7 @@ defaultConfig =
, corpusParser = Csv 1000 , corpusParser = Csv 1000
, phyloName = pack "Default Phylo" , phyloName = pack "Default Phylo"
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 0 0.05 , phyloProximity = WeightedLogJaccard 10 0 0.2
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, contextualUnit = Fis 2 4 , contextualUnit = Fis 2 4
, branchSize = 3 , branchSize = 3
...@@ -290,6 +292,12 @@ data PhyloFis = PhyloFis ...@@ -290,6 +292,12 @@ data PhyloFis = PhyloFis
} deriving (Generic,NFData,Show,Eq) } deriving (Generic,NFData,Show,Eq)
----------------
-- | Export | --
----------------
type DotId = TextLazy.Text
---------------- ----------------
-- | Lenses | -- -- | Lenses | --
---------------- ----------------
......
...@@ -29,6 +29,7 @@ import Gargantext.Text.Terms.Mono (monoTexts) ...@@ -29,6 +29,7 @@ import Gargantext.Text.Terms.Mono (monoTexts)
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.PhyloMaker import Gargantext.Viz.Phylo.PhyloMaker
import Gargantext.Viz.Phylo.TemporalMatching (temporalMatching)
import Control.Lens import Control.Lens
...@@ -40,7 +41,8 @@ import qualified Data.Vector as Vector ...@@ -40,7 +41,8 @@ import qualified Data.Vector as Vector
----------------------------------------------- -----------------------------------------------
phylo1 :: Phylo phylo1 :: Phylo
phylo1 = appendGroups fisToGroup 1 phyloFis phyloBase phylo1 = temporalMatching
$ appendGroups fisToGroup 1 phyloFis phyloBase
--------------------------------------------- ---------------------------------------------
......
...@@ -13,4 +13,55 @@ Portability : POSIX ...@@ -13,4 +13,55 @@ Portability : POSIX
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE MultiParamTypeClasses #-}
module Gargantext.Viz.Phylo.PhyloExport where module Gargantext.Viz.Phylo.PhyloExport where
\ No newline at end of file
import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools
import Data.GraphViz.Types.Generalised (DotGraph)
--------------------
-- | Dot export | --
--------------------
toDot :: [PhyloGroup] -> DotGraph DotId
toDot branches = undefined
----------------------
-- | post process | --
----------------------
processFilters :: [PhyloGroup] -> [PhyloGroup]
processFilters branches = branches
processSort :: [PhyloGroup] -> [PhyloGroup]
processSort branches = branches
processMetrics :: [PhyloGroup] -> [PhyloGroup]
processMetrics branches = branches
processDynamics :: [PhyloGroup] -> [PhyloGroup]
processDynamics branches = branches
processLabels :: [PhyloGroup] -> [PhyloGroup]
processLabels branches = branches
phyloPostProcess :: [PhyloGroup] -> [PhyloGroup]
phyloPostProcess branches = branches
---------------------
-- | phyloExport | --
---------------------
toPhyloExport :: Phylo -> DotGraph DotId
toPhyloExport phylo = toDot
$ phyloPostProcess groups
where
groups :: [PhyloGroup]
groups = getGroupsFromLevel (phyloLevel $ getConfig phylo) phylo
\ No newline at end of file
...@@ -23,6 +23,7 @@ import Data.Vector (Vector) ...@@ -23,6 +23,7 @@ import Data.Vector (Vector)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.TemporalMatching (temporalMatching)
import Gargantext.Text.Context (TermList) import Gargantext.Text.Context (TermList)
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..)) import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
...@@ -86,12 +87,13 @@ fisToGroup fis pId lvl idx fdt coocs = ...@@ -86,12 +87,13 @@ fisToGroup fis pId lvl idx fdt coocs =
(fis ^. phyloFis_support) (fis ^. phyloFis_support)
ngrams ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
(1,[]) (1,[0])
[] [] [] [] [] [] [] [] [] []
toPhylo1 :: [Document] -> Phylo -> Phylo toPhylo1 :: [Document] -> Phylo -> Phylo
toPhylo1 docs phyloBase = appendGroups fisToGroup 1 phyloFis phyloBase toPhylo1 docs phyloBase = temporalMatching
$ appendGroups fisToGroup 1 phyloFis phyloBase
where where
-------------------------------------- --------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis] phyloFis :: Map (Date,Date) [PhyloFis]
...@@ -181,7 +183,7 @@ docsToTimeScaleCooc docs fdt = ...@@ -181,7 +183,7 @@ docsToTimeScaleCooc docs fdt =
mCooc' = fromList mCooc' = fromList
$ map (\t -> (t,empty)) $ map (\t -> (t,empty))
$ toTimeScale (map date docs) 1 $ toTimeScale (map date docs) 1
in trace ("\n" <> "-- | Build the coocurency matrix for " <> show (length $ keys mCooc') <> " unit of time" <> "\n") in trace ("\n" <> "-- | Build the coocurency matrix for " <> show (length $ keys mCooc') <> " unit of time" <> "\n")
$ unionWith sumCooc mCooc mCooc' $ unionWith sumCooc mCooc mCooc'
......
...@@ -17,7 +17,7 @@ Portability : POSIX ...@@ -17,7 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloTools where module Gargantext.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex) import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub) import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init)
import Data.Set (Set, size) import Data.Set (Set, size)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey) import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey)
import Data.String (String) import Data.String (String)
...@@ -202,11 +202,11 @@ addPointers :: PhyloGroup -> Filiation -> PointerType -> [Pointer] -> PhyloGroup ...@@ -202,11 +202,11 @@ addPointers :: PhyloGroup -> Filiation -> PointerType -> [Pointer] -> PhyloGroup
addPointers group fil pty pointers = addPointers group fil pty pointers =
case pty of case pty of
TemporalPointer -> case fil of TemporalPointer -> case fil of
ToChilds -> group & phylo_groupPeriodChilds %~ (++ pointers) ToChilds -> group & phylo_groupPeriodChilds .~ pointers
ToParents -> group & phylo_groupPeriodParents %~ (++ pointers) ToParents -> group & phylo_groupPeriodParents .~ pointers
LevelPointer -> case fil of LevelPointer -> case fil of
ToChilds -> group & phylo_groupLevelChilds %~ (++ pointers) ToChilds -> group & phylo_groupLevelChilds .~ pointers
ToParents -> group & phylo_groupLevelParents %~ (++ pointers) ToParents -> group & phylo_groupLevelParents .~ pointers
getPeriodIds :: Phylo -> [(Date,Date)] getPeriodIds :: Phylo -> [(Date,Date)]
...@@ -284,4 +284,47 @@ getThresholdStep proxi = case proxi of ...@@ -284,4 +284,47 @@ getThresholdStep proxi = case proxi of
---------------- ----------------
ngramsInBranches :: [[PhyloGroup]] -> [Int] ngramsInBranches :: [[PhyloGroup]] -> [Int]
ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
\ No newline at end of file
traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
traceMatchSuccess thr qua qua' nextBranches =
trace ( "\n" <> "-- local branches : " <> (init $ show ((init . init . snd)
$ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length nextBranches) <> ")]"
<> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
<> " - splited with success in " <> show (map length nextBranches) <> " sub-branches" <> "\n"
<> " - for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " < " <> show(qua') <> ")\n" ) nextBranches
traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchFailure thr qua qua' branches =
trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length branches) <> ")]"
<> " | " <> show (length $ concat branches) <> " groups" <> "\n"
<> " - split with failure for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " > " <> show(qua') <> ")\n"
) branches
traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchNoSplit branches =
trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length branches) <> ")]"
<> " | " <> show (length $ concat branches) <> " groups" <> "\n"
<> " - unable to split in smaller branches" <> "\n"
) branches
traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchLimit branches =
trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length branches) <> ")]"
<> " | " <> show (length $ concat branches) <> " groups" <> "\n"
<> " - unable to increase the threshold above 1" <> "\n"
) branches
traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
traceMatchEnd groups =
trace ("\n" <> "-- | End of temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
<> " branches and " <> show (length groups) <> " groups" <> "\n") groups
\ No newline at end of file
...@@ -15,7 +15,7 @@ Portability : POSIX ...@@ -15,7 +15,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, nub, union) import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, nub, union, elemIndex, (!!))
import Data.Map (Map, fromList, fromListWith, filterWithKey, elems, restrictKeys, unionWith, intersectionWith, findWithDefault) import Data.Map (Map, fromList, fromListWith, filterWithKey, elems, restrictKeys, unionWith, intersectionWith, findWithDefault)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -23,11 +23,13 @@ import Gargantext.Viz.AdaptativePhylo ...@@ -23,11 +23,13 @@ import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.SynchronicClustering import Gargantext.Viz.Phylo.SynchronicClustering
import Debug.Trace (trace)
import Prelude (logBase) import Prelude (logBase)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import qualified Data.Set as Set import qualified Data.Set as Set
------------------- -------------------
-- | Proximity | -- -- | Proximity | --
------------------- -------------------
...@@ -204,7 +206,9 @@ entropy branches = ...@@ -204,7 +206,9 @@ entropy branches =
/ (sum $ map (\branch -> 1 / log (termFreq term [branch])) branches) / (sum $ map (\branch -> 1 / log (termFreq term [branch])) branches)
* (sum $ map (\branch -> * (sum $ map (\branch ->
let q = branchObs term (length $ concat branches) branch let q = branchObs term (length $ concat branches) branch
in q * logBase 2 q ) branches) ) terms in if (q == 0)
then 0
else - q * logBase 2 q ) branches) ) terms
where where
-- | Probability to observe a branch given a random term of the phylo -- | Probability to observe a branch given a random term of the phylo
branchObs :: Int -> Int -> [PhyloGroup] -> Double branchObs :: Int -> Int -> [PhyloGroup] -> Double
...@@ -213,10 +217,21 @@ entropy branches = ...@@ -213,10 +217,21 @@ entropy branches =
homogeneity :: [[PhyloGroup]] -> Double homogeneity :: [[PhyloGroup]] -> Double
homogeneity _ = undefined homogeneity branches =
-- where let nbGroups = length $ concat branches
-- branchCov :: [PhyloGroup] -> Int -> Double in sum
-- branchCov branch total = (fromIntegral $ length branch) / (fromIntegral total) $ map (\branch -> (if (length branch == nbGroups)
then 1
else (1 / log (branchCov branch nbGroups))
/ (sum $ map (\branch' -> 1 / log (branchCov branch' nbGroups)) branches))
* (sum $ map (\term -> (termFreq term branches)
/ (sum $ map (\term' -> termFreq term' branches) $ ngramsInBranches [branch])
* (fromIntegral $ sum $ ngramsInBranches [filter (\g -> elem term $ g ^. phylo_groupNgrams) branch])
/ (fromIntegral $ sum $ ngramsInBranches [branch])
) $ ngramsInBranches [branch]) ) branches
where
branchCov :: [PhyloGroup] -> Int -> Double
branchCov branch total = (fromIntegral $ length branch) / (fromIntegral total)
toPhyloQuality :: [[PhyloGroup]] -> Double toPhyloQuality :: [[PhyloGroup]] -> Double
...@@ -243,42 +258,57 @@ groupsToBranches groups = ...@@ -243,42 +258,57 @@ groupsToBranches groups =
) graph ) graph
recursiveMatching :: Proximity -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> Double -> [PhyloGroup] -> [PhyloGroup] recursiveMatching :: Proximity -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> Double -> [[PhyloGroup]] -> [PhyloGroup]
recursiveMatching proximity thr max' periods docs quality groups = recursiveMatching proximity thr frame periods docs quality branches =
case quality < quality' of if (length branches == (length $ concat branches))
-- | success : we localy improve the quality of the branch, let's go deeper then concat $ traceMatchNoSplit branches
True -> concat else if thr > 1
$ map (\branch -> then concat $ traceMatchLimit branches
recursiveMatching proximity (thr + (getThresholdStep proximity)) max' periods docs quality' branch else
) branches case quality <= (sum nextQualities) of
-- | failure : last step was the local maximum, let's validate it -- | success : the new threshold improves the quality score, let's go deeper
False -> groups True -> concat
$ map (\branches' ->
let idx = fromJust $ elemIndex branches' nextBranches
in recursiveMatching proximity (thr + (getThresholdStep proximity)) frame periods docs (nextQualities !! idx) branches')
$ traceMatchSuccess thr quality (sum nextQualities) nextBranches
-- | failure : last step was a local maximum of quality, let's validate it
False -> concat $ traceMatchFailure thr quality (sum nextQualities) branches
where where
-- | 3) process a quality score on the local set of branches -- | 2) for each of the possible next branches process the phyloQuality score
quality' :: Double nextQualities :: [Double]
quality' = toPhyloQuality branches nextQualities = map toPhyloQuality nextBranches
-- | 2) group the new groups into branches -- | 1) for each local branch process a temporal matching then find the resulting branches
branches :: [[PhyloGroup]] nextBranches :: [[[PhyloGroup]]]
branches = groupsToBranches $ fromList $ map (\group -> (getGroupId group, group)) groups' nextBranches = map (\branch ->
-- | 1) process a temporal matching for each group let branch' = processMatching frame periods proximity thr docs branch
groups' :: [PhyloGroup] in groupsToBranches $ fromList $ map (\group -> (getGroupId group, group)) branch'
groups' = processMatching max' periods proximity thr docs groups ) branches
temporalMatching :: Phylo -> Phylo temporalMatching :: Phylo -> Phylo
temporalMatching phylo = updatePhyloGroups 1 branches phylo temporalMatching phylo = updatePhyloGroups 1 branches' phylo
where where
-- | 2) run the recursive matching to find the best repartition among branches -- | 4) run the recursive matching to find the best repartition among branches
branches :: Map PhyloGroupId PhyloGroup branches' :: Map PhyloGroupId PhyloGroup
branches = fromList branches' = fromList
$ map (\g -> (getGroupId g, g)) $ map (\g -> (getGroupId g, g))
$ traceMatchEnd
$ recursiveMatching (phyloProximity $ getConfig phylo) $ recursiveMatching (phyloProximity $ getConfig phylo)
(getThresholdInit $ phyloProximity $ getConfig phylo) ( (getThresholdInit $ phyloProximity $ getConfig phylo)
+ (getThresholdStep $ phyloProximity $ getConfig phylo))
(getTimeFrame $ timeUnit $ getConfig phylo) (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (getPeriodIds phylo)
(phylo ^. phylo_timeDocs) (toPhyloQuality [groups']) groups' (phylo ^. phylo_timeDocs) quality branches
-- | 3) process the quality score
quality :: Double
quality = toPhyloQuality branches
-- | 2) group into branches
branches :: [[PhyloGroup]]
branches = groupsToBranches $ fromList $ map (\group -> (getGroupId group, group))
$ trace ("\n" <> "-- | Init temporal matching for " <> show (length $ groups') <> " groups" <> "\n") groups'
-- | 1) for each group process an initial temporal Matching -- | 1) for each group process an initial temporal Matching
groups' :: [PhyloGroup] groups' :: [PhyloGroup]
groups' = processMatching (getTimeFrame $ timeUnit $ getConfig phylo) (getPeriodIds phylo) groups' = processMatching (getTimeFrame $ timeUnit $ getConfig phylo) (getPeriodIds phylo)
(phyloProximity $ getConfig phylo) (getThresholdInit $ phyloProximity $ getConfig phylo) (phyloProximity $ getConfig phylo) (getThresholdInit $ phyloProximity $ getConfig phylo)
(phylo ^. phylo_timeDocs) (getGroupsFromLevel 1 phylo) (phylo ^. phylo_timeDocs) (getGroupsFromLevel 1 phylo)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment