Commit 56636731 authored by qlobbe's avatar qlobbe

working on perf

parent 5a8e884b
Pipeline #575 failed with stage
......@@ -160,8 +160,6 @@ main = do
let dot = toPhyloExport phylo
printIOMsg "End of export to dot"
let output = (outputPath config)
<> (unpack $ phyloName config)
<> "_V2.dot"
......
......@@ -69,7 +69,8 @@ data Proximity =
data Synchrony =
ByProximityThreshold
{ _bpt_threshold :: Double }
{ _bpt_threshold :: Double
, _bpt_sensibility :: Double}
| ByProximityDistribution
deriving (Show,Generic,Eq)
......@@ -115,7 +116,7 @@ defaultConfig =
, phyloName = pack "Default Phylo"
, phyloLevel = 1
, phyloProximity = WeightedLogJaccard 10 0 0.1
, phyloSynchrony = ByProximityThreshold 0.1
, phyloSynchrony = ByProximityThreshold 0.4 0
, timeUnit = Year 3 1 5
, contextualUnit = Fis 2 4
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
......
......@@ -17,14 +17,16 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, intersect, (\\))
import Data.Set (Set, size)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition)
import Data.Set (Set, size, disjoint)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty)
import Data.String (String)
import Data.Text (Text, unwords)
import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
import Text.Printf
import Debug.Trace (trace)
import Control.Lens hiding (Level)
......@@ -57,6 +59,10 @@ printIOComment cmt =
--------------
roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
roundToStr = printf "%0.*f"
countSup :: Double -> [Double] -> Int
countSup s l = length $ filter (>s) l
......@@ -231,6 +237,30 @@ ngramsToCooc ngrams coocs =
getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer]
getPeriodPointers fil group =
case fil of
ToChilds -> group ^. phylo_groupPeriodChilds
ToParents -> group ^. phylo_groupPeriodParents
filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity proximity thr local =
case proximity of
WeightedLogJaccard _ _ _ -> local >= thr
Hamming -> undefined
filterPointers :: Filiation -> PointerType -> Proximity -> Double -> PhyloGroup -> PhyloGroup
filterPointers fil pty proximity thr group =
case pty of
TemporalPointer -> case fil of
ToChilds -> group & phylo_groupPeriodChilds %~ (filter (\(_,w) -> filterProximity proximity thr w))
ToParents -> group & phylo_groupPeriodParents %~ (filter (\(_,w) -> filterProximity proximity thr w))
LevelPointer -> undefined
---------------
-- | Phylo | --
---------------
......@@ -315,28 +345,25 @@ traceToPhylo lvl phylo =
-- | Clustering | --
--------------------
relatedComponents :: Eq a => [[a]] -> [[a]]
relatedComponents graphs = foldl' (\mem groups ->
if (null mem)
then mem ++ [groups]
else
let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
in if (null related)
then mem ++ [groups]
else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
relatedComponents :: Ord a => [[a]] -> [[a]]
relatedComponents graph = foldl' (\acc groups ->
if (null acc)
then acc ++ [groups]
else
let acc' = partition (\groups' -> disjoint (Set.fromList groups') (Set.fromList groups)) acc
in (fst acc') ++ [nub $ concat $ (snd acc') ++ [groups]]) [] graph
traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo =
trace ( "\n" <> "-- | End of synchronic clustering for level " <> show (getLastLevel phylo)
trace ( "\n" <> "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo
traceSynchronyStart :: Phylo -> Phylo
traceSynchronyStart phylo =
trace ( "\n" <> "-- | Start of synchronic clustering for level " <> show (getLastLevel phylo)
trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo
......@@ -362,6 +389,15 @@ getThresholdStep proxi = case proxi of
Hamming -> undefined
traceBranchMatching :: Proximity -> Double -> [PhyloGroup] -> [PhyloGroup]
traceBranchMatching proxi thr groups = case proxi of
WeightedLogJaccard _ i s -> trace (
roundToStr 2 thr <> " "
<> foldl (\acc _ -> acc <> ".") "." [(10*i),(10*i + 10*s)..(10*thr)]
<> " " <> show(length groups) <> " groups"
) groups
Hamming -> undefined
----------------
-- | Branch | --
----------------
......@@ -420,5 +456,10 @@ traceMatchLimit branches =
traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
traceMatchEnd groups =
trace ("\n" <> "-- | End of temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
<> " branches and " <> show (length groups) <> " groups" <> "\n") groups
\ No newline at end of file
trace ("\n" <> "-- | End temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
<> " branches and " <> show (length groups) <> " groups" <> "\n") groups
traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
traceTemporalMatching groups =
trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
\ No newline at end of file
......@@ -24,7 +24,7 @@ import Data.List ((++), null, intersect, nub, concat, sort)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Control.Lens hiding (Level)
-- import Debug.Trace (trace)
import Control.Parallel.Strategies (parList, rdeepseq, using)
-------------------------
......@@ -92,31 +92,34 @@ toPairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
toPairs groups = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))
$ listToCombi' groups
groupsToEdges :: Proximity -> Double -> Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox thr docs groups =
groupsToEdges :: Proximity -> Double -> Double -> Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox thr sens docs groups =
case prox of
WeightedLogJaccard sens _ _ -> filter (\(_,w) -> w >= thr)
$ map (\(g,g') -> ((g,g'), weightedLogJaccard sens docs (g ^. phylo_groupCooc) (g' ^. phylo_groupCooc) (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)))
$ toPairs groups
WeightedLogJaccard _ _ _ -> filter (\(_,w) -> w >= thr)
$ map (\(g,g') -> ((g,g'), weightedLogJaccard sens docs (g ^. phylo_groupCooc) (g' ^. phylo_groupCooc) (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)))
$ toPairs groups
_ -> undefined
toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
toRelatedComponents nodes edges = relatedComponents $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
toRelatedComponents nodes edges =
let ref = fromList $ map (\g -> (getGroupId g, g)) nodes
clusters = relatedComponents $ ((map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges) ++ (map (\g -> [getGroupId g]) nodes))
in map (\cluster -> map (\gId -> ref ! gId) cluster) clusters
toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex)
reduceBranch :: Proximity -> Double -> Map Date Double -> [PhyloGroup] -> [PhyloGroup]
reduceBranch prox thr docs branch =
reduceBranch :: Proximity -> Double -> Double -> Map Date Double -> [PhyloGroup] -> [PhyloGroup]
reduceBranch prox thr sens docs branch =
-- | 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems)
$ mapWithKey (\prd groups ->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let edges = groupsToEdges prox thr ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) groups
let edges = groupsToEdges prox thr sens ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) groups
in map (\comp ->
-- | 4) add to each groups their futur level parent group
let parentId = toParentId (head' "parentId" comp)
......@@ -128,9 +131,12 @@ reduceBranch prox thr docs branch =
synchronicClustering :: Phylo -> Phylo
synchronicClustering phylo =
case (phyloSynchrony $ getConfig phylo) of
ByProximityThreshold thr -> toNextLevel phylo
$ concat
$ map (\branch -> reduceBranch (phyloProximity $ getConfig phylo) thr (phylo ^. phylo_timeDocs) branch)
$ phyloToLastBranches
$ traceSynchronyStart phylo
ByProximityThreshold t s ->
let prox = phyloProximity $ getConfig phylo
docs = phylo ^. phylo_timeDocs
branches = map (\branch -> reduceBranch prox t s docs branch)
$ phyloToLastBranches
$ traceSynchronyStart phylo
branches' = branches `using` parList rdeepseq
in toNextLevel phylo $ concat branches'
ByProximityDistribution -> undefined
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment