Commit c60f86f9 authored by Alp Mestanogullari's avatar Alp Mestanogullari

bug fixes in confluence input re-indexing

parent 642b9ec7
...@@ -50,21 +50,21 @@ default-extensions: ...@@ -50,21 +50,21 @@ default-extensions:
library: library:
source-dirs: src source-dirs: src
executables: # executables:
gargantext-graph-exe: # gargantext-graph-exe:
main: Main.hs # main: Main.hs
source-dirs: app # source-dirs: app
ghc-options: # ghc-options:
- -O2 # - -O2
- -threaded # - -threaded
- -rtsopts # - -rtsopts
- -with-rtsopts=-N # - -with-rtsopts=-N
- -fprof-auto # - -fprof-auto
- -Wmissing-signatures # - -Wmissing-signatures
- -Wcompat # - -Wcompat
dependencies: # dependencies:
- gargantext-graph # - gargantext-graph
- criterion # - criterion
tests: tests:
gargantext-graph-test: gargantext-graph-test:
......
...@@ -37,6 +37,7 @@ import Data.IntMap (IntMap) ...@@ -37,6 +37,7 @@ import Data.IntMap (IntMap)
import Data.Maybe (isJust, fromJust) import Data.Maybe (isJust, fromJust)
import Data.Proxy (Proxy(Proxy)) import Data.Proxy (Proxy(Proxy))
import Data.Reflection import Data.Reflection
import Data.Semigroup
import GHC.TypeLits (KnownNat, Nat, SomeNat(SomeNat), type(+), natVal, sameNat, someNatVal) import GHC.TypeLits (KnownNat, Nat, SomeNat(SomeNat), type(+), natVal, sameNat, someNatVal)
import Graph.FGL import Graph.FGL
import Graph.Types import Graph.Types
...@@ -65,10 +66,26 @@ import qualified Data.Map.Strict as Map ...@@ -65,10 +66,26 @@ import qualified Data.Map.Strict as Map
---------------------------------------------------------------- ----------------------------------------------------------------
traceMaxIndices :: forall a t. (Bounded t, Ord t, Show t) => String -> [t] -> a -> a
traceMaxIndices f xs a = trace s a
where s = "[" ++ f ++ "] (min, max, # of ints) = " ++ show (min_i, max_i, Set.size is)
(min_i, max_i, is) = foldl'
(\(s, b, ints) i -> (min s i, max b i, Set.insert i ints))
(maxBound :: t, minBound :: t, Set.empty)
xs
traceAdjMapIndices :: String -> Map (Int, Int) x -> a -> a
traceAdjMapIndices f m a = traceMaxIndices f (foldMap (\(a, b) -> [a, b]) (Map.keys m)) a
traceDicoIndices :: (Ord x, Show x, Bounded x) => String -> IntMap x -> a -> a
traceDicoIndices s m a = traceMaxIndices (s ++ " dico keys => ") (Dict.keys m) $
traceMaxIndices (s ++ " dico vals => ") (Dict.elems m) a
defaultClustering :: Map (Int, Int) Double -> [ClusterNode] defaultClustering :: Map (Int, Int) Double -> [ClusterNode]
defaultClustering adjmap = withG g $ \fg -> defaultClustering adjmap = withG g $ \fg ->
case clusteringOptim len fg beta gc of case clusteringOptim len fg dicoToId beta gc of
Clust _ dico idx _ -> map (lkpId dico) (Dict.toList idx) Clust _ idx _ -> map go (Dict.toList idx)
where gc = False where gc = False
beta = 0.0 beta = 0.0
...@@ -76,30 +93,34 @@ defaultClustering adjmap = withG g $ \fg -> ...@@ -76,30 +93,34 @@ defaultClustering adjmap = withG g $ \fg ->
g = DGI.mkGraph ns es g = DGI.mkGraph ns es
ns = zip [0..] . Set.toList . Set.fromList $ ns = zip [0..] . Set.toList . Set.fromList $
concatMap (\(a, b) -> [a, b]) $ Map.keys adjmap concatMap (\(a, b) -> [a, b]) $ Map.keys adjmap
es = map (\((a, b), w) -> (a, b, w)) $ Map.toList adjmap lkpId n = dicoToId Dict.! n
lkpId dict (i, clust) = ClusterNode lkpLbl n = dicoToLbl Dict.! n
(fromJust (Dict.lookup i dict)) dicoToId = Dict.fromList (map (\(a, b) -> (b, a)) ns)
dicoToLbl = Dict.fromList ns
es = map (\((a, b), w) -> (lkpId a, lkpId b, w)) $ Map.toList adjmap
go (i, clust) = ClusterNode
(lkpLbl i)
clust clust
{-# INLINE clusteringOptim #-} {-# INLINE clusteringOptim #-}
clusteringOptim :: forall n a b. KnownNat n clusteringOptim :: forall n a b. (KnownNat n, Ord a, Show a, Bounded a)
=> Length -- ^ length of the random walks => Length -- ^ length of the random walks
-> FiniteGraph n a b -- ^ graph to compute clusters for -> FiniteGraph n a b -- ^ graph to compute clusters for
-> Dict a
-> Double -- ^ beta -> Double -- ^ beta
-> Bool -- ^ True = run GC, False = don't -> Bool -- ^ True = run GC, False = don't
-> Clust a -> Clust a
clusteringOptim l fg@(FiniteGraph g) beta gc = trace ("clusteringOptim" :: String) $ clusteringOptim l fg@(FiniteGraph g) dico beta gc =
case runClustering gc beta adj prox sorted_edges of case runClustering gc beta adj prox sorted_edges of
(clusts, d) -> Clust clusts dico (index clusts) d (clusts, d) -> Clust clusts (index clusts) d
where where
dico = trace ("dico" :: String) $ Dict.fromList (DGI.labNodes g) index clusts = Dict.foldMapWithKey
index clusts = trace ("index" :: String) $ Dict.foldMapWithKey
(\clustN is -> Dict.fromList $ map (,clustN) (IntSet.toList is)) (\clustN is -> Dict.fromList $ map (,clustN) (IntSet.toList is))
clusts clusts
!adj = graphMatrix fg True adj = graphMatrix fg True
!tra = transition adj tra = transition adj
!prox = proxemie l tra prox = proxemie l tra
sorted_edges = trace ("confluence" :: String) $ sort_edges (natToInt @n) (edges_confluence l fg adj tra) sorted_edges = sort_edges (natToInt @n) (edges_confluence l fg adj tra)
graphMatrix graphMatrix
:: forall (n :: Nat) a b. :: forall (n :: Nat) a b.
...@@ -107,17 +128,16 @@ graphMatrix ...@@ -107,17 +128,16 @@ graphMatrix
=> FiniteGraph n a b -> Bool -> SMatrix.Matrix n n Double => FiniteGraph n a b -> Bool -> SMatrix.Matrix n n Double
graphMatrix (FiniteGraph g) reflexive = adj graphMatrix (FiniteGraph g) reflexive = adj
where where
!adj = trace ("adjacency" :: String) $ SMatrix.fromList es adj = SMatrix.fromList es
es = diag ++ triplets es = diag ++ triplets
triplets = [ (i, j, 1.0) | i <- nodes g, j <- neighbors g i ] triplets = [ (i, j, 1.0) | i <- nodes g, j <- neighbors g i ]
diag = if reflexive diag = if reflexive
then [ (i, i, 1.0) | i <- [0..(n-1)] ] then [ (i, i, 1.0) | i <- nodes g ]
else [] else []
n = fromIntegral $ natVal (Proxy :: Proxy n)
transition transition
:: KnownNat n => SMatrix.Matrix n n Double -> SMatrix.Matrix n n Double :: KnownNat n => SMatrix.Matrix n n Double -> SMatrix.Matrix n n Double
transition m = trace ("transition" :: String) $ SMatrix.imap transition m = SMatrix.imap
(\i j _ -> 1 / fromIntegral (SMatrix.nnzCol m j)) (\i j _ -> 1 / fromIntegral (SMatrix.nnzCol m j))
m m
...@@ -156,10 +176,9 @@ proxemie :: KnownNat n ...@@ -156,10 +176,9 @@ proxemie :: KnownNat n
=> Length => Length
-> SMatrix.Matrix n n Double -> SMatrix.Matrix n n Double
-> ProxemyMatrix n -> ProxemyMatrix n
proxemie l !tm = trace ("proxemie" :: String) $ case l <= 1 of proxemie l !tm = case l <= 1 of
True -> tm True -> tm
False -> case iterate (SMatrix.mul tm) tm Prelude.!! (l-1) of False -> iterate (SMatrix.mul tm) tm Prelude.!! (l-1)
!p -> trace ("proxemie ok" :: String) p
--------------------------------------------------------------- ---------------------------------------------------------------
matconf :: forall n. KnownNat n matconf :: forall n. KnownNat n
...@@ -224,17 +243,21 @@ computeConfluences ...@@ -224,17 +243,21 @@ computeConfluences
-> Map (Int, Int) Double -> Map (Int, Int) Double
computeConfluences l edges reflexive = reifyNat (fromIntegral maxNode + 1) $ \(Proxy :: Proxy n) -> computeConfluences l edges reflexive = reifyNat (fromIntegral maxNode + 1) $ \(Proxy :: Proxy n) ->
let let
nodeLabels = Set.toList $ Set.fromList $ foldMap (\(a, b) -> [a, b]) edges
dictLabels = Dict.fromList (zip [0..] nodeLabels)
dictIDs = Dict.fromList (zip nodeLabels [0..])
edges' = map (\(a, b) -> (dictIDs Dict.! a, dictIDs Dict.! b)) edges
xs :: [(Int, Int, Double)] xs :: [(Int, Int, Double)]
xs = xs =
concatMap (\(i, j) -> [(i, j, 1.0), (j, i, 1.0)]) edges ++ concatMap (\(i, j) -> [(i, j, 1.0), (j, i, 1.0)]) edges' ++
(if reflexive (if reflexive
then [ (i, i, 1.0) | i <- [0..maxNode] ] then [ (i, i, 1.0) | i <- [0..(Dict.size dictLabels - 1)] ]
else [] else []
) )
am :: SMatrix.Matrix n n Double am :: SMatrix.Matrix n n Double
!am = SMatrix.fromList xs am = SMatrix.fromList xs
!tm = transition am tm = transition am
!sumdeg_m2 = fromIntegral (SMatrix.nonZeros am - 2) sumdeg_m2 = fromIntegral (SMatrix.nonZeros am - 2)
go x y = go x y =
let let
!deg_x_m1 = fromIntegral (SMatrix.nnzCol am x - 1) !deg_x_m1 = fromIntegral (SMatrix.nnzCol am x - 1)
...@@ -246,17 +269,19 @@ computeConfluences l edges reflexive = reifyNat (fromIntegral maxNode + 1) $ \(P ...@@ -246,17 +269,19 @@ computeConfluences l edges reflexive = reifyNat (fromIntegral maxNode + 1) $ \(P
iterate (SMatrix.mul tm'') v Prelude.!! l iterate (SMatrix.mul tm'') v Prelude.!! l
prox_y_x_length = SMatrix.extractCol v' 0 SVector.! x prox_y_x_length = SMatrix.extractCol v' 0 SVector.! x
prox_y_x_infini = if sumdeg_m2 == 0 then 0 else deg_x_m1 / sumdeg_m2 prox_y_x_infini = if sumdeg_m2 == 0 then 0 else deg_x_m1 / sumdeg_m2
denominator = (prox_y_x_length + prox_y_x_infini) denominator = (prox_y_x_length + prox_y_x_infini)
in in
if denominator == 0 if denominator == 0
then 0 then 0
else (prox_y_x_length - prox_y_x_infini) / denominator else (prox_y_x_length - prox_y_x_infini) / denominator
in in
Map.fromList $ map (\(a, b) -> ((a, b), go a b)) edges Map.fromList $ map
(\(a, b) -> ( (a, b)
, go (dictIDs Dict.! a) (dictIDs Dict.! b)
)
) edges
where maxNode = maximum $ map (\(i, j) -> max i j) edges where maxNode = getMax $ foldMap (\(i, j) -> Max (max i j)) edges
minNode = minimum $ map (\(i, j) -> min i j) edges
edges_confluence :: forall n a b. edges_confluence :: forall n a b.
KnownNat n KnownNat n
...@@ -265,7 +290,7 @@ edges_confluence :: forall n a b. ...@@ -265,7 +290,7 @@ edges_confluence :: forall n a b.
-> SMatrix.Matrix n n Double -- adjacency -> SMatrix.Matrix n n Double -- adjacency
-> SMatrix.Matrix n n Double -- transition -> SMatrix.Matrix n n Double -- transition
-> UnsortedEdges -> UnsortedEdges
edges_confluence l (FiniteGraph g) am tm = trace ("confluence" :: String) $ map f (edges g) edges_confluence l (FiniteGraph g) am tm = map f (edges g)
where where
vcount = natToInt @n vcount = natToInt @n
...@@ -287,7 +312,7 @@ edges_confluence l (FiniteGraph g) am tm = trace ("confluence" :: String) $ map ...@@ -287,7 +312,7 @@ edges_confluence l (FiniteGraph g) am tm = trace ("confluence" :: String) $ map
sort_edges :: Int sort_edges :: Int
-> UnsortedEdges -> UnsortedEdges
-> SortedEdges -> SortedEdges
sort_edges n = trace ("sort_edges" :: String) . List.sortBy (\a b -> confCompare a b <> comparing xnpy a b) sort_edges n = List.sortBy (\a b -> confCompare a b <> comparing xnpy a b)
where where
third third
:: forall a b c :: forall a b c
...@@ -332,7 +357,7 @@ data MClustering s = ...@@ -332,7 +357,7 @@ data MClustering s =
} }
newMClustering :: Int -> ST s (MClustering s) newMClustering :: Int -> ST s (MClustering s)
newMClustering n = trace ("newClustering" :: String) $ do newMClustering n = do
mps <- MV.unsafeNew n mps <- MV.unsafeNew n
mis <- MVU.unsafeNew n mis <- MVU.unsafeNew n
msc <- MVU.unsafeNew 1 msc <- MVU.unsafeNew 1
...@@ -466,7 +491,6 @@ clusteringCollector beta adj prox mclust = do ...@@ -466,7 +491,6 @@ clusteringCollector beta adj prox mclust = do
data Clust a = Clust data Clust a = Clust
{ cparts :: !(Dict IntSet) { cparts :: !(Dict IntSet)
, cdico :: (Dict a)
, cindex :: (Dict Int) , cindex :: (Dict Int)
, cscore :: !Double , cscore :: !Double
} deriving (Show, Eq) } deriving (Show, Eq)
...@@ -479,11 +503,11 @@ runClustering ...@@ -479,11 +503,11 @@ runClustering
-> SMatrix.Matrix n n Double -- ^ proxemie -> SMatrix.Matrix n n Double -- ^ proxemie
-> SortedEdges -> SortedEdges
-> (Dict IntSet, Double) -> (Dict IntSet, Double)
runClustering gc beta adj prox se = trace ("runClustering" :: String) $ runST $ do runClustering gc beta adj prox se = runST $ do
mclust <- newMClustering n mclust <- newMClustering n
trace ("hbec" :: String) $ forM_ se $ \(x, y, _) -> clusteringStep beta adj prox mclust (x, y) forM_ se $ \(x, y, _) -> clusteringStep beta adj prox mclust (x, y)
if gc if gc
then trace ("gc" :: String) $ clusteringCollector beta adj prox mclust then clusteringCollector beta adj prox mclust
else do cps <- V.unsafeFreeze (mparts mclust) else do cps <- V.unsafeFreeze (mparts mclust)
let cps' = Dict.fromList let cps' = Dict.fromList
[ (n, xs) [ (n, xs)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment