Commit c60f86f9 authored by Alp Mestanogullari's avatar Alp Mestanogullari

bug fixes in confluence input re-indexing

parent 642b9ec7
......@@ -50,21 +50,21 @@ default-extensions:
library:
source-dirs: src
executables:
gargantext-graph-exe:
main: Main.hs
source-dirs: app
ghc-options:
- -O2
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -fprof-auto
- -Wmissing-signatures
- -Wcompat
dependencies:
- gargantext-graph
- criterion
# executables:
# gargantext-graph-exe:
# main: Main.hs
# source-dirs: app
# ghc-options:
# - -O2
# - -threaded
# - -rtsopts
# - -with-rtsopts=-N
# - -fprof-auto
# - -Wmissing-signatures
# - -Wcompat
# dependencies:
# - gargantext-graph
# - criterion
tests:
gargantext-graph-test:
......
......@@ -37,6 +37,7 @@ import Data.IntMap (IntMap)
import Data.Maybe (isJust, fromJust)
import Data.Proxy (Proxy(Proxy))
import Data.Reflection
import Data.Semigroup
import GHC.TypeLits (KnownNat, Nat, SomeNat(SomeNat), type(+), natVal, sameNat, someNatVal)
import Graph.FGL
import Graph.Types
......@@ -65,10 +66,26 @@ import qualified Data.Map.Strict as Map
----------------------------------------------------------------
traceMaxIndices :: forall a t. (Bounded t, Ord t, Show t) => String -> [t] -> a -> a
traceMaxIndices f xs a = trace s a
where s = "[" ++ f ++ "] (min, max, # of ints) = " ++ show (min_i, max_i, Set.size is)
(min_i, max_i, is) = foldl'
(\(s, b, ints) i -> (min s i, max b i, Set.insert i ints))
(maxBound :: t, minBound :: t, Set.empty)
xs
traceAdjMapIndices :: String -> Map (Int, Int) x -> a -> a
traceAdjMapIndices f m a = traceMaxIndices f (foldMap (\(a, b) -> [a, b]) (Map.keys m)) a
traceDicoIndices :: (Ord x, Show x, Bounded x) => String -> IntMap x -> a -> a
traceDicoIndices s m a = traceMaxIndices (s ++ " dico keys => ") (Dict.keys m) $
traceMaxIndices (s ++ " dico vals => ") (Dict.elems m) a
defaultClustering :: Map (Int, Int) Double -> [ClusterNode]
defaultClustering adjmap = withG g $ \fg ->
case clusteringOptim len fg beta gc of
Clust _ dico idx _ -> map (lkpId dico) (Dict.toList idx)
case clusteringOptim len fg dicoToId beta gc of
Clust _ idx _ -> map go (Dict.toList idx)
where gc = False
beta = 0.0
......@@ -76,30 +93,34 @@ defaultClustering adjmap = withG g $ \fg ->
g = DGI.mkGraph ns es
ns = zip [0..] . Set.toList . Set.fromList $
concatMap (\(a, b) -> [a, b]) $ Map.keys adjmap
es = map (\((a, b), w) -> (a, b, w)) $ Map.toList adjmap
lkpId dict (i, clust) = ClusterNode
(fromJust (Dict.lookup i dict))
lkpId n = dicoToId Dict.! n
lkpLbl n = dicoToLbl Dict.! n
dicoToId = Dict.fromList (map (\(a, b) -> (b, a)) ns)
dicoToLbl = Dict.fromList ns
es = map (\((a, b), w) -> (lkpId a, lkpId b, w)) $ Map.toList adjmap
go (i, clust) = ClusterNode
(lkpLbl i)
clust
{-# INLINE clusteringOptim #-}
clusteringOptim :: forall n a b. KnownNat n
clusteringOptim :: forall n a b. (KnownNat n, Ord a, Show a, Bounded a)
=> Length -- ^ length of the random walks
-> FiniteGraph n a b -- ^ graph to compute clusters for
-> Dict a
-> Double -- ^ beta
-> Bool -- ^ True = run GC, False = don't
-> Clust a
clusteringOptim l fg@(FiniteGraph g) beta gc = trace ("clusteringOptim" :: String) $
clusteringOptim l fg@(FiniteGraph g) dico beta gc =
case runClustering gc beta adj prox sorted_edges of
(clusts, d) -> Clust clusts dico (index clusts) d
(clusts, d) -> Clust clusts (index clusts) d
where
dico = trace ("dico" :: String) $ Dict.fromList (DGI.labNodes g)
index clusts = trace ("index" :: String) $ Dict.foldMapWithKey
index clusts = Dict.foldMapWithKey
(\clustN is -> Dict.fromList $ map (,clustN) (IntSet.toList is))
clusts
!adj = graphMatrix fg True
!tra = transition adj
!prox = proxemie l tra
sorted_edges = trace ("confluence" :: String) $ sort_edges (natToInt @n) (edges_confluence l fg adj tra)
adj = graphMatrix fg True
tra = transition adj
prox = proxemie l tra
sorted_edges = sort_edges (natToInt @n) (edges_confluence l fg adj tra)
graphMatrix
:: forall (n :: Nat) a b.
......@@ -107,17 +128,16 @@ graphMatrix
=> FiniteGraph n a b -> Bool -> SMatrix.Matrix n n Double
graphMatrix (FiniteGraph g) reflexive = adj
where
!adj = trace ("adjacency" :: String) $ SMatrix.fromList es
adj = SMatrix.fromList es
es = diag ++ triplets
triplets = [ (i, j, 1.0) | i <- nodes g, j <- neighbors g i ]
diag = if reflexive
then [ (i, i, 1.0) | i <- [0..(n-1)] ]
then [ (i, i, 1.0) | i <- nodes g ]
else []
n = fromIntegral $ natVal (Proxy :: Proxy n)
transition
:: KnownNat n => SMatrix.Matrix n n Double -> SMatrix.Matrix n n Double
transition m = trace ("transition" :: String) $ SMatrix.imap
transition m = SMatrix.imap
(\i j _ -> 1 / fromIntegral (SMatrix.nnzCol m j))
m
......@@ -156,10 +176,9 @@ proxemie :: KnownNat n
=> Length
-> SMatrix.Matrix n n Double
-> ProxemyMatrix n
proxemie l !tm = trace ("proxemie" :: String) $ case l <= 1 of
proxemie l !tm = case l <= 1 of
True -> tm
False -> case iterate (SMatrix.mul tm) tm Prelude.!! (l-1) of
!p -> trace ("proxemie ok" :: String) p
False -> iterate (SMatrix.mul tm) tm Prelude.!! (l-1)
---------------------------------------------------------------
matconf :: forall n. KnownNat n
......@@ -224,17 +243,21 @@ computeConfluences
-> Map (Int, Int) Double
computeConfluences l edges reflexive = reifyNat (fromIntegral maxNode + 1) $ \(Proxy :: Proxy n) ->
let
nodeLabels = Set.toList $ Set.fromList $ foldMap (\(a, b) -> [a, b]) edges
dictLabels = Dict.fromList (zip [0..] nodeLabels)
dictIDs = Dict.fromList (zip nodeLabels [0..])
edges' = map (\(a, b) -> (dictIDs Dict.! a, dictIDs Dict.! b)) edges
xs :: [(Int, Int, Double)]
xs =
concatMap (\(i, j) -> [(i, j, 1.0), (j, i, 1.0)]) edges ++
concatMap (\(i, j) -> [(i, j, 1.0), (j, i, 1.0)]) edges' ++
(if reflexive
then [ (i, i, 1.0) | i <- [0..maxNode] ]
then [ (i, i, 1.0) | i <- [0..(Dict.size dictLabels - 1)] ]
else []
)
am :: SMatrix.Matrix n n Double
!am = SMatrix.fromList xs
!tm = transition am
!sumdeg_m2 = fromIntegral (SMatrix.nonZeros am - 2)
am = SMatrix.fromList xs
tm = transition am
sumdeg_m2 = fromIntegral (SMatrix.nonZeros am - 2)
go x y =
let
!deg_x_m1 = fromIntegral (SMatrix.nnzCol am x - 1)
......@@ -246,17 +269,19 @@ computeConfluences l edges reflexive = reifyNat (fromIntegral maxNode + 1) $ \(P
iterate (SMatrix.mul tm'') v Prelude.!! l
prox_y_x_length = SMatrix.extractCol v' 0 SVector.! x
prox_y_x_infini = if sumdeg_m2 == 0 then 0 else deg_x_m1 / sumdeg_m2
denominator = (prox_y_x_length + prox_y_x_infini)
in
if denominator == 0
then 0
else (prox_y_x_length - prox_y_x_infini) / denominator
in
Map.fromList $ map (\(a, b) -> ((a, b), go a b)) edges
Map.fromList $ map
(\(a, b) -> ( (a, b)
, go (dictIDs Dict.! a) (dictIDs Dict.! b)
)
) edges
where maxNode = maximum $ map (\(i, j) -> max i j) edges
minNode = minimum $ map (\(i, j) -> min i j) edges
where maxNode = getMax $ foldMap (\(i, j) -> Max (max i j)) edges
edges_confluence :: forall n a b.
KnownNat n
......@@ -265,7 +290,7 @@ edges_confluence :: forall n a b.
-> SMatrix.Matrix n n Double -- adjacency
-> SMatrix.Matrix n n Double -- transition
-> UnsortedEdges
edges_confluence l (FiniteGraph g) am tm = trace ("confluence" :: String) $ map f (edges g)
edges_confluence l (FiniteGraph g) am tm = map f (edges g)
where
vcount = natToInt @n
......@@ -287,7 +312,7 @@ edges_confluence l (FiniteGraph g) am tm = trace ("confluence" :: String) $ map
sort_edges :: Int
-> UnsortedEdges
-> SortedEdges
sort_edges n = trace ("sort_edges" :: String) . List.sortBy (\a b -> confCompare a b <> comparing xnpy a b)
sort_edges n = List.sortBy (\a b -> confCompare a b <> comparing xnpy a b)
where
third
:: forall a b c
......@@ -332,7 +357,7 @@ data MClustering s =
}
newMClustering :: Int -> ST s (MClustering s)
newMClustering n = trace ("newClustering" :: String) $ do
newMClustering n = do
mps <- MV.unsafeNew n
mis <- MVU.unsafeNew n
msc <- MVU.unsafeNew 1
......@@ -466,7 +491,6 @@ clusteringCollector beta adj prox mclust = do
data Clust a = Clust
{ cparts :: !(Dict IntSet)
, cdico :: (Dict a)
, cindex :: (Dict Int)
, cscore :: !Double
} deriving (Show, Eq)
......@@ -479,11 +503,11 @@ runClustering
-> SMatrix.Matrix n n Double -- ^ proxemie
-> SortedEdges
-> (Dict IntSet, Double)
runClustering gc beta adj prox se = trace ("runClustering" :: String) $ runST $ do
runClustering gc beta adj prox se = runST $ do
mclust <- newMClustering n
trace ("hbec" :: String) $ forM_ se $ \(x, y, _) -> clusteringStep beta adj prox mclust (x, y)
forM_ se $ \(x, y, _) -> clusteringStep beta adj prox mclust (x, y)
if gc
then trace ("gc" :: String) $ clusteringCollector beta adj prox mclust
then clusteringCollector beta adj prox mclust
else do cps <- V.unsafeFreeze (mparts mclust)
let cps' = Dict.fromList
[ (n, xs)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment