Commit f0f00ffd authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'flouvain' of ssh://gitlab.iscpif.fr:20022/gargantext/clustering-louvain into flouvain

parents f8d493dc d05ffebf
......@@ -4,7 +4,7 @@ cabal-version: 1.12
--
-- see: https://github.com/sol/hpack
--
-- hash: 9d2b00c4d3d099b31d6b9db84cd1172e0464481bc132080e2694e02b5587b29b
-- hash: 1f5db6daebbb99eb2ce71fb2a61120a7128c45b5152b0e6defa3a811c85aef62
name: clustering-louvain
version: 0.1.0.0
......@@ -29,6 +29,7 @@ library
, fgl
, foldl
, hxt
, parsec
, protolude
, text
, turtle
......
......@@ -36,6 +36,7 @@ library:
- base >= 4.7 && < 5
- fgl
- hxt
- parsec
- turtle
- foldl
#tests:
......
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TupleSections #-}
module Data.Graph.Clustering.Example where
import Protolude
import Control.Monad (foldM_)
import Data.List (sort)
import Data.List (nub, sort)
import Data.Graph.Clustering.Louvain.Utils
import Data.Graph.Inductive
import Data.Graph.Clustering.FLouvain
import qualified Data.Text as T
import qualified Text.ParserCombinators.Parsec as P
import Text.Parsec.Language (haskellStyle)
import qualified Text.Parsec.Token as PT
-- | Utility function to remap Gr () Double into FGraph () ()
exampleRemap :: Gr () Double -> FGraph () ()
......@@ -34,7 +43,7 @@ runIterations n gr = do
let initCgr = initialCGr fgr
putStrLn "Initial modularity: "
putStrLn $ show $ modularity fgr initCgr fgrWeight
putStrLn $ T.unpack $ show $ modularity fgr initCgr fgrWeight
foldM_ (runIteration fgr fgrWeight) initCgr [0..n]
......@@ -44,7 +53,7 @@ runIterations n gr = do
putStrLn $ "----- ITERATION " <> show i
putStrLn $ prettify iterNextCgr
putStrLn $ show i <> " iteration modularity: "
putStrLn $ show $ modularity fgr iterNextCgr fgrWeight
putStrLn $ T.unpack $ show $ modularity fgr iterNextCgr fgrWeight
return iterNextCgr
runLouvainFirstStepIterate :: Int -> Gr () Double -> (Modularity, CGr)
......@@ -54,13 +63,48 @@ runLouvainFirstStepIterate n gr = (modularity fgr cgr m, cgr)
cgr = louvainFirstStepIterate n fgr
m = graphWeight fgr
-- | egr <- readPythonGraph "<file-path>"
-- let gr = head $ Data.Either.rights [egr]
readPythonGraph :: FilePath -> IO (Either P.ParseError (Gr () Double))
readPythonGraph src = do
contents <- readFile src
let eEdges = P.parse edgeParser "(unknown)" $ T.unpack contents
case eEdges of
Left err -> do
return $ Left err
Right edges -> do
let nodes = map (, ()) $ nub $ map (\(s, _, _) -> s) edges
return $ Right $ mkGraph nodes edges
where
lexer = PT.makeTokenParser haskellStyle
edgeParser :: P.GenParser Char st [(Node, Node, Double)]
edgeParser = P.many edgeLine
edgeLine :: P.GenParser Char st (Node, Node, Double)
edgeLine = do
source <- edgeSource
target <- edgeTarget
weight <- edgeWeight
return (source, target, weight)
edgeSource :: P.GenParser Char st Node
edgeSource = do
v <- PT.integer lexer
return $ fromIntegral v
edgeTarget :: P.GenParser Char st Node
edgeTarget = do
v <- PT.integer lexer
return $ fromIntegral v
edgeWeight :: P.GenParser Char st Double
edgeWeight = do
v <- PT.float lexer
return v
karate :: Gr () Double
-- karate = mkGraph' <$> importGraphFromGexf "src/Data/karate.gexf"
karate = mkGraph [(1,()),(2,()),(3,()),(4,()),(5,()),(6,()),(7,()),(8,()),(9,()),(10,()),(11,()),(12,()),(13,()),(14,()),(15,()),(16,()),(17,()),(18,()),(19,()),(20,()),(21,()),(22,()),(23,()),(24,()),(25,()),(26,()),(27,()),(28,()),(29,()),(30,()),(31,()),(32,()),(33,()),(34,())] [(1,2,1.0),(1,3,1.0),(1,4,1.0),(1,5,1.0),(1,6,1.0),(1,7,1.0),(1,8,1.0),(1,9,1.0),(1,11,1.0),(1,12,1.0),(1,13,1.0),(1,14,1.0),(1,18,1.0),(1,20,1.0),(1,22,1.0),(1,32,1.0),(2,3,1.0),(2,4,1.0),(2,8,1.0),(2,14,1.0),(2,18,1.0),(2,20,1.0),(2,22,1.0),(2,31,1.0),(3,4,1.0),(3,8,1.0),(3,9,1.0),(3,10,1.0),(3,14,1.0),(3,28,1.0),(3,29,1.0),(3,33,1.0),(4,8,1.0),(4,13,1.0),(4,14,1.0),(5,7,1.0),(5,11,1.0),(6,7,1.0),(6,11,1.0),(6,17,1.0),(7,17,1.0),(9,31,1.0),(9,33,1.0),(9,34,1.0),(10,34,1.0),(14,34,1.0),(15,33,1.0),(15,34,1.0),(16,33,1.0),(16,34,1.0),(19,33,1.0),(19,34,1.0),(20,34,1.0),(21,33,1.0),(21,34,1.0),(23,33,1.0),(23,34,1.0),(24,26,1.0),(24,28,1.0),(24,30,1.0),(24,33,1.0),(24,34,1.0),(25,26,1.0),(25,28,1.0),(25,32,1.0),(26,32,1.0),(27,30,1.0),(27,34,1.0),(28,34,1.0),(29,32,1.0),(29,34,1.0),(30,33,1.0),(30,34,1.0),(31,33,1.0),(31,34,1.0),(32,33,1.0),(32,34,1.0),(33,34,1.0)]
karate2com :: [[Node]]
karate2com = sort $ Prelude.map (sort) [[10, 29, 32, 25, 28, 26, 24, 30, 27, 34, 31, 33, 23, 15, 16, 21, 19], [3, 9, 8, 4, 14, 20, 2, 13, 22, 1, 18, 12, 5, 7, 6, 17]]
karate2com = sort $ map (sort) [[10, 29, 32, 25, 28, 26, 24, 30, 27, 34, 31, 33, 23, 15, 16, 21, 19], [3, 9, 8, 4, 14, 20, 2, 13, 22, 1, 18, 12, 5, 7, 6, 17]]
eU :: [LEdge Double]
......
This source diff could not be displayed because it is too large. You can view the blob instead.
with import <nixpkgs> {};
stdenv.mkDerivation rec {
name = "env";
env = buildEnv {
name = name;
paths = buildInputs;
};
buildInputs = [
python3
python3Packages.ipython
python3Packages.numpy
python3Packages.pandas
python3Packages.sklearn-deap
];
builder = builtins.toFile "builder.sh" ''
source $stdenv/setup
touch $out
'';
}
#!/usr/bin/env python3
#
# In ipython:
# %load "src/Data/Graph/Clustering/example.py"
import numpy as np
import numpy.linalg as npl
import pandas as pd
import random
from sklearn.datasets.samples_generator import make_blobs
SAME_CLUSTER_PROB = 0.8
DIFFERENT_CLUSTER_PROB = 0.1
X, y = make_blobs(n_samples=300, centers=4, cluster_std=0.60, random_state=0)
# Norm with be the edge weight
norms = [npl.norm(xx) for xx in X]
# Our graph will consist of pt (0.0, 0.0) with edges to all nodes in X
#node0 = (0, np.array([0.0, 0.0]))
graph = {
'nodes': [],
'edges': [],
}
for i, (node, norm, cluster) in enumerate(zip(X, norms, y)):
graph['nodes'].append((i, node))
for j, (node2, cluster2) in enumerate(zip(X, y)):
# If same cluster -- higher probability of edge creation
# If different cluster -- low probability of an edge
prob = DIFFERENT_CLUSTER_PROB
if cluster == cluster2:
prob = SAME_CLUSTER_PROB
if random.random() < prob:
graph['edges'].append((i, j, norm))
# If no edge, try backwards one
elif random.random() < prob:
graph['edges'].append((j, i, norm))
# Custom graph file format:
# <edge source> <edge target> <edge weight>
with open('clustered.graph', 'w') as f:
for edge in graph['edges']:
f.write('{} {} {}\n'.format(*edge))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment