Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
160
Issues
160
List
Board
Labels
Milestones
Merge Requests
14
Merge Requests
14
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
03cac30c
Commit
03cac30c
authored
Mar 01, 2023
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Clustering with more granularity and structure
parent
3ccd26ab
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
102 additions
and
51 deletions
+102
-51
Bridgeness.hs
src/Gargantext/Core/Viz/Graph/Bridgeness.hs
+52
-29
Tools.hs
src/Gargantext/Core/Viz/Graph/Tools.hs
+32
-22
IGraph.hs
src/Gargantext/Core/Viz/Graph/Tools/IGraph.hs
+18
-0
No files found.
src/Gargantext/Core/Viz/Graph/Bridgeness.hs
View file @
03cac30c
...
...
@@ -31,6 +31,7 @@ import Data.Tuple.Extra (swap)
import
Debug.Trace
(
trace
)
import
Gargantext.Core.Methods.Similarities
(
Similarity
(
..
))
import
Gargantext.Prelude
import
Prelude
(
pi
)
import
Graph.Types
(
ClusterNode
(
..
))
import
qualified
Data.List
as
List
import
qualified
Data.Map.Strict
as
Map
...
...
@@ -40,6 +41,7 @@ import qualified Data.IntMap as Dico
----------------------------------------------------------------------
type
Partitions
=
Map
(
Int
,
Int
)
Double
->
IO
[
ClusterNode
]
type
Partitions'
=
Map
(
Int
,
Int
)
Double
->
IO
[
Set
NodeId
]
----------------------------------------------------------------------
nodeId2comId
::
ClusterNode
->
(
NodeId
,
CommunityId
)
nodeId2comId
(
ClusterNode
i1
i2
)
=
(
i1
,
i2
)
...
...
@@ -48,9 +50,24 @@ type NodeId = Int
type
CommunityId
=
Int
----------------------------------------------------------------------
----------------------------------------------------------------------
-- recursiveClustering : get get more granularity of a given clustering
-- recursiveClustering : to get more granularity of a given clustering
-- tested with spinglass clustering only (WIP)
recursiveClustering'
::
Partitions'
->
Map
(
Int
,
Int
)
Double
->
IO
[[
Set
NodeId
]]
recursiveClustering'
f
mp
=
do
let
n
::
Double
n
=
fromIntegral
$
Set
.
size
$
Set
.
unions
$
List
.
concat
$
map
(
\
(
k1
,
k2
)
->
map
Set
.
singleton
[
k1
,
k2
])
$
Map
.
keys
mp
t
::
Int
t
=
round
$
2
*
n
/
sqrt
n
ss
<-
f
mp
mapM
(
\
s
->
if
Set
.
size
s
>
t
then
f
(
removeNodes
s
mp
)
else
pure
[
s
])
ss
----------------------------------------------------------------------
recursiveClustering
::
Partitions
->
Map
(
Int
,
Int
)
Double
->
IO
[
ClusterNode
]
recursiveClustering
f
mp
=
do
let
...
...
@@ -61,18 +78,23 @@ recursiveClustering f mp = do
$
Map
.
keys
mp
t
::
Int
t
=
round
$
(
n
/
2
)
*
(
sqrt
n
)
/
100
t
=
round
$
2
*
n
/
sqrt
n
(
toSplit
,
others
)
<-
List
.
span
(
\
a
->
Set
.
size
a
>
t
)
<$>
clusterNodes2sets
<$>
f
mp
cls'
<-
mapM
f
$
map
(
\
s
->
removeNodes
s
mp
)
toSplit
pure
$
setNodes2clusterNodes
$
others
<>
(
List
.
concat
$
map
clusterNodes2sets
cls'
)
----------------------------------------------------------------------
setNodes2clusterNodes
::
[
Set
NodeId
]
->
[
ClusterNode
]
setNodes2clusterNodes
ns
=
List
.
concat
$
map
(
\
(
n
,
ns'
)
->
toCluster
n
ns'
)
$
zip
[
1
..
]
ns
where
toCluster
::
CommunityId
->
Set
NodeId
->
[
ClusterNode
]
toCluster
cId
setNodeId
=
map
(
\
n
->
ClusterNode
n
cId
)
(
Set
.
toList
setNodeId
)
clusterNodes2map
::
[
ClusterNode
]
->
Map
NodeId
Int
clusterNodes2map
=
Map
.
fromList
.
map
(
\
(
ClusterNode
nId
cId
)
->
(
nId
,
cId
))
removeNodes
::
Set
NodeId
->
Map
(
NodeId
,
NodeId
)
Double
->
Map
(
NodeId
,
NodeId
)
Double
...
...
@@ -84,7 +106,6 @@ clusterNodes2sets = Dico.elems
.
Dico
.
fromListWith
(
<>
)
.
(
map
((
Tuple
.
second
Set
.
singleton
)
.
swap
.
nodeId2comId
))
----------------------------------------------------------------------
----------------------------------------------------------------------
data
Bridgeness
=
Bridgeness_Basic
{
bridgeness_partitions
::
[
ClusterNode
]
,
bridgeness_filter
::
Double
...
...
@@ -92,48 +113,51 @@ data Bridgeness = Bridgeness_Basic { bridgeness_partitions :: [ClusterNode]
|
Bridgeness_Advanced
{
bridgeness_similarity
::
Similarity
,
bridgness_confluence
::
Confluence
}
|
Bridgeness_Recursive
{
br_partitions
::
[[
Set
NodeId
]]
,
br_filter
::
Double
}
type
Confluence
=
Map
(
NodeId
,
NodeId
)
Double
type
Confluence
=
Map
(
NodeId
,
NodeId
)
Double
-- Filter Links between the Clusters
-- Links: Map (NodeId, NodeId) Double
-- List of Clusters: [Set NodeId]
bridgeness
::
Bridgeness
->
Map
(
NodeId
,
NodeId
)
Double
->
Map
(
NodeId
,
NodeId
)
Double
bridgeness
(
Bridgeness_Recursive
sn
f
)
m
=
Map
.
unions
$
[
linksBetween
]
<>
map
(
\
s
->
bridgeness
(
Bridgeness_Basic
(
setNodes2clusterNodes
s
)
f
)
m'
)
sn
where
(
linksBetween
,
m'
)
=
Map
.
partitionWithKey
(
\
(
n1
,
n2
)
_v
->
Map
.
lookup
n1
mapNodeIdClusterId
/=
Map
.
lookup
n2
mapNodeIdClusterId
)
$
bridgeness
(
Bridgeness_Basic
clusters
(
pi
*
f
))
m
clusters
=
setNodes2clusterNodes
(
map
Set
.
unions
sn
)
mapNodeIdClusterId
=
clusterNodes2map
clusters
bridgeness
(
Bridgeness_Advanced
sim
c
)
m
=
Map
.
fromList
$
List
.
filter
(
\
x
->
if
sim
==
Conditional
then
snd
x
>
0.2
else
snd
x
>
0.02
)
$
map
(
\
(
ks
,
(
v1
,
_v2
))
->
(
ks
,
v1
))
-- $ List.take (if sim == Conditional then 2*n else 3*n)
-- $ List.sortOn (Down . (snd . snd))
$
Map
.
toList
$
trace
(
"bridgeness3 m c"
<>
show
(
m
,
c
))
--
$ trace ("bridgeness3 m c" <> show (m,c))
$
Map
.
intersectionWithKey
(
\
k
v1
v2
->
trace
(
"intersectionWithKey "
<>
(
show
(
k
,
v1
,
v2
)))
(
v1
,
v2
))
m
c
{-
where
!m' = Map.toList m
n :: Int
!n = trace ("bridgeness m size: " <> (show $ List.length m'))
$ round
$ (fromIntegral $ List.length m') / (log $ fromIntegral nodesNumber :: Double)
nodesNumber :: Int
nodesNumber = Set.size $ Set.fromList $ as <> bs
where
(as, bs) = List.unzip $ Map.keys m
-}
bridgeness
(
Bridgeness_Basic
ns
b
)
m
=
Map
.
fromList
$
List
.
concat
$
Map
.
elems
$
filterComs
b
$
filterComs
(
round
b
)
$
groupEdges
(
Map
.
fromList
$
map
nodeId2comId
ns
)
m
groupEdges
::
(
Ord
a
,
Ord
b1
)
=>
Map
b1
a
->
Map
(
b1
,
b1
)
b2
->
Map
(
a
,
a
)
[((
b1
,
b1
),
b2
)]
groupEdges
::
(
Ord
comId
,
Ord
nodeId
)
=>
Map
nodeId
comId
->
Map
(
nodeId
,
nodeId
)
value
->
Map
(
comId
,
comId
)
[((
nodeId
,
nodeId
),
value
)]
groupEdges
m
=
fromListWith
(
<>
)
.
catMaybes
.
map
(
\
((
n1
,
n2
),
d
)
...
...
@@ -144,17 +168,16 @@ groupEdges m = fromListWith (<>)
)
.
toList
-- | TODO : sortOn Confluence
filterComs
::
(
Ord
n1
,
Eq
n2
)
=>
p
=>
Int
->
Map
(
n2
,
n2
)
[(
a3
,
n1
)]
->
Map
(
n2
,
n2
)
[(
a3
,
n1
)]
filterComs
_
b
m
=
Map
.
filter
(
\
n
->
length
n
>
0
)
$
mapWithKey
filter'
m
filterComs
b
m
=
Map
.
filter
(
\
n
->
length
n
>
0
)
$
mapWithKey
filter'
m
where
filter'
(
c1
,
c2
)
a
|
c1
==
c2
=
a
-- TODO use n here
|
otherwise
=
take
(
2
*
n
)
$
List
.
sortOn
(
Down
.
snd
)
a
|
otherwise
=
take
(
b
*
2
*
n
)
$
List
.
sortOn
(
Down
.
snd
)
a
where
n
::
Int
n
=
round
$
100
*
a'
/
t
...
...
src/Gargantext/Core/Viz/Graph/Tools.hs
View file @
03cac30c
...
...
@@ -25,9 +25,9 @@ import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import
Gargantext.Core.Methods.Similarities
(
Similarity
(
..
),
measure
)
-- import Gargantext.Core.Methods.Similarities.Conditional (conditional)
import
Gargantext.Core.Statistics
import
Gargantext.Core.Viz.Graph.Bridgeness
(
bridgeness
,
Bridgeness
(
..
),
Partitions
,
nodeId2comId
,
recursiveClustering
)
import
Gargantext.Core.Viz.Graph.Bridgeness
(
bridgeness
,
Bridgeness
(
..
),
Partitions
,
nodeId2comId
,
recursiveClustering
,
recursiveClustering'
,
setNodes2clusterNodes
)
import
Gargantext.Core.Viz.Graph.Index
(
createIndices
,
toIndex
,
map2mat
,
mat2map
,
Index
,
MatrixShape
(
..
))
import
Gargantext.Core.Viz.Graph.Tools.IGraph
(
mkGraphUfromEdges
,
spinglass
)
import
Gargantext.Core.Viz.Graph.Tools.IGraph
(
mkGraphUfromEdges
,
spinglass
,
spinglass'
)
import
Gargantext.Core.Viz.Graph.Tools.Infomap
(
infomap
)
import
Gargantext.Core.Viz.Graph.Types
(
Attributes
(
..
),
Edge
(
..
),
Graph
(
..
),
MultiPartite
(
..
),
Node
(
..
),
Partite
(
..
),
Strength
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
...
...
@@ -109,7 +109,6 @@ cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
--cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
-- TODO: change these options, or make them configurable in UI?
cooc2graphWith'
::
Partitions
->
BridgenessMethod
->
MultiPartite
...
...
@@ -118,29 +117,13 @@ cooc2graphWith' :: Partitions
->
Strength
->
HashMap
(
NgramsTerm
,
NgramsTerm
)
Int
->
IO
Graph
cooc2graphWith'
doPartitions
bridgenessMethod
multi
similarity
threshold
strength
myCooc
=
do
cooc2graphWith'
_doPartitions
_bridgenessMethod
multi
similarity
@
Conditional
threshold
strength
myCooc
=
do
let
(
distanceMap
,
diag
,
ti
)
=
doSimilarityMap
similarity
threshold
strength
myCooc
distanceMap
`
seq
`
diag
`
seq
`
ti
`
seq
`
return
()
--{- -- Debug
-- To Work with Igraph
saveAsFileDebug
"/tmp/distanceMap"
(
List
.
intercalate
";"
$
Set
.
toList
$
Set
.
fromList
$
map
(
\
(
k1
,
k2
)
->
if
k1
<
k2
then
show
(
k1
+
1
)
<>
" "
<>
show
(
k2
+
1
)
else
show
(
k2
+
1
)
<>
" "
<>
show
(
k1
+
1
)
)
$
Map
.
keys
$
Map
.
filter
(
>
0.005
)
distanceMap
)
saveAsFileDebug
"/tmp/distanceMap.data"
distanceMap
saveAsFileDebug
"/tmp/distanceMap.cooc"
myCooc
-- printDebug "similarities" similarities
--}
partitions
<-
if
(
Map
.
size
distanceMap
>
0
)
then
recursiveClustering
doPartitions
distanceMap
-- then recursiveClustering doPartitions distanceMap
then
recursiveClustering'
(
spinglass'
1
)
distanceMap
else
panic
$
Text
.
unlines
[
"[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
,
"Maybe you should add more Map Terms in your list"
,
"Tutorial: TODO"
...
...
@@ -149,12 +132,39 @@ cooc2graphWith' doPartitions bridgenessMethod multi similarity threshold strengt
let
!
confluence'
=
BAC
.
computeConfluences
3
(
Map
.
keys
distanceMap
)
True
!
bridgeness'
=
bridgeness
(
Bridgeness_Recursive
partitions
1.0
)
distanceMap
{-
!bridgeness' = if bridgenessMethod == BridgenessMethod_Basic
then bridgeness (Bridgeness_Basic partitions 1.0) distanceMap
else bridgeness (Bridgeness_Advanced similarity confluence') distanceMap
-}
pure
$
data2graph
multi
ti
diag
bridgeness'
confluence'
(
setNodes2clusterNodes
$
List
.
concat
partitions
)
cooc2graphWith'
doPartitions
bridgenessMethod
multi
Distributional
threshold
strength
myCooc
=
do
let
(
distanceMap
,
diag
,
ti
)
=
doSimilarityMap
Distributional
threshold
strength
myCooc
distanceMap
`
seq
`
diag
`
seq
`
ti
`
seq
`
return
()
partitions
<-
if
(
Map
.
size
distanceMap
>
0
)
then
recursiveClustering
doPartitions
distanceMap
else
panic
$
Text
.
unlines
[
"[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
,
"Maybe you should add more Map Terms in your list"
,
"Tutorial: TODO"
]
length
partitions
`
seq
`
return
()
let
!
confluence'
=
BAC
.
computeConfluences
3
(
Map
.
keys
distanceMap
)
True
!
bridgeness'
=
if
bridgenessMethod
==
BridgenessMethod_Basic
then
bridgeness
(
Bridgeness_Basic
partitions
10.0
)
distanceMap
else
bridgeness
(
Bridgeness_Advanced
Distributional
confluence'
)
distanceMap
pure
$
data2graph
multi
ti
diag
bridgeness'
confluence'
partitions
type
Reverse
=
Bool
doSimilarityMap
::
Similarity
...
...
src/Gargantext/Core/Viz/Graph/Tools/IGraph.hs
View file @
03cac30c
...
...
@@ -29,6 +29,7 @@ import qualified IGraph.Algorithms.Clique as IG
import
qualified
IGraph.Algorithms.Community
as
IG
import
qualified
IGraph.Algorithms.Structure
as
IG
import
qualified
IGraph.Random
as
IG
import
qualified
Data.Set
as
Set
------------------------------------------------------------------
-- | Main Types
...
...
@@ -74,6 +75,23 @@ spinglass s g = toClusterNode
(
toI
,
fromI
)
=
createIndices
g
spinglass'
::
Seed
->
Map
(
Int
,
Int
)
Double
->
IO
[
Set
Int
]
spinglass'
s
g
=
map
Set
.
fromList
<$>
map
catMaybes
<$>
map
(
map
(
\
n
->
Map
.
lookup
n
fromI
))
<$>
List
.
concat
<$>
mapM
(
partitions_spinglass'
s
)
g'
where
-- Not connected components of the graph make crash spinglass
g'
=
IG
.
decompose
$
mkGraphUfromEdges
$
Map
.
keys
$
toIndex
toI
g
(
toI
,
fromI
)
=
createIndices
g
-- | Tools to analyze graphs
partitions_spinglass'
::
(
Serialize
v
,
Serialize
e
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment