Commit badfdf60 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] Distributional Measure Similarity in Graph Flow (WIP)

parent b7a8823a
......@@ -18,10 +18,10 @@ import Data.Aeson
import Data.Array.Accelerate (Matrix)
import Data.Swagger
import GHC.Generics (Generic)
import Gargantext.Core.Methods.Distances.Accelerate.Conditional (measureConditional)
import Gargantext.Core.Methods.Distances.Accelerate.Distributional (logDistributional)
import Gargantext.Prelude (Ord, Eq, Int, Double)
import Gargantext.Prelude (Show)
import Gargantext.Core.Methods.Distances.Accelerate.Conditional (measureConditional)
import Gargantext.Core.Methods.Distances.Accelerate.Distributional (distributional)
import Prelude (Enum, Bounded, minBound, maxBound)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
......@@ -32,12 +32,12 @@ data Distance = Conditional | Distributional
measure :: Distance -> Matrix Int -> Matrix Double
measure Conditional = measureConditional
measure Distributional = distributional
------------------------------------------------------------------------
measure Distributional = logDistributional
------------------------------------------------------------------------
withMetric :: GraphMetric -> Matrix Int -> Matrix Double
withMetric Order1 = measureConditional
withMetric Order2 = distributional
withMetric Order2 = logDistributional
------------------------------------------------------------------------
data GraphMetric = Order1 | Order2
......
......@@ -116,10 +116,14 @@ distributional m' = run result
result = termDivNan z_1 z_2
logDistributional :: Matrix Int -> Matrix Double
logDistributional m' = run result
logDistributional m = run $ diagNull n $ matMiniMax $ logDistributional' n m
where
n = dim m
logDistributional' :: Int -> Matrix Int -> Acc (Matrix Double)
logDistributional' n m' = result
where
m = map fromIntegral $ use m'
n = dim m'
-- Scalar. Sum of all elements of m.
to = the $ sum (flatten m)
......@@ -234,6 +238,6 @@ rIJ n m = matMiniMax $ divide a b
-- | Test perfermance with this matrix
-- TODO : add this in a benchmark folder
distriTest :: Int -> Matrix Double
distriTest n = distributional (theMatrixInt n)
distriTest n = logDistributional (theMatrixInt n)
......@@ -15,24 +15,17 @@ Motivation and definition of the @Conditional@ distance.
module Gargantext.Core.Methods.Distances.Conditional
where
import Data.Matrix hiding (identity)
import Data.List (sortOn)
import Data.Map (Map)
import Data.Matrix hiding (identity)
import Gargantext.Core.Viz.Graph.Utils
import Gargantext.Prelude
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Vector as V
import Gargantext.Prelude
import Gargantext.Core.Viz.Graph.Utils
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Optimisation issue
toBeOptimized :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
toBeOptimized m = proba Col m
......@@ -56,7 +49,6 @@ mapOnly Row = mapRow
mapAll :: (a -> a) -> Matrix a -> Matrix a
mapAll f m = mapOn Col (\_ -> f) m
---------------------------------------------------------------
-- | Compute a distance from axis
-- xs = (sum Col x') - x'
......
......@@ -242,7 +242,7 @@ matMiniMax :: (Elt a, Ord a, P.Num a)
-> Acc (Matrix a)
matMiniMax m = filterWith' miniMax' (constant 0) m
where
miniMax' = the $ minimum $ maximum m
miniMax' = the $ maximum $ minimum m
-- | Filters the matrix with a constant
......
......@@ -112,7 +112,7 @@ ex_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector Gener
ex_cooc_mat = do
m <- ex_cooc
let (ti,_) = createIndices m
let mat_cooc = cooc2mat ti m
let mat_cooc = cooc2mat Triangular ti m
pure ( ti
, mat_cooc
, incExcSpeGen_proba mat_cooc
......@@ -123,7 +123,7 @@ ex_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
ex_incExcSpeGen = incExcSpeGen_sorted <$> ex_cooc
incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat Triangular ti m)
where
(ti,fi) = createIndices m
ordonne x = sortWith (Down . snd)
......
......@@ -70,7 +70,7 @@ localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [
scores
where
(ti, fi) = createIndices m
(is, ss) = incExcSpeGen $ cooc2mat ti m
(is, ss) = incExcSpeGen $ cooc2mat Triangular ti m
scores = DAA.toList
$ DAA.run
$ DAA.zip (DAA.use is) (DAA.use ss)
......@@ -82,7 +82,7 @@ scored' :: Ord t => Map (t,t) Int -> [Scored t]
scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) scores
where
(ti, fi) = createIndices m
(is, ss) = incExcSpeGen $ cooc2mat ti m
(is, ss) = incExcSpeGen $ cooc2mat Triangular ti m
scores = DAA.toList
$ DAA.run
$ DAA.zip (DAA.use is) (DAA.use ss)
......
......@@ -79,12 +79,12 @@ graphAPI u n = getGraph u n
getGraph :: UserId -> NodeId -> GargNoServer HyperdataGraphAPI
getGraph _uId nId = do
nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
let graph = nodeGraph ^. node_hyperdata . hyperdataGraph
let camera = nodeGraph ^. node_hyperdata . hyperdataCamera
repo <- getRepo
let cId = maybe (panic "[G.V.G.API] Node has no parent")
let
graph = nodeGraph ^. node_hyperdata . hyperdataGraph
camera = nodeGraph ^. node_hyperdata . hyperdataCamera
cId = maybe (panic "[G.V.G.API] Node has no parent")
identity
$ nodeGraph ^. node_parentId
......@@ -94,8 +94,9 @@ getGraph _uId nId = do
-- graph' <- computeGraph cId Distributional NgramsTerms repo
graph' <- computeGraph cId Conditional NgramsTerms repo
mt <- defaultGraphMetadata cId "Title" repo
let graph'' = set graph_metadata (Just mt) graph'
let hg = HyperdataGraphAPI graph'' camera
let
graph'' = set graph_metadata (Just mt) graph'
hg = HyperdataGraphAPI graph'' camera
-- _ <- updateHyperdata nId hg
_ <- updateHyperdata nId (HyperdataGraph (Just graph'') camera)
pure $ trace "[G.V.G.API] Graph empty, computing" hg
......@@ -107,14 +108,16 @@ getGraph _uId nId = do
recomputeGraph :: UserId -> NodeId -> Distance -> GargNoServer Graph
recomputeGraph _uId nId d = do
nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
let graph = nodeGraph ^. node_hyperdata . hyperdataGraph
let camera = nodeGraph ^. node_hyperdata . hyperdataCamera
let graphMetadata = graph ^? _Just . graph_metadata . _Just
let listVersion = graph ^? _Just . graph_metadata . _Just . gm_list . lfg_version
let
graph = nodeGraph ^. node_hyperdata . hyperdataGraph
camera = nodeGraph ^. node_hyperdata . hyperdataCamera
graphMetadata = graph ^? _Just . graph_metadata . _Just
listVersion = graph ^? _Just . graph_metadata . _Just . gm_list . lfg_version
repo <- getRepo
let v = repo ^. r_version
let cId = maybe (panic "[G.V.G.API.recomputeGraph] Node has no parent")
let
v = repo ^. r_version
cId = maybe (panic "[G.V.G.API.recomputeGraph] Node has no parent")
identity
$ nodeGraph ^. node_parentId
......@@ -144,18 +147,19 @@ computeGraph :: HasNodeError err
-> Cmd err Graph
computeGraph cId d nt repo = do
lId <- defaultList cId
lIds <- selectNodesWithUsername NodeList userMaster
let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
-- TODO split diagonal
myCooc <- HashMap.filter (>1)
let ngs = filterListWithRoot MapTerm
$ mapTermListRoot [lId] nt repo
myCooc <- HashMap.filter (>1) -- Removing the hapax (ngrams with 1 cooc)
<$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
graph <- liftBase $ cooc2graphWith Spinglass d 0 myCooc
printDebug "myCooc" myCooc
graph <- liftBase $ cooc2graphWith Spinglass d 0 myCooc
pure graph
......@@ -205,7 +209,7 @@ graphRecompute u n logStatus = do
, _scst_remaining = Just 1
, _scst_events = Just []
}
_g <- trace (show u) $ recomputeGraph u n Conditional -- Distributional
_g <- trace (show u) $ recomputeGraph u n Distributional
pure JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 0
......@@ -226,12 +230,17 @@ graphVersionsAPI u n =
graphVersions :: NodeId -> GargNoServer GraphVersions
graphVersions nId = do
nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
let graph = nodeGraph ^. node_hyperdata . hyperdataGraph
let listVersion = graph ^? _Just
. graph_metadata
. _Just
. gm_list
. lfg_version
let
graph = nodeGraph
^. node_hyperdata
. hyperdataGraph
listVersion = graph
^? _Just
. graph_metadata
. _Just
. gm_list
. lfg_version
repo <- getRepo
let v = repo ^. r_version
......@@ -240,7 +249,7 @@ graphVersions nId = do
, gv_repo = v }
recomputeVersions :: UserId -> NodeId -> GargNoServer Graph
recomputeVersions uId nId = recomputeGraph uId nId Conditional -- Distributional
recomputeVersions uId nId = recomputeGraph uId nId Distributional
------------------------------------------------------------
graphClone :: UserId
......
......@@ -44,25 +44,34 @@ type Index = Int
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
score :: (Ord t) => (A.Matrix Int -> A.Matrix Double)
-> Map (t, t) Int
-> Map (t, t) Double
score f m = fromIndex fromI . mat2map . f $ cooc2mat toI m
score :: (Ord t) => MatrixShape
-> (A.Matrix Int -> A.Matrix Double)
-> Map (t, t) Int
-> Map (t, t) Double
score s f m = fromIndex fromI . mat2map . f $ cooc2mat s toI m
where
(toI, fromI) = createIndices m
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
cooc2mat :: Ord t => Map t Index -> Map (t, t) Int -> Matrix Int
cooc2mat ti m = map2mat 0 n idx
cooc2mat :: Ord t => MatrixShape -> Map t Index -> Map (t, t) Int -> Matrix Int
cooc2mat sym ti m = map2mat sym 0 n idx
where
n = M.size ti
idx = toIndex ti m -- it is important to make sure that toIndex is ran only once.
map2mat :: Elt a => a -> Int -> Map (Index, Index) a -> Matrix a
map2mat def n m = A.fromFunction shape (\(Z :. x :. y) -> fromMaybe def $ M.lookup (x, y) m)
data MatrixShape = Triangular | Square
map2mat :: Elt a => MatrixShape -> a -> Int -> Map (Index, Index) a -> Matrix a
map2mat sym def n m = A.fromFunction shape getData
where
shape = (Z :. n :. n)
getData = (\(Z :. x :. y) ->
case sym of
Triangular -> fromMaybe def (M.lookup (x,y) m)
Square -> fromMaybe (fromMaybe def $ M.lookup (y,x) m)
$ M.lookup (x, y) m
)
shape = (Z :. n :. n)
mat2map :: (Elt a, Shape (Z :. Index)) =>
A.Array (Z :. Index :. Index) a -> Map (Index, Index) a
......@@ -73,15 +82,19 @@ mat2map m = M.fromList . map f . A.toList . A.run . A.indexed $ A.use m
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
toIndex :: Ord t => Map t Index -> Map (t,t) a -> Map (Index,Index) a
toIndex ni ns = indexConversion ni ns
toIndex :: Ord t
=> Map t Index
-> Map (t,t) a
-> Map (Index,Index) a
toIndex = indexConversion
fromIndex :: Ord t => Map Index t -> Map (Index, Index) a -> Map (t,t) a
fromIndex ni ns = indexConversion ni ns
indexConversion :: (Ord b, Ord k) => Map k b -> Map (k,k) a -> Map (b, b) a
indexConversion index ms = M.fromList
$ map (\((k1,k2),c) -> ( ((M.!) index k1, (M.!) index k2), c)) (M.toList ms)
$ map (\((k1,k2),c) -> ( ((M.!) index k1, (M.!) index k2), c))
(M.toList ms)
---------------------------------------------------------------------------------
-------------------------------------------------------------------------------
......
......@@ -14,8 +14,8 @@ module Gargantext.Core.Viz.Graph.Tools
-- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-})
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
import Data.Map (Map)
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Text (Text)
import Debug.Trace (trace)
import GHC.Float (sin, cos)
......@@ -25,17 +25,17 @@ import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
import Gargantext.Core.Statistics
import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index)
import Gargantext.Prelude
import IGraph.Random -- (Gen(..))
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector.Storable as Vec
import qualified IGraph as Igraph
import qualified IGraph.Algorithms.Layout as Layout
import qualified Data.HashMap.Strict as HashMap
type Threshold = Double
......@@ -44,13 +44,19 @@ cooc2graph' :: Ord t => Distance
-> Double
-> Map (t, t) Int
-> Map (Index, Index) Double
cooc2graph' distance threshold myCooc = distanceMap
where
(ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc
matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 1) myCooc'
distanceMat = measure distance matCooc
distanceMap = Map.filter (> threshold) $ mat2map distanceMat
cooc2graph' distance threshold myCooc
= Map.filter (> threshold)
$ mat2map
$ measure distance
$ case distance of
Conditional -> map2mat Triangular 0 (Map.size ti)
Distributional -> map2mat Square 0 (Map.size ti)
$ Map.filter (> 1) myCooc'
where
(ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc
data PartitionMethod = Louvain | Spinglass
......@@ -70,17 +76,33 @@ cooc2graphWith' :: ToComId a
-> HashMap (NgramsTerm, NgramsTerm) Int
-> IO Graph
cooc2graphWith' doPartitions distance threshold myCooc = do
printDebug "cooc2graph" distance
let
-- TODO remove below
theMatrix = Map.fromList $ HashMap.toList myCooc
(ti, _) = createIndices theMatrix
myCooc' = toIndex ti theMatrix
matCooc = map2mat 0 (Map.size ti)
$ Map.filterWithKey (\(a,b) _ -> a /= b)
$ Map.filter (> 1) myCooc'
distanceMat = measure distance matCooc
distanceMap = Map.filter (> threshold) $ mat2map distanceMat
matCooc = case distance of -- Shape of the Matrix
Conditional -> map2mat Triangular 0 (Map.size ti)
Distributional -> map2mat Square 0 (Map.size ti)
$ case distance of -- Removing the Diagonal ?
Conditional -> Map.filterWithKey (\(a,b) _ -> a /= b)
Distributional -> identity
$ Map.filter (>1) myCooc'
printDebug "myCooc'" myCooc'
printDebug "ti" (Map.size ti)
let
similarities = measure distance matCooc
printDebug "Similarities" similarities
let
distanceMap = case distance of
Conditional -> Map.filter (> threshold)
Distributional -> Map.filter (> 0)
$ mat2map similarities
nodesApprox :: Int
nodesApprox = n'
......@@ -89,18 +111,15 @@ cooc2graphWith' doPartitions distance threshold myCooc = do
n' = Set.size $ Set.fromList $ as <> bs
ClustersParams rivers _level = clustersParams nodesApprox
printDebug "Start" ("partitions" :: Text)
partitions <- if (Map.size distanceMap > 0)
-- then iLouvainMap 100 10 distanceMap
-- then hLouvain distanceMap
then doPartitions distanceMap
else panic "Text.Flow: DistanceMap is empty"
printDebug "End" ("partitions" :: Text)
let
-- bridgeness' = distanceMap
bridgeness' = trace ("Rivers: " <> show rivers)
$ bridgeness rivers partitions distanceMap
confluence' = confluence (Map.keys bridgeness') 3 True False
pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
......@@ -230,4 +249,14 @@ layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
--p = Layout.defaultLGL
p = Layout.kamadaKawai
g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
-----------------------------------------------------------------------------
-----------------------------------------------------------------------------
-----------------------------------------------------------------------------
-- Debug
{-
-- measure logDistributional
dataDebug = map2mat Square (0::Int) 19 dataBug'
dataBug' :: Map (Int, Int) Int
dataBug' = Map.fromList [((0,0),28),((0,1),8),((0,2),6),((0,3),2),((0,5),4),((0,6),4),((0,7),2),((0,9),7),((0,10),4),((0,13),4),((0,14),2),((0,15),5),((0,16),8),((0,17),3),((1,1),28),((1,2),6),((1,3),7),((1,4),5),((1,5),7),((1,6),5),((1,7),2),((1,9),6),((1,10),7),((1,11),5),((1,13),6),((1,15),6),((1,16),14),((1,18),4),((2,2),39),((2,3),5),((2,4),4),((2,5),3),((2,6),4),((2,7),4),((2,8),3),((2,9),17),((2,10),4),((2,11),8),((2,12),2),((2,13),15),((2,14),4),((2,15),5),((2,16),21),((2,18),4),((3,3),48),((3,4),10),((3,5),7),((3,6),3),((3,7),7),((3,8),6),((3,9),12),((3,10),9),((3,11),8),((3,12),5),((3,13),15),((3,14),5),((3,15),9),((3,16),17),((3,18),4),((4,4),33),((4,5),2),((4,6),5),((4,7),7),((4,8),4),((4,9),6),((4,10),12),((4,11),8),((4,12),3),((4,13),16),((4,14),4),((4,15),4),((4,16),5),((4,17),2),((4,18),12),((5,5),27),((5,6),2),((5,8),3),((5,9),12),((5,10),6),((5,11),9),((5,13),4),((5,14),2),((5,15),7),((5,16),11),((5,18),4),((6,6),34),((6,7),4),((6,8),3),((6,9),12),((6,10),8),((6,11),2),((6,12),5),((6,13),6),((6,14),6),((6,15),5),((6,16),22),((6,17),8),((6,18),4),((7,7),27),((7,8),2),((7,9),6),((7,10),2),((7,11),4),((7,13),13),((7,15),2),((7,16),8),((7,17),6),((7,18),4),((8,8),30),((8,9),9),((8,10),6),((8,11),9),((8,12),6),((8,13),3),((8,14),3),((8,15),4),((8,16),15),((8,17),3),((8,18),5),((9,9),69),((9,10),9),((9,11),22),((9,12),15),((9,13),18),((9,14),10),((9,15),14),((9,16),48),((9,17),6),((9,18),9),((10,10),39),((10,11),15),((10,12),5),((10,13),11),((10,14),2),((10,15),4),((10,16),19),((10,17),3),((10,18),11),((11,11),48),((11,12),9),((11,13),20),((11,14),2),((11,15),13),((11,16),29),((11,18),13),((12,12),30),((12,13),4),((12,15),5),((12,16),16),((12,17),6),((12,18),2),((13,13),65),((13,14),10),((13,15),14),((13,16),23),((13,17),6),((13,18),10),((14,14),25),((14,16),9),((14,17),3),((14,18),3),((15,15),38),((15,16),17),((15,18),4),((16,16),99),((16,17),11),((16,18),14),((17,17),29),((18,18),23)]
-}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment