Commit 32c41543 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] from CSV file added

parent 5843d84c
......@@ -4,10 +4,13 @@ module Main where
import Data.Set as Set
import Data.String.Conversions (cs)
import System.Environment (getArgs)
import Data.Text (Text)
import Graph.BAC.Clustering
import Graph.BAC.ProxemyOptim
import Graph.BAC.ProxemyOptim
import Graph.FGL
import Graph.Tools.Import
import Prelude (String)
import Protolude
import qualified Data.IntMap as Dict
......@@ -15,15 +18,16 @@ import qualified Data.List as List
import qualified Eigen.Matrix as Matrix
import qualified Eigen.SparseMatrix as SMatrix
import qualified Prelude as Prelude
import qualified Data.Graph.Inductive.PatriciaTree as DGIP
main :: IO ()
main = do
m <- randomAdjacency
fp <- getArgs
(m,g) <- case fp of
[] -> getUnlabGraph Random
[fp'] -> getUnlabGraph (WithFile fp')
let
g :: Graph () ()
g = mkGraphUfromEdges
$ List.map (\(x,y,_) -> (x,y))
$ SMatrix.toList m
result = withG g (identity . (clusteringOptim 3 Conf))
result = withG g (parts . (clusteringOptim 3 Conf))
print (result)
{-# LANGUAGE NoImplicitPrelude #-}
module Main2 where
import Graph.Tools
import Graph.BAC.Clustering
import System.Environment (getArgs)
import Protolude
main :: IO ()
main = do
[fp] <- getArgs
graph <- readFileGraph TestGraph fp
print (clusterGraph 3 graph)
This diff is collapsed.
......@@ -4,7 +4,7 @@ cabal-version: 1.12
--
-- see: https://github.com/sol/hpack
--
-- hash: 4cecc6b992a2dda8a33d66136d8ffcda1eb4fcfed9fb2a21df529821278d524d
-- hash: c963b0693d31715c24d7005b13eb4c408cd00c23009b3e68b6dbf329a47db200
name: gargantext-graph
version: 0.1.0.0
......@@ -37,7 +37,10 @@ library
Graph.BAC.Types
Graph.FGL
Graph.IGraph
Graph.Tools
Graph.Tools.CSV
Graph.Tools.Import
Graph.Tools.Random
Graph.Types
other-modules:
Paths_gargantext_graph
hs-source-dirs:
......@@ -68,12 +71,11 @@ library
executable gargantext-graph-exe
main-is: Main.hs
other-modules:
Main2
Paths_gargantext_graph
hs-source-dirs:
app
default-extensions: DataKinds FlexibleInstances OverloadedStrings
ghc-options: -O2 -threaded -rtsopts -with-rtsopts=-N -fprof-auto
ghc-options: -O2 -threaded -rtsopts -with-rtsopts=-N -fprof-auto -Wmissing-signatures -Wcompat
build-depends:
accelerate
, accelerate-arithmetic
......
......@@ -58,8 +58,8 @@ executables:
- -rtsopts
- -with-rtsopts=-N
- -fprof-auto
# - -Wmissing-signatures
# - -Wcompat
- -Wmissing-signatures
- -Wcompat
dependencies:
- gargantext-graph
......
#!/bin/bash
#~/.local/bin/gargantext-graph-exe $1 +RTS -p # -sstderr
time ~/.local/bin/gargantext-graph-exe +RTS -p # -sstderr
~/.local/bin/gargantext-graph-exe $1 +RTS -p # -sstderr
#time ~/.local/bin/gargantext-graph-exe +RTS -p # -sstderr
......@@ -44,6 +44,7 @@ import Eigen.Matrix (sum)
import Eigen.SparseMatrix (SparseMatrix, SparseMatrixXd, (!), toMatrix)
import GHC.TypeLits (KnownNat, Nat, SomeNat(SomeNat), type(+), natVal, sameNat, someNatVal)
import Graph.FGL
import Graph.Types
import Prelude (String, readLn)
import Protolude hiding (sum, natVal)
import qualified Eigen.Matrix as DenseMatrix
......@@ -91,7 +92,6 @@ type Length = Int
-- | A finite Graph is a Graph whose number of Nodes is a KnownNat
data FiniteGraph (n :: Nat) a b = FiniteGraph (Graph a b)
type Graph a b = DGI.Gr a b
data Clustering a = ClusteringIs { parts :: !(Dict (Set a))
, index :: !(Dict Int)
......@@ -124,13 +124,9 @@ type ProxemyMatrix n = MatrixD n
type ConfluenceMatrix n = MatrixD n
type ModularityMatrix n = MatrixD n
type MatrixD n = DenseMatrix.Matrix n n Double
type MatrixS n = SparseMatrix n n Double
data SimilarityMatrix n = SimConf !(ConfluenceMatrix n)
| SimMod !(ModularityMatrix n)
type Dict = IntMap
-----
adjacent :: KnownNat n
......@@ -445,25 +441,4 @@ sumWith dir f m = VS.fromList
somme m' = map (sum . SMatrix.toMatrix) m'
--------------------------------------------
-- | Random Matrix && Graph
-- TODO random matrix of any size for the tests
randomMatrix :: IO (MatrixD 100)
randomMatrix = DMatrix.random
{-
matrix2graph :: forall n
. (KnownNat n)
=> MatrixS n
-> FiniteGraph n () ()
matrix2graph m = withG (mkGraphUfromEdges $ map (\(x,y,_) -> (x,y)) $ SMatrix.toList m)
identity
-}
randomAdjacency :: IO (MatrixS 100)
randomAdjacency = do
m1 <- randomMatrix
m2 <- randomMatrix
pure $ SMatrix.fromMatrix
$ DMatrix.imap (\i j v -> if i < j && v > 0.9 then 1 else 0)
$ DMatrix.mul m1 m2
{-| Module : Gargantext.Core.Viz.Graph.DGI
{-| Module : Graph.FGL
Description : DGI main functions used in Garg
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -6,7 +6,9 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main DGI funs/types to ease portability with IGraph.
This is supposed to be the main Graph funs/types of the library.
Includes some tools to ease portability with IGraph.
-}
......@@ -17,49 +19,50 @@ Main DGI funs/types to ease portability with IGraph.
module Graph.FGL where
import Protolude
import Graph.Tools (uniq)
import Graph.Types
import Graph.Tools.CSV (uniq)
import qualified Data.List as List
import qualified Data.IntMap as IntMap
import qualified Data.IntMap as Dict
import qualified Data.Set as Set
import qualified Data.Graph.Inductive as Graph
import qualified Data.Graph.Inductive as DGI
import qualified Data.Graph.Inductive.PatriciaTree as DGIP
import qualified Data.Map as Map
------------------------------------------------------------------
------------------------------------------------------------------
-- | Main Types
type Node = DGI.Node -- Int
type Edge = DGI.Edge -- (Int, Int)
type Node = Graph.Node -- Int
type Edge = Graph.Edge -- (Int, Int)
------------------------------------------------------------------
-- | Main Functions
mkGraph :: [Node] -> [Edge] -> DGIP.Gr () ()
mkGraph = DGI.mkUGraph
mkGraph :: [Graph.Node] -> [Graph.Edge] -> Graph () ()
mkGraph = Graph.mkUGraph
neighbors :: DGIP.Gr a b -> Node -> [Node]
neighbors = DGI.neighbors
neighbors :: Graph a b -> Graph.Node -> [Graph.Node]
neighbors = Graph.neighbors
-- TODO optimize
edges :: DGI.DynGraph gr => gr a b -> [Edge]
edges g = uniq $ DGI.edges g
edges :: Graph.DynGraph gr => gr a b -> [Graph.Edge]
edges g = uniq $ Graph.edges g
nodes :: DGIP.Gr a b -> [Node]
nodes = DGI.nodes
nodes :: Graph a b -> [Graph.Node]
nodes = Graph.nodes
------------------------------------------------------------------------
-- | Graph Tools
filterNeighbors :: DGIP.Gr a b -> Node -> [Node]
filterNeighbors :: Graph a b -> Graph.Node -> [Graph.Node]
filterNeighbors g n = List.nub $ neighbors g n
-- Q: why not D.G.I.deg ? (Int as result)
degree :: (DGI.DynGraph gr, Num weight) => gr a weight -> Node -> weight
degree :: (Graph.DynGraph gr, Num weight) => gr a weight -> Node -> weight
degree g n = sum
$ IntMap.elems
$ IntMap.fromListWith (+)
$ Dict.elems
$ Dict.fromListWith (+)
$ map swap
$ DGI.lneighbors g n
$ Graph.lneighbors g n
vcount :: DGIP.Gr a b -> Double
vcount :: Graph a b -> Double
vcount = fromIntegral . Set.size . Set.fromList . nodes
......@@ -73,29 +76,29 @@ ecount = sum
-- | HyperGraph
{- TODO ?
fromHypergraph :: IntMap (Set Node) -> DGI.Gr a Int -> DGI.Gr () ()
fromHypergraph :: Dict (Set Node) -> DGI.Gr a Int -> DGI.Gr () ()
fromHypergraph = undefined
-}
------------------------------------------------------------------------
toHypergraph :: (Num b, Ord b, Fractional b, Monoid a, Ord a)
=> IntMap (Set Node)
=> Dict (Set Node)
-> DGIP.Gr a b
-> DGIP.Gr a b
toHypergraph m g = toHypergraph' m g fromIntegral
toHypergraph' :: (Num b, Ord b, Fractional b, Monoid a, Ord a)
=> IntMap (Set Node)
-> DGIP.Gr a b
=> Dict (Set Node)
-> Graph a b
-> (Int -> b)
-> DGIP.Gr a b
toHypergraph' m g f = DGI.mkGraph ns es
-> Graph a b
toHypergraph' m g f = Graph.mkGraph ns es
where
vs = IntMap.toList m
vs = Dict.toList m
ns = map (\(n,ns) -> (n, List.foldl' (\m l -> m <> l) mempty
$ catMaybes
$ Set.toList
$ Set.map (DGI.lab g) ns
$ Set.map (Graph.lab g) ns
)
) vs
es = selfNodes <> nbors
......@@ -111,56 +114,61 @@ toHypergraph' m g f = DGI.mkGraph ns es
type ClusterId = Int
weightedNeighbors :: (Num b, Ord b)
=> IntMap ClusterId
-> DGIP.Gr a b
=> Dict ClusterId
-> Graph a b
-> (Node, Set Node)
-> [(Node,Node,b)]
weightedNeighbors m g (n,ns) = catMaybes
$ map (\(n',w) -> (,,) <$> Just n <*> IntMap.lookup n' m <*> Just w)
$ IntMap.toList
$ map (\(n',w) -> (,,) <$> Just n <*> Dict.lookup n' m <*> Just w)
$ Dict.toList
$ hasNeighbors g ns
{- TODO inverse
where
withEdge x y w = (,,) <$> Just n <*> IntMap.lookup n' m <*> Just w)
withEdge x y w = (,,) <$> Just n <*> Dict.lookup n' m <*> Just w)
-}
invIntMap :: IntMap (Set Node) -> IntMap ClusterId
invIntMap m = IntMap.fromList
invIntMap :: Dict (Set Node) -> Dict ClusterId
invIntMap m = Dict.fromList
$ List.concat
$ map swp
$ IntMap.toList m
$ Dict.toList m
where
swp (n, ns) = List.zip (Set.toList ns) (cycle [n])
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
class HasNeighbors a where
hasNeighbors :: (Num b, Ord b, Ord a)
=> DGIP.Gr a' b -> a -> IntMap b
=> Graph a' b -> a -> Dict b
instance HasNeighbors Node where
hasNeighbors g n = IntMap.fromListWith (+)
hasNeighbors g n = Dict.fromListWith (+)
$ map swap
$ DGI.lneighbors g n
$ Graph.lneighbors g n
instance HasNeighbors (Set Node) where
hasNeighbors g s0 = Set.foldl' (\m k -> IntMap.delete k m) (intMap g s0) s0
where
intMap :: (Num b, Ord b, HasNeighbors c, Ord c)
=> DGIP.Gr a b -> Set c -> IntMap b
intMap g s = IntMap.unionsWith (+)
$ Set.map (hasNeighbors g) s
hasNeighbors g s0 = Set.foldl' (\m k -> Dict.delete k m) (toDict g s0) s0
toDict :: (Num b, Ord b, HasNeighbors c, Ord c)
=> DGIP.Gr a b -> Set c -> Dict b
toDict g s = Dict.unionsWith (+)
$ Set.map (hasNeighbors g) s
------------------------------------------------------------------------
-- | Tools
hasWeight :: DGIP.Gr a b -> Set Node -> (Int -> b) -> b
hasWeight :: Graph a b -> Set Node -> (Int -> b) -> b
hasWeight g ns f = f
$ List.length
$ edges
$ DGI.subgraph (Set.toList ns) g
$ Graph.subgraph (Set.toList ns) g
------------------------------------------------------------------------
------------------------------------------------------------------------
mkGraphUfromEdges :: [(Int, Int)] -> DGIP.Gr () ()
mkGraphUfromEdges :: [(Int, Int)] -> Graph () ()
mkGraphUfromEdges es = mkGraph ns es
where
ns = List.nub (a <> b)
......@@ -168,7 +176,7 @@ mkGraphUfromEdges es = mkGraph ns es
(a, b) = List.unzip es
mkLGraphfromEdges :: [(Int, Int, Double)] -> DGIP.Gr () Double
mkLGraphfromEdges :: [(Int, Int, Double)] -> Graph () Double
mkLGraphfromEdges es = DGI.mkGraph ns es
where
ns = List.zip (List.nub (a <> b)) (List.cycle [()])
......
{-| Module : Graph.Tools
Description :
Copyright : (c) CNRS, Alexandre Delanoë
License : AGPL + CECILL v3
Maintainer : alexandre+dev@delanoe.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Graph.Tools where
import Data.Csv
import Data.Text (pack, splitOn, unpack)
import Data.Vector hiding (map, uniq)
import Prelude (read)
import Protolude
import qualified Data.Map as Map
import qualified Data.ByteString.Lazy as BL
import qualified Data.Graph.Inductive as DGI
import qualified Data.Graph.Inductive.PatriciaTree as DGIP
import qualified Data.List as List
import qualified Data.Set as Set
import qualified Data.Vector as Vector
------------------------------------------------------------------------
uniq :: [DGI.Edge] -> [DGI.Edge]
uniq ns = Set.toList
$ Set.fromList
$ map (\(n1,n2)
-> if n1 <= n2
then (n1,n2)
else (n2,n1)
) ns
uniq' :: [(Int,Int,Double)] -> [(Int,Int,Double)]
uniq' ns = map (\(n1,n2) -> (n1,n2,1))
$ Map.keys
$ Map.fromListWith (+)
$ map (\(n1,n2,w)
-> if n1 <= n2
then ((n1,n2), w)
else ((n2,n1), w)
) ns
------------------------------------------------------------------------
data FileGraph = CillexGraph | TestGraph
readFileGraph :: FileGraph -> FilePath -> IO (DGIP.Gr [Text] Double)
readFileGraph CillexGraph fp = toGraph <$> snd <$> readFileCsvCillex fp
readFileGraph TestGraph fp = toGraph <$> snd <$> readFileCsvTest fp
toGraph :: ToNode a => Vector a -> (DGIP.Gr [Text] Double)
toGraph vs = DGI.mkGraph ns (uniq' $ List.concat es)
where
(ns,es) = List.unzip
$ Vector.toList
$ Vector.map toNode vs
------------------------------------------------------------------------
class ToNode a where
toNode :: a -> ((Int, [Text]), [(Int,Int,Double)])
instance ToNode CillexCsv
where
toNode :: CillexCsv -> ((Int, [Text]), [(Int,Int,Double)])
toNode (CillexCsv n v _ _ l) = (ln, es)
where
ln = (n, [l])
es = List.zip3 (cycle [n]) ns (cycle [1])
ns = map (read . unpack) $ splitOn "," v
instance ToNode TestCsv
where
toNode :: TestCsv -> ((Int, [Text]), [(Int,Int,Double)])
toNode (TestCsv n v l) = (ln, es)
where
ln = (n, map show [l])
es = List.zip3 (cycle [n]) ns (cycle [1])
ns = map (read . unpack) $ splitOn "," v
------------------------------------------------------------------------
readFileCsvCillex :: FilePath -> IO (Header, Vector CillexCsv)
readFileCsvCillex = fmap readWith . BL.readFile
readFileCsvTest :: FilePath -> IO (Header, Vector TestCsv)
readFileCsvTest = fmap readWith . BL.readFile
csvDecodeOptions :: DecodeOptions
csvDecodeOptions = defaultDecodeOptions {decDelimiter = delimiter}
where
delimiter :: Word8
delimiter = fromIntegral $ ord ','
readWith :: FromNamedRecord a => BL.ByteString -> (Header, Vector a)
readWith bs = case decodeByNameWith csvDecodeOptions bs of
Left e -> panic (pack e)
Right csvDocs -> csvDocs
------------------------------------------------------------------------
data TestCsv =
TestCsv { rc_node :: Int
, rc_voisins :: Text
, rc_id :: Double
}
deriving (Show)
instance FromNamedRecord TestCsv where
parseNamedRecord r = TestCsv <$> r .: "@node: #id_pdg"
<*> r .: "%+voisins"
<*> r .: "label"
------------------------------------------------------------------------
data CillexCsv =
CillexCsv { node :: Int
, voisins :: Text
, id :: Double
, lemme :: Text
, label :: Text
}
deriving (Show)
instance FromNamedRecord CillexCsv where
parseNamedRecord r = CillexCsv <$> r .: "@node: #id_pdg"
<*> r .: "%+voisins"
<*> r .: "id"
<*> r .: "lemme"
<*> r .: "label"
{-
cillexCsvHeader =
header [ "@node: #id_pdg"
, "%+voisins"
, "id"
, "lemme"
, "label"
]
-}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment