Commit baab1af6 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[SnapCSV] parser

parent 367e6b2c
...@@ -15,8 +15,8 @@ import Data.Text (Text) ...@@ -15,8 +15,8 @@ import Data.Text (Text)
import Data.Vector.Unboxed (Vector) import Data.Vector.Unboxed (Vector)
import Graph.BAC.Clustering import Graph.BAC.Clustering
import Graph.BAC.ProxemyOptim import Graph.BAC.ProxemyOptim
import Graph.BAC.ProxemyOptim
import Graph.FGL import Graph.FGL
import Graph.Tools.CSV
import Graph.Tools.Import import Graph.Tools.Import
import Graph.Types import Graph.Types
import Prelude (String) import Prelude (String)
...@@ -41,7 +41,7 @@ import qualified Data.IntSet as IntSet ...@@ -41,7 +41,7 @@ import qualified Data.IntSet as IntSet
import qualified Data.Text as Text import qualified Data.Text as Text
setupEnv :: FilePath -> IO (Graph [Text] Double) setupEnv :: FilePath -> IO (Graph [Text] Double)
setupEnv fp = getGraph (WithFile fp) setupEnv fp = getGraph SnapGraph (WithFile fp)
main :: IO () main :: IO ()
main = do main = do
...@@ -60,6 +60,7 @@ main = do ...@@ -60,6 +60,7 @@ main = do
$ IntMap.elems clusts $ IntMap.elems clusts
putStrLn $ "#clusters: " ++ show (IntMap.size clusts) putStrLn $ "#clusters: " ++ show (IntMap.size clusts)
putStrLn $ "max cluster size: " ++ show (length (clusts' Prelude.!! 0)) putStrLn $ "max cluster size: " ++ show (length (clusts' Prelude.!! 0))
putStrLn $ "min cluster size: " ++ show (length (Prelude.last clusts'))
putStrLn $ "Clustering score: " ++ show score putStrLn $ "Clustering score: " ++ show score
withFile fpout WriteMode $ \hndl -> withFile fpout WriteMode $ \hndl ->
forM_ clusts' $ \clust -> forM_ clusts' $ \clust ->
......
#!/bin/bash
cat urls.txt | while read url; do lynx -dump $url | grep -oP "http.*gz" | while read gz; do wget $gz; done; done
https://snap.stanford.edu/data/com-DBLP.html
https://snap.stanford.edu/data/com-Amazon.html
#!/bin/bash #!/bin/bash
~/.local/bin/gargantext-graph-exe $1 +RTS -p # -sstderr #~/.local/bin/gargantext-graph-exe $1 +RTS -p # -sstderr
#time ~/.local/bin/gargantext-graph-exe +RTS -p # -sstderr #time ~/.local/bin/gargantext-graph-exe +RTS -p # -sstderr
INPUT=$1
OUTPUT1=${1%csv}out1
OUTPUT2=${1%csv}out2
touch $OUTPUT1
touch $OUTPUT2
#for beta in -0.6 -0.5 -0.3 -0.2 -0.1 0.0 0.3 0.4 0.5 0.6 0.7; do
for beta in 0.0 ; do
for gc in "gc" "nogc"; do
echo "$gc,$beta" >> $OUTPUT1 ;
echo "$gc,$beta" >> $OUTPUT2 ;
time ~/.local/bin/gargantext-graph-exe $1 $OUTPUT2 $gc $beta >> $OUTPUT1
done
done
...@@ -11,7 +11,7 @@ Portability : POSIX ...@@ -11,7 +11,7 @@ Portability : POSIX
publication it will be integrated to the backend with usual license -- publication it will be integrated to the backend with usual license --
see above) see above)
Article: Confluence for Graph Clustering, B. Gaume and A. Delanoë, Journal:TBA Article: Confluence for Graph Clustering, B. Gaume and A. Delanoë, A. Mestanogullari
Code written in Haskell by A. Delanoë from first Python Specifications by B. Code written in Haskell by A. Delanoë from first Python Specifications by B.
Gaume. Gaume.
......
...@@ -54,11 +54,12 @@ uniq' ns = map (\(n1,n2) -> (n1,n2,1)) ...@@ -54,11 +54,12 @@ uniq' ns = map (\(n1,n2) -> (n1,n2,1))
------------------------------------------------------------------------ ------------------------------------------------------------------------
data FileGraph = CillexGraph | TestGraph data FileGraph = CillexGraph | TestGraph | SnapGraph
readFileGraph :: FileGraph -> FilePath -> IO (DGIP.Gr [Text] Double) readFileGraph :: FileGraph -> FilePath -> IO (DGIP.Gr [Text] Double)
readFileGraph CillexGraph fp = toGraph <$> snd <$> readFileCsvCillex fp readFileGraph CillexGraph fp = toGraph <$> snd <$> readFileCsvCillex fp
readFileGraph TestGraph fp = toGraph <$> snd <$> readFileCsvTest fp readFileGraph TestGraph fp = toGraph <$> snd <$> readFileCsvTest fp
readFileGraph SnapGraph fp = toGraph <$> snd <$> readFileSnapCsv fp
toGraph :: ToNode a => Vector a -> (DGIP.Gr [Text] Double) toGraph :: ToNode a => Vector a -> (DGIP.Gr [Text] Double)
...@@ -90,6 +91,11 @@ instance ToNode TestCsv ...@@ -90,6 +91,11 @@ instance ToNode TestCsv
es = List.zip3 (cycle [n]) ns (cycle [1]) es = List.zip3 (cycle [n]) ns (cycle [1])
ns = map (read . unpack) $ splitOn "," v ns = map (read . unpack) $ splitOn "," v
instance ToNode SnapCsv
where
toNode :: SnapCsv -> ((Int, [Text]), [(Int,Int,Double)])
toNode (SnapCsv f t) = ((f, []), [(f,t,1)])
toNodeUnlabelled :: TestCsv -> ((Int, ()), [(Int,Int,())]) toNodeUnlabelled :: TestCsv -> ((Int, ()), [(Int,Int,())])
toNodeUnlabelled (TestCsv n v l) = (ln, es) toNodeUnlabelled (TestCsv n v l) = (ln, es)
where where
...@@ -100,22 +106,37 @@ toNodeUnlabelled (TestCsv n v l) = (ln, es) ...@@ -100,22 +106,37 @@ toNodeUnlabelled (TestCsv n v l) = (ln, es)
------------------------------------------------------------------------ ------------------------------------------------------------------------
readFileCsvCillex :: FilePath -> IO (Header, Vector CillexCsv) readFileCsvCillex :: FilePath -> IO (Header, Vector CillexCsv)
readFileCsvCillex = fmap readWith . BL.readFile readFileCsvCillex = fmap (readWith ',') . BL.readFile
readFileCsvTest :: FilePath -> IO (Header, Vector TestCsv) readFileCsvTest :: FilePath -> IO (Header, Vector TestCsv)
readFileCsvTest = fmap readWith . BL.readFile readFileCsvTest = fmap (readWith ',') . BL.readFile
csvDecodeOptions :: DecodeOptions readFileSnapCsv :: FilePath -> IO (Header, Vector SnapCsv)
csvDecodeOptions = defaultDecodeOptions {decDelimiter = delimiter} readFileSnapCsv = fmap (readWith '\t') . BL.readFile
where ------------------------------------------------------------------------
delimiter :: Word8
delimiter = fromIntegral $ ord ','
readWith :: FromNamedRecord a => BL.ByteString -> (Header, Vector a) readWith :: FromNamedRecord a => Char -> BL.ByteString -> (Header, Vector a)
readWith bs = case decodeByNameWith csvDecodeOptions bs of readWith x bs = case decodeByNameWith (csvDecodeOptions x) bs of
Left e -> panic (pack e) Left e -> panic (pack e)
Right csvDocs -> csvDocs Right csvDocs -> csvDocs
csvDecodeOptions :: Char -> DecodeOptions
csvDecodeOptions x = defaultDecodeOptions {decDelimiter = delimiter}
where
delimiter :: Word8
delimiter = fromIntegral $ ord x
------------------------------------------------------------------------
data SnapCsv =
SnapCsv { sc_fromNodeId :: Int
, sc_toNodeId :: Int
}
deriving (Show)
instance FromNamedRecord SnapCsv where
parseNamedRecord r = SnapCsv <$> r .: "FromNodeId"
<*> r .: "ToNodeId"
------------------------------------------------------------------------ ------------------------------------------------------------------------
data TestCsv = data TestCsv =
TestCsv { rc_node :: Int TestCsv { rc_node :: Int
......
...@@ -51,21 +51,21 @@ data GraphData a b where ...@@ -51,21 +51,21 @@ data GraphData a b where
:: { labelledGraph :: Graph [Text] Double } -> GraphData [Text] Double :: { labelledGraph :: Graph [Text] Double } -> GraphData [Text] Double
getGraph' :: GetGraph a b-> IO (GraphData a b) getGraph' :: FileGraph -> GetGraph a b-> IO (GraphData a b)
getGraph' (Random n) = reifyNat (fromIntegral n) $ \(pn :: Proxy n) -> getGraph' _ (Random n) = reifyNat (fromIntegral n) $ \(pn :: Proxy n) ->
randomAdjacency @n randomAdjacency @n
>>= \m -> pure $ LightGraph >>= \m -> pure $ LightGraph
$ mkGraphUfromEdges $ mkGraphUfromEdges
$ List.map (\(x,y,_) -> (x,y)) $ List.map (\(x,y,_) -> (x,y))
$ SMatrix.toList m $ SMatrix.toList m
getGraph' (WithFile fp) = do getGraph' fg (WithFile fp) = do
g <- readFileGraph CillexGraph fp g <- readFileGraph fg fp
pure $ LabelledGraph g pure $ LabelledGraph g
getGraph :: GetGraph a b -> IO (Graph a b) getGraph :: FileGraph -> GetGraph a b -> IO (Graph a b)
getGraph gg = toGraph' <$> getGraph' gg getGraph fg gg = toGraph' <$> getGraph' fg gg
toGraph' :: GraphData a b -> Graph a b toGraph' :: GraphData a b -> Graph a b
toGraph' (LightGraph g) = g toGraph' (LightGraph g) = g
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment