Commit baab1af6 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[SnapCSV] parser

parent 367e6b2c
......@@ -15,8 +15,8 @@ import Data.Text (Text)
import Data.Vector.Unboxed (Vector)
import Graph.BAC.Clustering
import Graph.BAC.ProxemyOptim
import Graph.BAC.ProxemyOptim
import Graph.FGL
import Graph.Tools.CSV
import Graph.Tools.Import
import Graph.Types
import Prelude (String)
......@@ -41,7 +41,7 @@ import qualified Data.IntSet as IntSet
import qualified Data.Text as Text
setupEnv :: FilePath -> IO (Graph [Text] Double)
setupEnv fp = getGraph (WithFile fp)
setupEnv fp = getGraph SnapGraph (WithFile fp)
main :: IO ()
main = do
......@@ -58,8 +58,9 @@ main = do
clusts' = Prelude.map (sort . Prelude.map (lkp dico) . IntSet.toList)
$ sortBy (\a b -> flipOrd $ comparing IntSet.size a b)
$ IntMap.elems clusts
putStrLn $ "#clusters: " ++ show (IntMap.size clusts)
putStrLn $ "#clusters: " ++ show (IntMap.size clusts)
putStrLn $ "max cluster size: " ++ show (length (clusts' Prelude.!! 0))
putStrLn $ "min cluster size: " ++ show (length (Prelude.last clusts'))
putStrLn $ "Clustering score: " ++ show score
withFile fpout WriteMode $ \hndl ->
forM_ clusts' $ \clust ->
......
#!/bin/bash
cat urls.txt | while read url; do lynx -dump $url | grep -oP "http.*gz" | while read gz; do wget $gz; done; done
https://snap.stanford.edu/data/com-DBLP.html
https://snap.stanford.edu/data/com-Amazon.html
#!/bin/bash
~/.local/bin/gargantext-graph-exe $1 +RTS -p # -sstderr
#~/.local/bin/gargantext-graph-exe $1 +RTS -p # -sstderr
#time ~/.local/bin/gargantext-graph-exe +RTS -p # -sstderr
INPUT=$1
OUTPUT1=${1%csv}out1
OUTPUT2=${1%csv}out2
touch $OUTPUT1
touch $OUTPUT2
#for beta in -0.6 -0.5 -0.3 -0.2 -0.1 0.0 0.3 0.4 0.5 0.6 0.7; do
for beta in 0.0 ; do
for gc in "gc" "nogc"; do
echo "$gc,$beta" >> $OUTPUT1 ;
echo "$gc,$beta" >> $OUTPUT2 ;
time ~/.local/bin/gargantext-graph-exe $1 $OUTPUT2 $gc $beta >> $OUTPUT1
done
done
......@@ -11,7 +11,7 @@ Portability : POSIX
publication it will be integrated to the backend with usual license --
see above)
Article: Confluence for Graph Clustering, B. Gaume and A. Delanoë, Journal:TBA
Article: Confluence for Graph Clustering, B. Gaume and A. Delanoë, A. Mestanogullari
Code written in Haskell by A. Delanoë from first Python Specifications by B.
Gaume.
......
......@@ -54,11 +54,12 @@ uniq' ns = map (\(n1,n2) -> (n1,n2,1))
------------------------------------------------------------------------
data FileGraph = CillexGraph | TestGraph
data FileGraph = CillexGraph | TestGraph | SnapGraph
readFileGraph :: FileGraph -> FilePath -> IO (DGIP.Gr [Text] Double)
readFileGraph CillexGraph fp = toGraph <$> snd <$> readFileCsvCillex fp
readFileGraph TestGraph fp = toGraph <$> snd <$> readFileCsvTest fp
readFileGraph SnapGraph fp = toGraph <$> snd <$> readFileSnapCsv fp
toGraph :: ToNode a => Vector a -> (DGIP.Gr [Text] Double)
......@@ -90,6 +91,11 @@ instance ToNode TestCsv
es = List.zip3 (cycle [n]) ns (cycle [1])
ns = map (read . unpack) $ splitOn "," v
instance ToNode SnapCsv
where
toNode :: SnapCsv -> ((Int, [Text]), [(Int,Int,Double)])
toNode (SnapCsv f t) = ((f, []), [(f,t,1)])
toNodeUnlabelled :: TestCsv -> ((Int, ()), [(Int,Int,())])
toNodeUnlabelled (TestCsv n v l) = (ln, es)
where
......@@ -100,22 +106,37 @@ toNodeUnlabelled (TestCsv n v l) = (ln, es)
------------------------------------------------------------------------
readFileCsvCillex :: FilePath -> IO (Header, Vector CillexCsv)
readFileCsvCillex = fmap readWith . BL.readFile
readFileCsvCillex = fmap (readWith ',') . BL.readFile
readFileCsvTest :: FilePath -> IO (Header, Vector TestCsv)
readFileCsvTest = fmap readWith . BL.readFile
readFileCsvTest = fmap (readWith ',') . BL.readFile
csvDecodeOptions :: DecodeOptions
csvDecodeOptions = defaultDecodeOptions {decDelimiter = delimiter}
where
delimiter :: Word8
delimiter = fromIntegral $ ord ','
readFileSnapCsv :: FilePath -> IO (Header, Vector SnapCsv)
readFileSnapCsv = fmap (readWith '\t') . BL.readFile
------------------------------------------------------------------------
readWith :: FromNamedRecord a => BL.ByteString -> (Header, Vector a)
readWith bs = case decodeByNameWith csvDecodeOptions bs of
readWith :: FromNamedRecord a => Char -> BL.ByteString -> (Header, Vector a)
readWith x bs = case decodeByNameWith (csvDecodeOptions x) bs of
Left e -> panic (pack e)
Right csvDocs -> csvDocs
csvDecodeOptions :: Char -> DecodeOptions
csvDecodeOptions x = defaultDecodeOptions {decDelimiter = delimiter}
where
delimiter :: Word8
delimiter = fromIntegral $ ord x
------------------------------------------------------------------------
data SnapCsv =
SnapCsv { sc_fromNodeId :: Int
, sc_toNodeId :: Int
}
deriving (Show)
instance FromNamedRecord SnapCsv where
parseNamedRecord r = SnapCsv <$> r .: "FromNodeId"
<*> r .: "ToNodeId"
------------------------------------------------------------------------
data TestCsv =
TestCsv { rc_node :: Int
......
......@@ -51,21 +51,21 @@ data GraphData a b where
:: { labelledGraph :: Graph [Text] Double } -> GraphData [Text] Double
getGraph' :: GetGraph a b-> IO (GraphData a b)
getGraph' (Random n) = reifyNat (fromIntegral n) $ \(pn :: Proxy n) ->
getGraph' :: FileGraph -> GetGraph a b-> IO (GraphData a b)
getGraph' _ (Random n) = reifyNat (fromIntegral n) $ \(pn :: Proxy n) ->
randomAdjacency @n
>>= \m -> pure $ LightGraph
$ mkGraphUfromEdges
$ List.map (\(x,y,_) -> (x,y))
$ SMatrix.toList m
getGraph' (WithFile fp) = do
g <- readFileGraph CillexGraph fp
getGraph' fg (WithFile fp) = do
g <- readFileGraph fg fp
pure $ LabelledGraph g
getGraph :: GetGraph a b -> IO (Graph a b)
getGraph gg = toGraph' <$> getGraph' gg
getGraph :: FileGraph -> GetGraph a b -> IO (Graph a b)
getGraph fg gg = toGraph' <$> getGraph' fg gg
toGraph' :: GraphData a b -> Graph a b
toGraph' (LightGraph g) = g
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment