Commit fc1084bf authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 70-dev-searx-parser

parents 966c3ed0 23a64f73
## Version 0.0.4.9.5
* [FEAT] Order 2 fixed with filtered edges
## Version 0.0.4.9.4
* [FEAT] Order 1 similarity validated and optimized
## Version 0.0.4.9.3
* [FIX] Node Calc import + more flexible delimiter for CSV parser
## Version 0.0.4.9.2
* [FEAT] Node Calc Parsing added (in tests)
## Version 0.0.4.9.1 ## Version 0.0.4.9.1
* [FIX] Graph Screenshot * [FIX] Graph Screenshot
......
name: gargantext name: gargantext
version: '0.0.4.9.1' version: '0.0.4.9.5'
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
......
...@@ -11,6 +11,7 @@ module Gargantext.API.Ngrams.Types where ...@@ -11,6 +11,7 @@ module Gargantext.API.Ngrams.Types where
import Codec.Serialise (Serialise()) import Codec.Serialise (Serialise())
import Control.Category ((>>>)) import Control.Category ((>>>))
import Control.DeepSeq (NFData)
import Control.Lens (makeLenses, makePrisms, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^?), (%~), (.~), (%=), at, _Just, Each(..), itraverse_, both, forOf_, (?~)) import Control.Lens (makeLenses, makePrisms, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^?), (%~), (.~), (%=), at, _Just, Each(..), itraverse_, both, forOf_, (?~))
import Control.Monad.State import Control.Monad.State
import Data.Aeson hiding ((.=)) import Data.Aeson hiding ((.=))
...@@ -121,7 +122,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where ...@@ -121,7 +122,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
------------------------------------------------------------------------ ------------------------------------------------------------------------
newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text } newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable) deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable, NFData)
instance IsHashable NgramsTerm where instance IsHashable NgramsTerm where
hash (NgramsTerm t) = hash t hash (NgramsTerm t) = hash t
......
...@@ -80,6 +80,7 @@ frameCalcUploadAsync uId nId _f logStatus jobLog = do ...@@ -80,6 +80,7 @@ frameCalcUploadAsync uId nId _f logStatus jobLog = do
httpLbs req manager httpLbs req manager
let body = T.pack $ BSU8.toString $ BSL.toStrict $ responseBody res let body = T.pack $ BSU8.toString $ BSL.toStrict $ responseBody res
-- printDebug "body" body
mCId <- getClosestParentIdByType nId NodeCorpus mCId <- getClosestParentIdByType nId NodeCorpus
-- printDebug "[frameCalcUploadAsync] mCId" mCId -- printDebug "[frameCalcUploadAsync] mCId" mCId
......
...@@ -123,7 +123,7 @@ readCSVFile_Annuaire' :: FilePath -> IO (Header, Vector IMTUser) ...@@ -123,7 +123,7 @@ readCSVFile_Annuaire' :: FilePath -> IO (Header, Vector IMTUser)
readCSVFile_Annuaire' = fmap readCsvHalLazyBS' . BL.readFile readCSVFile_Annuaire' = fmap readCsvHalLazyBS' . BL.readFile
where where
readCsvHalLazyBS' :: BL.ByteString -> (Header, Vector IMTUser) readCsvHalLazyBS' :: BL.ByteString -> (Header, Vector IMTUser)
readCsvHalLazyBS' bs = case decodeByNameWith csvDecodeOptions bs of readCsvHalLazyBS' bs = case decodeByNameWith (csvDecodeOptions Tab) bs of
Left e -> panic (cs e) Left e -> panic (cs e)
Right rows -> rows Right rows -> rows
......
...@@ -15,108 +15,44 @@ Motivation and definition of the @Conditional@ distance. ...@@ -15,108 +15,44 @@ Motivation and definition of the @Conditional@ distance.
module Gargantext.Core.Methods.Distances.Conditional module Gargantext.Core.Methods.Distances.Conditional
where where
import Data.List (sortOn) import Control.DeepSeq (NFData)
import Data.Map (Map) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.Matrix hiding (identity) import Data.Hashable (Hashable)
import Gargantext.Core.Viz.Graph.Utils import Data.List (unzip)
import Data.Maybe (catMaybes)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Map as M import Gargantext.Core.Viz.Graph.Utils (getMax)
import qualified Data.Set as S import qualified Data.HashMap.Strict as Map
import qualified Data.Vector as V import qualified Data.Set as Set
------------------------------------------------------------------------
-- | Optimisation issue
toBeOptimized :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
toBeOptimized m = proba Col m
type HashMap = Map.HashMap
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Main Functions -- First version as first implementation
-- Compute the probability from axis -- - qualitatively verified
-- x' = x / (sum Col x) -- - parallized as main optimization
proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a conditional :: (Ord a, Hashable a, NFData a)
proba a m = mapOn a (\c x -> x / V.sum (axis a c m)) m => HashMap (a,a) Int
-> HashMap (a,a) Double
conditional m' = Map.fromList $ ((catMaybes results') `using` parList rdeepseq)
mapOn :: Axis -> (AxisId -> a -> a) -> Matrix a -> Matrix a
mapOn a f m = V.foldl' f' m (V.enumFromTo 1 (nOf a m))
where where
f' m' c = mapOnly a f c m' results' = [ let
ij = (/) <$> Map.lookup (i,j) m <*> Map.lookup (i,i) m
ji = (/) <$> Map.lookup (j,i) m <*> Map.lookup (j,j) m
in getMax (i,j) ij ji
mapOnly :: Axis -> (AxisId -> a -> a) -> AxisId -> Matrix a -> Matrix a | i <- keys
mapOnly Col = mapCol , j <- keys
mapOnly Row = mapRow , i < j
]
-- Converting from Int to Double
m = Map.map fromIntegral m'
mapAll :: (a -> a) -> Matrix a -> Matrix a -- Get the matrix coordinates, removing duplicates
mapAll f m = mapOn Col (\_ -> f) m keys = Set.toList $ Set.fromList (x <> y)
(x,y) = unzip $ Map.keys m
---------------------------------------------------------------
-- | Compute a distance from axis
-- xs = (sum Col x') - x'
distFromSum :: (Num a, Fractional a)
=> Axis -> Matrix a -> Matrix a
distFromSum a m = mapOn a (\c x -> V.sum (axis a c m) - x) m
---------------------------------------------------------------
---------------------------------------------------------------
-- | To compute included/excluded or specific/generic scores
opWith :: (Fractional a1, Num a1)
=> (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
where
n = fromIntegral $ nOf Col xs
---------------------------------------------------------------
-------------------------------------------------------
conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
conditional m = filterMat (threshold m') m'
where
------------------------------------------------------------------------
-- | Main Operations
-- x' = x / (sum Col x)
x' = proba Col m
------------------------------------------------------------------------
-- xs = (sum Col x') - x'
xs = distFromSum Col x'
-- ys = (sum Row x') - x'
ys = distFromSum Row x'
------------------------------------------------------------------------
-- | Top included or excluded
ie = opWith (+) xs ys
-- ie = ( xs + ys) / (2 * (x.shape[0] - 1))
-- | Top specific or generic
sg = opWith (-) xs ys
-- sg = ( xs - ys) / (2 * (x.shape[0] - 1))
nodes_kept :: [Int]
nodes_kept = take k' $ S.toList
$ foldl' (\s (n1,n2) -> insert [n1,n2] s) S.empty
$ map fst
$ nodes_included k <> nodes_specific k
nodes_included n = take n $ sortOn snd $ toListsWithIndex ie
nodes_specific n = take n $ sortOn snd $ toListsWithIndex sg
insert as s = foldl' (\s' a -> S.insert a s') s as
k' = 2*k
k = 10
dico_nodes :: Map Int Int
dico_nodes = M.fromList $ zip ([1..] :: [Int]) nodes_kept
--dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
m' = matrix (length nodes_kept)
(length nodes_kept)
(\(i,j) -> getElem ((M.!) dico_nodes i) ((M.!) dico_nodes j) x')
threshold m'' = V.minimum
$ V.map (\cId -> V.maximum $ getCol cId m'')
(V.enumFromTo 1 (nOf Col m'') )
filterMat t m'' = mapAll (\x -> filter' t x) m''
where
filter' t' x = case (x >= t') of
True -> x
False -> 0
------------------------------------------------------------------------
...@@ -218,14 +218,17 @@ hyperdataDocument2csvDoc h = CsvDoc { csv_title = m $ _hd_title h ...@@ -218,14 +218,17 @@ hyperdataDocument2csvDoc h = CsvDoc { csv_title = m $ _hd_title h
mI = maybe 0 identity mI = maybe 0 identity
csvDecodeOptions :: DecodeOptions data Delimiter = Tab | Comma
csvDecodeOptions = defaultDecodeOptions {decDelimiter = delimiter}
csvEncodeOptions :: EncodeOptions csvDecodeOptions :: Delimiter -> DecodeOptions
csvEncodeOptions = defaultEncodeOptions {encDelimiter = delimiter} csvDecodeOptions d = defaultDecodeOptions {decDelimiter = delimiter d}
delimiter :: Word8 csvEncodeOptions :: Delimiter -> EncodeOptions
delimiter = fromIntegral $ ord '\t' csvEncodeOptions d = defaultEncodeOptions {encDelimiter = delimiter d}
delimiter :: Delimiter -> Word8
delimiter Tab = fromIntegral $ ord '\t'
delimiter Comma = fromIntegral $ ord ','
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
readCsvOn' :: [CsvDoc -> Text] -> FilePath -> IO (Either Prelude.String [Text]) readCsvOn' :: [CsvDoc -> Text] -> FilePath -> IO (Either Prelude.String [Text])
...@@ -237,27 +240,44 @@ readCsvOn' fields fp = do ...@@ -237,27 +240,44 @@ readCsvOn' fields fp = do
------------------------------------------------------------------------ ------------------------------------------------------------------------
readFileLazy :: (FromNamedRecord a) => proxy a -> FilePath -> IO (Either Prelude.String (Header, Vector a)) readFileLazy :: (FromNamedRecord a) => proxy a -> Delimiter -> FilePath -> IO (Either Prelude.String (Header, Vector a))
readFileLazy f = fmap (readByteStringLazy f) . BL.readFile readFileLazy d f = fmap (readByteStringLazy d f) . BL.readFile
readFileStrict :: (FromNamedRecord a) => proxy a -> FilePath -> IO (Either Prelude.String (Header, Vector a)) readFileStrict :: (FromNamedRecord a)
readFileStrict f = fmap (readByteStringStrict f) . BS.readFile => proxy a
-> Delimiter
readByteStringLazy :: (FromNamedRecord a) => proxy a -> BL.ByteString -> Either Prelude.String (Header, Vector a) -> FilePath
readByteStringLazy _f bs = decodeByNameWith csvDecodeOptions bs -> IO (Either Prelude.String (Header, Vector a))
readFileStrict d f = fmap (readByteStringStrict d f) . BS.readFile
readByteStringStrict :: (FromNamedRecord a) => proxy a -> BS.ByteString -> Either Prelude.String (Header, Vector a)
readByteStringStrict ff = (readByteStringLazy ff) . BL.fromStrict readByteStringLazy :: (FromNamedRecord a)
=> proxy a
-> Delimiter
-> BL.ByteString
-> Either Prelude.String (Header, Vector a)
readByteStringLazy _f d bs = decodeByNameWith (csvDecodeOptions d) bs
readByteStringStrict :: (FromNamedRecord a)
=> proxy a
-> Delimiter
-> BS.ByteString
-> Either Prelude.String (Header, Vector a)
readByteStringStrict d ff = (readByteStringLazy d ff) . BL.fromStrict
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO use readFileLazy -- | TODO use readFileLazy
readFile :: FilePath -> IO (Either Prelude.String (Header, Vector CsvDoc)) readFile :: FilePath -> IO (Either Prelude.String (Header, Vector CsvDoc))
readFile = fmap readCsvLazyBS . BL.readFile readFile fp = do
result <- fmap (readCsvLazyBS Comma) $ BL.readFile fp
case result of
Left _err -> fmap (readCsvLazyBS Tab) $ BL.readFile fp
Right res -> pure $ Right res
-- | TODO use readByteStringLazy -- | TODO use readByteStringLazy
readCsvLazyBS :: BL.ByteString -> Either Prelude.String (Header, Vector CsvDoc) readCsvLazyBS :: Delimiter -> BL.ByteString -> Either Prelude.String (Header, Vector CsvDoc)
readCsvLazyBS bs = decodeByNameWith csvDecodeOptions bs readCsvLazyBS d bs = decodeByNameWith (csvDecodeOptions d) bs
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO use readFileLazy -- | TODO use readFileLazy
...@@ -266,7 +286,7 @@ readCsvHal = fmap readCsvHalLazyBS . BL.readFile ...@@ -266,7 +286,7 @@ readCsvHal = fmap readCsvHalLazyBS . BL.readFile
-- | TODO use readByteStringLazy -- | TODO use readByteStringLazy
readCsvHalLazyBS :: BL.ByteString -> Either Prelude.String (Header, Vector CsvHal) readCsvHalLazyBS :: BL.ByteString -> Either Prelude.String (Header, Vector CsvHal)
readCsvHalLazyBS bs = decodeByNameWith csvDecodeOptions bs readCsvHalLazyBS bs = decodeByNameWith (csvDecodeOptions Tab) bs
readCsvHalBSStrict :: BS.ByteString -> Either Prelude.String (Header, Vector CsvHal) readCsvHalBSStrict :: BS.ByteString -> Either Prelude.String (Header, Vector CsvHal)
readCsvHalBSStrict = readCsvHalLazyBS . BL.fromStrict readCsvHalBSStrict = readCsvHalLazyBS . BL.fromStrict
...@@ -274,13 +294,13 @@ readCsvHalBSStrict = readCsvHalLazyBS . BL.fromStrict ...@@ -274,13 +294,13 @@ readCsvHalBSStrict = readCsvHalLazyBS . BL.fromStrict
------------------------------------------------------------------------ ------------------------------------------------------------------------
writeFile :: FilePath -> (Header, Vector CsvDoc) -> IO () writeFile :: FilePath -> (Header, Vector CsvDoc) -> IO ()
writeFile fp (h, vs) = BL.writeFile fp $ writeFile fp (h, vs) = BL.writeFile fp $
encodeByNameWith csvEncodeOptions h (V.toList vs) encodeByNameWith (csvEncodeOptions Tab) h (V.toList vs)
writeDocs2Csv :: FilePath -> [HyperdataDocument] -> IO () writeDocs2Csv :: FilePath -> [HyperdataDocument] -> IO ()
writeDocs2Csv fp hs = BL.writeFile fp $ hyperdataDocument2csv hs writeDocs2Csv fp hs = BL.writeFile fp $ hyperdataDocument2csv hs
hyperdataDocument2csv :: [HyperdataDocument] -> BL.ByteString hyperdataDocument2csv :: [HyperdataDocument] -> BL.ByteString
hyperdataDocument2csv hs = encodeByNameWith csvEncodeOptions headerCsvGargV3 (map hyperdataDocument2csvDoc hs) hyperdataDocument2csv hs = encodeByNameWith (csvEncodeOptions Tab) headerCsvGargV3 (map hyperdataDocument2csvDoc hs)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Hal Format -- Hal Format
...@@ -425,13 +445,22 @@ parseHal' :: BL.ByteString -> Either Prelude.String [HyperdataDocument] ...@@ -425,13 +445,22 @@ parseHal' :: BL.ByteString -> Either Prelude.String [HyperdataDocument]
parseHal' bs = (V.toList . V.map csvHal2doc . snd) <$> readCsvHalLazyBS bs parseHal' bs = (V.toList . V.map csvHal2doc . snd) <$> readCsvHalLazyBS bs
------------------------------------------------------------------------ ------------------------------------------------------------------------
parseCsv :: FilePath -> IO (Either Prelude.String [HyperdataDocument]) parseCsv :: FilePath -> IO (Either Prelude.String [HyperdataDocument])
parseCsv fp = do parseCsv fp = fmap (V.toList . V.map csv2doc . snd) <$> readFile fp
r <- readFile fp
pure $ (V.toList . V.map csv2doc . snd) <$> r {-
parseCsv' :: BL.ByteString -> Either Prelude.String [HyperdataDocument]
parseCsv' bs = (V.toList . V.map csv2doc . snd) <$> readCsvLazyBS Comma bs
-}
parseCsv' :: BL.ByteString -> Either Prelude.String [HyperdataDocument] parseCsv' :: BL.ByteString -> Either Prelude.String [HyperdataDocument]
parseCsv' bs = (V.toList . V.map csv2doc . snd) <$> readCsvLazyBS bs parseCsv' bs = do
let
result = case readCsvLazyBS Comma bs of
Left _err -> readCsvLazyBS Tab bs
Right res -> Right res
(V.toList . V.map csv2doc . snd) <$> result
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Csv v3 weighted for phylo -- Csv v3 weighted for phylo
...@@ -462,7 +491,7 @@ instance FromNamedRecord Csv' where ...@@ -462,7 +491,7 @@ instance FromNamedRecord Csv' where
readWeightedCsv :: FilePath -> IO (Header, Vector Csv') readWeightedCsv :: FilePath -> IO (Header, Vector Csv')
readWeightedCsv fp = readWeightedCsv fp =
fmap (\bs -> fmap (\bs ->
case decodeByNameWith csvDecodeOptions bs of case decodeByNameWith (csvDecodeOptions Tab) bs of
Left e -> panic (pack e) Left e -> panic (pack e)
Right corpus -> corpus Right corpus -> corpus
) $ BL.readFile fp ) $ BL.readFile fp
...@@ -18,13 +18,15 @@ module Gargantext.Core.Viz.Graph.Tools ...@@ -18,13 +18,15 @@ module Gargantext.Core.Viz.Graph.Tools
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Map (Map) import Data.Map (Map)
import Data.Text (Text) import Data.Text (Text)
import Debug.Trace (trace) -- import Debug.Trace (trace)
import GHC.Float (sin, cos) import GHC.Float (sin, cos)
import Gargantext.API.Ngrams.Types (NgramsTerm(..)) import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Methods.Distances.Conditional (conditional)
import Gargantext.Core.Methods.Distances (Distance(..), measure) import Gargantext.Core.Methods.Distances (Distance(..), measure)
import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence) import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
import Gargantext.Core.Statistics import Gargantext.Core.Statistics
import Gargantext.Core.Viz.Graph import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..)) import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..)) import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass) import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
...@@ -49,6 +51,7 @@ defaultClustering x = spinglass 1 x ...@@ -49,6 +51,7 @@ defaultClustering x = spinglass 1 x
------------------------------------------------------------- -------------------------------------------------------------
type Threshold = Double type Threshold = Double
cooc2graph' :: Ord t => Distance cooc2graph' :: Ord t => Distance
-> Double -> Double
-> Map (t, t) Int -> Map (t, t) Int
...@@ -96,7 +99,6 @@ cooc2graphWith' doPartitions distance threshold myCooc = do ...@@ -96,7 +99,6 @@ cooc2graphWith' doPartitions distance threshold myCooc = do
where where
(as, bs) = List.unzip $ Map.keys distanceMap (as, bs) = List.unzip $ Map.keys distanceMap
n' = Set.size $ Set.fromList $ as <> bs n' = Set.size $ Set.fromList $ as <> bs
ClustersParams rivers _level = clustersParams nodesApprox
{- -- Debug {- -- Debug
saveAsFileDebug "debug/distanceMap" distanceMap saveAsFileDebug "debug/distanceMap" distanceMap
...@@ -108,9 +110,7 @@ cooc2graphWith' doPartitions distance threshold myCooc = do ...@@ -108,9 +110,7 @@ cooc2graphWith' doPartitions distance threshold myCooc = do
else panic "Text.Flow: DistanceMap is empty" else panic "Text.Flow: DistanceMap is empty"
let let
-- bridgeness' = distanceMap bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
bridgeness' = trace ("Rivers: " <> show rivers)
$ bridgeness rivers partitions distanceMap
confluence' = confluence (Map.keys bridgeness') 3 True False confluence' = confluence (Map.keys bridgeness') 3 True False
...@@ -118,7 +118,6 @@ cooc2graphWith' doPartitions distance threshold myCooc = do ...@@ -118,7 +118,6 @@ cooc2graphWith' doPartitions distance threshold myCooc = do
diag bridgeness' confluence' partitions diag bridgeness' confluence' partitions
doDistanceMap :: Distance doDistanceMap :: Distance
-> Threshold -> Threshold
-> HashMap (NgramsTerm, NgramsTerm) Int -> HashMap (NgramsTerm, NgramsTerm) Int
...@@ -126,7 +125,7 @@ doDistanceMap :: Distance ...@@ -126,7 +125,7 @@ doDistanceMap :: Distance
, Map (Index, Index) Int , Map (Index, Index) Int
, Map NgramsTerm Index , Map NgramsTerm Index
) )
doDistanceMap distance threshold myCooc = (distanceMap, toIndex ti diag, ti) doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
where where
-- TODO remove below -- TODO remove below
(diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y) (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
...@@ -136,43 +135,45 @@ doDistanceMap distance threshold myCooc = (distanceMap, toIndex ti diag, ti) ...@@ -136,43 +135,45 @@ doDistanceMap distance threshold myCooc = (distanceMap, toIndex ti diag, ti)
(ti, _it) = createIndices theMatrix (ti, _it) = createIndices theMatrix
tiSize = Map.size ti tiSize = Map.size ti
{-
matCooc = case distance of -- Shape of the Matrix matCooc = case distance of -- Shape of the Matrix
Conditional -> map2mat Triangle 0 tiSize Conditional -> map2mat Triangle 0 tiSize
Distributional -> map2mat Square 0 tiSize Distributional -> map2mat Square 0 tiSize
{-$ case distance of -- Removing the Diagonal ?
Conditional -> Map.filterWithKey (\(a,b) _ -> a /= b)
Distributional -> Map.filterWithKey (\(a,b) _ -> a /= b)
-}
$ toIndex ti theMatrix $ toIndex ti theMatrix
similarities = measure distance matCooc similarities = measure distance matCooc
-}
similarities = measure Distributional
$ map2mat Square 0 tiSize
$ toIndex ti theMatrix
links = round (let n :: Double = fromIntegral tiSize in n * log n) links = round (let n :: Double = fromIntegral tiSize in n * log n)
distanceMap = Map.fromList distanceMap = Map.fromList
$ List.take links $ List.take links
$ List.sortOn snd $ List.sortOn snd
$ Map.toList $ Map.toList
$ edgesFilter
$ Map.filter (> threshold) $ Map.filter (> threshold)
$ mat2map similarities $ mat2map similarities
doDistanceMap Conditional _threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
where
myCooc' = Map.fromList $ HashMap.toList myCooc
(ti, _it) = createIndices myCooc'
-- tiSize = Map.size ti
-- links = round (let n :: Double = fromIntegral tiSize in n * log n)
distanceMap = toIndex ti
$ Map.fromList
-- $ List.take links
-- $ List.sortOn snd
$ HashMap.toList
-- $ HashMap.filter (> threshold)
$ conditional myCooc
------------------------------------------------------------------------
------------------------------------------------------------------------
data ClustersParams = ClustersParams { bridgness :: Double
, louvain :: Text
} deriving (Show)
clustersParams :: Int -> ClustersParams
clustersParams x = ClustersParams (fromIntegral x) "0.00000001" -- y
{- where
y | x < 100 = "0.000001"
| x < 350 = "0.000001"
| x < 500 = "0.000001"
| x < 1000 = "0.000001"
| otherwise = "1"
-}
---------------------------------------------------------- ----------------------------------------------------------
-- | From data to Graph -- | From data to Graph
...@@ -188,7 +189,8 @@ data2graph :: ToComId a ...@@ -188,7 +189,8 @@ data2graph :: ToComId a
-> Graph -> Graph
data2graph labels occurences bridge conf partitions = Graph { _graph_nodes = nodes data2graph labels occurences bridge conf partitions = Graph { _graph_nodes = nodes
, _graph_edges = edges , _graph_edges = edges
, _graph_metadata = Nothing } , _graph_metadata = Nothing
}
where where
community_id_by_node_id = Map.fromList community_id_by_node_id = Map.fromList
...@@ -215,14 +217,15 @@ data2graph labels occurences bridge conf partitions = Graph { _graph_nodes = nod ...@@ -215,14 +217,15 @@ data2graph labels occurences bridge conf partitions = Graph { _graph_nodes = nod
edges = [ Edge { edge_source = cs (show s) edges = [ Edge { edge_source = cs (show s)
, edge_target = cs (show t) , edge_target = cs (show t)
, edge_weight = d , edge_weight = weight
, edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
-- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf -- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
, edge_id = cs (show i) , edge_id = cs (show i)
} }
| (i, ((s,t), d)) <- zip ([0..]::[Integer] ) | (i, ((s,t), weight)) <- zip ([0..]::[Integer] )
(Map.toList bridge) (Map.toList bridge)
, s /= t, d > 0 , s /= t
, weight > 0
] ]
......
...@@ -17,14 +17,16 @@ These functions are used for Vector.Matrix only. ...@@ -17,14 +17,16 @@ These functions are used for Vector.Matrix only.
module Gargantext.Core.Viz.Graph.Utils module Gargantext.Core.Viz.Graph.Utils
where where
import Data.Map (Map)
import Data.Matrix hiding (identity) import Data.Matrix hiding (identity)
import Data.Vector (Vector) import Data.Vector (Vector)
import qualified Data.Vector as V
import qualified Data.List as L import qualified Data.List as L
import qualified Data.Map as Map
import Gargantext.Prelude import Gargantext.Prelude
import Data.List (unzip)
import qualified Data.Vector as V
import Data.Maybe (catMaybes)
import qualified Data.Set as Set
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Some utils to build the matrix from cooccurrence results -- | Some utils to build the matrix from cooccurrence results
...@@ -63,8 +65,35 @@ toListsWithIndex m = concat' $ zip [1..] $ map (\c -> zip [1..] c) $ toLists m ...@@ -63,8 +65,35 @@ toListsWithIndex m = concat' $ zip [1..] $ map (\c -> zip [1..] c) $ toLists m
concat' :: [(Int, [(Int, a)])] -> [((Int, Int), a)] concat' :: [(Int, [(Int, a)])] -> [((Int, Int), a)]
concat' xs = L.concat $ map (\(x, ys) -> map (\(y, a) -> ((x,y), a)) ys ) xs concat' xs = L.concat $ map (\(x, ys) -> map (\(y, a) -> ((x,y), a)) ys ) xs
------------------------------------------------------------------------
-- Utils to manage Graphs
edgesFilter :: (Ord a, Ord b) => Map (a,a) b -> Map (a,a) b
edgesFilter m = Map.fromList $ catMaybes results
where
results = [ let
ij = Map.lookup (i,j) m
ji = Map.lookup (j,i) m
in getMax (i,j) ij ji
| i <- keys
, j <- keys
, i < j
]
keys = Set.toList $ Set.fromList (x <> y)
(x,y) = unzip $ Map.keys m
getMax :: Ord b
=> (a,a)
-> Maybe b
-> Maybe b
-> Maybe ((a,a), b)
getMax (i,j) (Just d) Nothing = Just ((i,j), d)
getMax (i,j) Nothing (Just d) = Just ((j,i), d)
getMax ij (Just di) (Just dj) = if di >= dj then getMax ij (Just di) Nothing
else getMax ij Nothing (Just dj)
getMax _ _ _ = Nothing
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment