Commit f5f0ae1b authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Merge branch 'glocqueville/update-igraph' into 'dev'

Update IGraph

See merge request !411
parents 45371e41 93d91e0c
Pipeline #7621 passed with stages
in 46 minutes and 1 second
......@@ -16,8 +16,8 @@ fi
# with the `sha256sum` result calculated on the `cabal.project` and
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in.
expected_cabal_project_hash="963418e37a17d4bb67d4b885613144b36d290f612eea80355e82abc7e76b450c"
expected_cabal_project_freeze_hash="cd52143d3a9d285360b59c6371d3e258552c1bc115bd612024db3de1f7593ff7"
expected_cabal_project_hash="7d021a8e3d0b68421e26bdfe4e1da82f6ea26b6c420fc984b3c30c14bc5fea98"
expected_cabal_project_freeze_hash="553b98aadb35506a305bd740cdd71f5fadc1e6d55d10f91cf39daa6735a63d78"
cabal --store-dir=$STORE_DIR v2-build --dry-run
......
......@@ -87,7 +87,7 @@ source-repository-package
source-repository-package
type: git
location: https://gitlab.iscpif.fr/gargantext/gargantext-graph.git
tag: a08ceed71b297a811f90cb86c3c61dc0b153036b
tag: 316d48b6a89593faaf1f2102e9714cea7e416e56
subdir: gargantext-graph-core
-- Support for GHC 9.6.x
......@@ -99,7 +99,7 @@ source-repository-package
source-repository-package
type: git
location: https://gitlab.iscpif.fr/gargantext/haskell-igraph.git
tag: 9f8a2f4a014539826a4eab3215cc70c0813f20cb
tag: 05e62da3aa466b7d0608d4918b030dc024119b32
source-repository-package
type: git
......
......@@ -25,8 +25,6 @@ constraints: any.Boolean ==0.2.4,
any.StateVar ==1.2.2,
any.accelerate ==1.3.0.0,
accelerate +bounds-checks -debug -internal-checks -nofib -unsafe-checks,
any.accelerate-arithmetic ==1.0.0.1,
any.accelerate-utility ==1.0.0.1,
any.adjunctions ==4.4.2,
any.aeson ==2.1.2.1,
aeson -cffi +ordered-keymap,
......@@ -79,6 +77,8 @@ constraints: any.Boolean ==0.2.4,
any.blaze-builder ==0.4.2.3,
any.blaze-html ==0.9.2.0,
any.blaze-markup ==0.8.3.0,
any.blaze-textual ==0.2.3.1,
blaze-textual -developer -integer-simple +native,
any.boolexpr ==0.3,
any.boring ==0.2.2,
boring +tagged,
......@@ -103,6 +103,7 @@ constraints: any.Boolean ==0.2.4,
cassava-conduit +small_base,
any.cborg ==0.2.10.0,
cborg +optimize-gmp,
any.cborg-json ==0.2.6.0,
any.cereal ==0.5.8.3,
cereal -bytestring-builder,
any.charset ==0.3.11,
......@@ -161,6 +162,8 @@ constraints: any.Boolean ==0.2.4,
any.dense-linear-algebra ==0.1.0.0,
any.digest ==0.0.2.1,
digest -have_arm64_crc32c -have_builtin_prefetch -have_mm_prefetch -have_sse42 -have_strong_getauxval -have_weak_getauxval +pkg-config,
any.direct-sqlite ==2.3.29,
direct-sqlite +dbstat +fulltextsearch +haveusleep +json1 -mathfunctions -systemlib +urifilenames,
any.directory ==1.3.8.5,
any.discrimination ==0.5,
any.distributive ==0.6.2.1,
......@@ -170,7 +173,6 @@ constraints: any.Boolean ==0.2.4,
any.double-conversion ==2.0.5.0,
double-conversion -developer +embedded_double_conversion,
any.easy-file ==0.2.5,
any.eigen ==3.3.7.0,
any.either ==5.0.2,
any.ekg-core ==0.1.1.8,
any.ekg-json ==0.1.1.1,
......@@ -223,7 +225,7 @@ constraints: any.Boolean ==0.2.4,
any.haskell-bee ==0.1.0.0,
any.haskell-bee-pgmq ==0.1.0.0,
any.haskell-bee-tests ==0.1.0.0,
any.haskell-igraph ==0.10.4,
any.haskell-igraph ==0.10.4.1,
any.haskell-lexer ==1.1.2,
any.haskell-pgmq ==0.1.0.0,
any.haskell-src-exts ==1.23.1,
......@@ -300,7 +302,6 @@ constraints: any.Boolean ==0.2.4,
any.linear ==1.23,
linear -herbie +template-haskell,
any.list-t ==1.0.5.7,
any.lockfree-queue ==0.2.4,
any.logict ==0.8.2.0,
any.loop ==0.3.0,
any.lzma ==0.0.1.1,
......@@ -460,6 +461,7 @@ constraints: any.Boolean ==0.2.4,
any.servant-blaze ==0.9.1,
any.servant-client ==0.20.2,
any.servant-client-core ==0.20.2,
any.servant-conduit ==0.16.1,
any.servant-ekg ==0.3.1,
any.servant-openapi3 ==2.0.1.6,
any.servant-routes ==0.1.0.0,
......@@ -487,6 +489,7 @@ constraints: any.Boolean ==0.2.4,
any.split ==0.2.5,
any.splitmix ==0.1.1,
splitmix -optimised-mixer,
any.sqlite-simple ==0.4.19.0,
any.statistics ==0.16.3.0,
statistics -benchpapi,
any.stemmer ==0.5.2,
......@@ -571,7 +574,6 @@ constraints: any.Boolean ==0.2.4,
any.uri-encode ==1.5.0.7,
uri-encode +network-uri -tools,
any.utf8-string ==1.0.2,
any.utility-ht ==0.0.17.2,
any.uuid ==1.3.16,
any.uuid-types ==1.0.6,
any.validity ==0.12.1.0,
......
......@@ -552,7 +552,7 @@ library
, graphviz ^>= 2999.20.1.0
, haskell-bee
, haskell-bee-pgmq
, haskell-igraph ^>= 0.10.4
, haskell-igraph ^>= 0.10.4.1
, haskell-pgmq >= 0.1.0.0 && < 0.2
, haskell-throttle
, hlcm ^>= 0.2.2
......@@ -627,7 +627,6 @@ library
, servant-websockets >= 2.0.0 && < 2.1
, servant-xml-conduit ^>= 0.1.0.4
, shelly >= 1.9 && < 2
, singletons ^>= 3.0.2
, singletons-th >= 3.1 && < 3.3
, smtp-mail >= 0.3.0.0
, split >= 0.2.3.4
......
......@@ -31,7 +31,7 @@ import Data.Text (pack, unpack, toLower)
import Data.Tuple.Extra (both)
import GHC.Generics
import Gargantext.Core (Lang(..), allLangs)
import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
import Gargantext.Core.Text.Metrics.Count (countOccurrences)
import Gargantext.Core.Text.Samples.DE qualified as DE
import Gargantext.Core.Text.Samples.EN qualified as EN
import Gargantext.Core.Text.Samples.ES qualified as ES
......@@ -197,7 +197,7 @@ wordToBook ns n txt = EventBook ef en
where
chks = allChunks ns n txt
en = DM.fromList $ map (\(n',ns') -> (n', length ns')) $ zip ns chks
ef = foldl' DM.union DM.empty $ map (occurrencesWith identity) chks
ef = foldl' DM.union DM.empty $ map countOccurrences chks
op :: (Freq -> Freq -> Freq) -> EventBook -> EventBook -> EventBook
op f (EventBook ef1 en1)
......
......@@ -133,14 +133,24 @@ coocOnSingleContext fun ts = xs
occurrences :: [Terms] -> Map Grouped (Map Terms Int)
occurrences = occurrencesOn _terms_stem
-- | Constructs the occurence map corresponding to a given collection:
-- the value at key `key` is the number of times `key` appears in the collection
-- Note: Compared to `occurences`, this is the more elementary function, maybe
-- it would make more sense to rename this one into `occurences` and the other
-- into something more descriptive
countOccurrences :: (Foldable f, Ord a)
=> f a -- ^ The collection whose items will be counted
-> Map a Int -- ^ A map whose keys are items of the input
-- collection, and whose values are the number of
-- times those items appear in the input collection
countOccurrences collection =
foldl' (\occurenceMap item -> insertWith (+) item 1 occurenceMap)
empty
collection
occurrencesOn :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int)
occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty
occurrencesWith :: (Foldable list, Ord k, Num a, Show k, Show a, Show (list b)) => (b -> k) -> list b -> Map k a
occurrencesWith f xs = trace (show (xs,m) :: Text) m
where
m = foldl' (\x y -> insertWith (+) (f y) 1 x) empty xs
-- TODO add groups and filter stops
sumOcc :: Ord a => [Occ a] -> Occ a
......
......@@ -21,7 +21,7 @@ import Gargantext.API.Ngrams.NgramsTree ( toTree, NgramsTree )
import Gargantext.API.Ngrams.Tools ( filterListWithRoot, getListNgrams, getRepo, mapTermListRoot )
import Gargantext.API.Ngrams.Types ( NgramsTerm(NgramsTerm) )
import Gargantext.Core.NodeStory.Types ( NodeStoryEnv )
import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
import Gargantext.Core.Text.Metrics.Count (countOccurrences)
import Gargantext.Core.Text.Ngrams (NgramsType)
import Gargantext.Core.Types.Main ( ListType )
import Gargantext.Database.Admin.Types.Node ( NodeType(NodeList), CorpusId, contextId2NodeId )
......@@ -43,7 +43,7 @@ histoData cId = do
$ V.fromList
$ sortOn fst -- TODO Vector.sortOn
$ toList
$ occurrencesWith identity dates
$ countOccurrences dates
pure (Histo ls css)
......
......@@ -31,9 +31,9 @@ import Gargantext.Core.Statistics ( pcaReduceTo, Dimension(Dimension) )
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, nodeId2comId, setNodes2clusterNodes)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
import Gargantext.Core.Viz.Graph.Types (Attributes(..), BridgenessMethod, Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..), LegendField(..))
import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
import Gargantext.Core.Viz.Graph.Utils (edgesFilter, filterNodesByCount)
import Gargantext.Prelude
import Graph.BAC.ProxemyOptim qualified as BAC
import Graph.Types (ClusterNode(..))
......@@ -42,11 +42,6 @@ import IGraph.Algorithms.Layout qualified as Layout
import IGraph.Random ( Gen ) -- (Gen(..))
-------------------------------------------------------------
defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
-- defaultClustering x = pure $ BAC.defaultClustering x
defaultClustering x = spinglass 1 x
-------------------------------------------------------------
type Threshold = Double
......@@ -82,7 +77,7 @@ cooc2graphWith bridgenessMethod multi similarity threshold strength myCooc = do
distanceMap `seq` diag `seq` ti `seq` pure ()
partitions <- if Map.size distanceMap > 0
then spinglass' 1 distanceMap
then spinglass 1 distanceMap
else panic $ Text.unwords [ "I can not compute the graph you request"
, "because either the quantity of documents"
, "or the quantity of terms"
......@@ -102,84 +97,53 @@ cooc2graphWith bridgenessMethod multi similarity threshold strength myCooc = do
pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes partitions)
-- | A converter from the partition type returned by `spinglass'`
-- to the partition type required by `bridgeness`
-- | Given a list of sets, each identifying the nodes in a cluster, returns
-- a list of 'ClusterNode' where each node has been uniquely labeled
-- by its community ID. This allows flattening the input sets without conflicts
-- on nodes with the same ID (as they would belong to a different community).
partitionsToClusterNodes :: [Set Int] -> [ClusterNode]
partitionsToClusterNodes setlist =
setlist &
-- Convert sets to lists:
fmap toList &
-- Assign an integer index to each cluster:
zip [1 ..] &
-- Attach cluster IDs to individual nodes instead to whole clusters
fmap (\(id, clusterIds) -> zip (repeat id) clusterIds) &
-- Flatten list of clusters of nodes labeled by cluster indices
-- into a list of labeled nodes:
join &
-- Turn pairs into `ClusterNode`s
fmap (\(clusterId, nodeId) -> ClusterNode nodeId clusterId)
type Reverse = Bool
partitionsToClusterNodes setlist = setlist
& fmap toList -- Convert sets to lists
& zip [1 ..] -- Assign an integer index to each cluster
& fmap (\(id, clusterIds) -> zip (repeat id) clusterIds) -- Attach cluster IDs to individual nodes rather than whole clusters
& join -- Flatten list of clusters of nodes labeled by cluster indices into a list of labeled nodes
& fmap (\(clusterId, nodeId) -> ClusterNode nodeId clusterId) -- Turn pairs into `ClusterNode`s
doSimilarityMap :: Similarity
-> Threshold
-> Strength
-> HashMap (NgramsTerm, NgramsTerm) Int
-> ( Map (Int,Int) Double
, Map (Index, Index) Int
, Map NgramsTerm Index
)
doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
where
myCooc' = Map.fromList $ HashMap.toList myCooc
(_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
$ Map.fromList
$ HashMap.toList myCooc
(ti, _it) = createIndices theMatrix
tiSize = Map.size ti
similarities = (\m -> m `seq` m)
$ (\m -> m `seq` measure Conditional m)
$ (\m -> m `seq` map2mat Square 0 tiSize m)
$ theMatrix `seq` toIndex ti theMatrix
links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int))
distanceMap = Map.fromList
$ List.take links
$ (if strength == Weak then List.reverse else identity)
$ List.sortOn snd
$ Map.toList
$ Map.filter (> threshold)
$ similarities `seq` mat2map similarities
doSimilarityMap distriType threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
-> Threshold
-> Strength
-> HashMap (NgramsTerm, NgramsTerm) Int -- cooccurrence map
-> ( Map (Int,Int) Double -- weight map
, Map (Index, Index) Int -- cooccurrence map
, Map NgramsTerm Index -- ???
)
doSimilarityMap similarityType threshold strength coocMap =
(weightMap, toIndex ti coocMap', ti)
where
-- TODO remove below
(diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
coocMap' = case similarityType of
Conditional -> Map.fromList $ HashMap.toList coocMap
Distributional -> diag
(diag, theMatrix) = Map.partitionWithKey (\(x, y) _ -> x == y)
$ Map.fromList
$ HashMap.toList myCooc
$ HashMap.toList coocMap
(ti, _it) = createIndices theMatrix
tiSize = Map.size ti
similarities = (\m -> m `seq` m)
$ (\m -> m `seq` measure distriType m)
similarities = (\m -> m `seq` measure similarityType m)
$ (\m -> m `seq` map2mat Square 0 tiSize m)
$ theMatrix `seq` toIndex ti theMatrix
links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
distanceMap = Map.fromList
$ List.take links
$ (if strength == Weak then List.reverse else identity)
$ List.sortOn snd
$ Map.toList
$ edgesFilter
$ (\m -> m `seq` Map.filter (> threshold) m)
$ similarities `seq` mat2map similarities
tiSize = Map.size ti
links = let n = fromIntegral $ Map.size ti :: Double
factor = if similarityType == Conditional then 10 else 1
in round $ factor * n * log n^(2::Int)
filterMap = case similarityType of
Conditional -> Map.filter (> threshold)
Distributional -> edgesFilter . (\m -> m `seq` Map.filter (> threshold) m)
weightMap = Map.fromList
$ List.take links
$ (if strength == Weak then List.reverse else identity)
$ List.sortOn snd
$ Map.toList
$ filterMap
$ similarities `seq` mat2map similarities
----------------------------------------------------------
-- | From data to Graph
......@@ -223,8 +187,9 @@ data2graph multi labels' occurences bridge conf partitions =
| (label, n) <- labels
, Set.member n toKeep
]
(bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
-- Remove vertices not connected to any other node, i.e. vertices that have
-- zero edge joining them to other vertices
(bridge', toKeep) = filterNodesByCount (> 0) bridge
edges = [ Edge { edge_source = show s
, edge_hidden = Nothing
......@@ -299,7 +264,7 @@ getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") ident
-- | KamadaKawai Layout
-- TODO TEST: check labels, nodeId and coordinates
layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
layout :: Map (Int, Int) Double -> Int -> Gen s -> (Double, Double)
layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
where
coord :: (Map Int (Double,Double))
......
......@@ -13,117 +13,72 @@ Reference:
-}
module Gargantext.Core.Viz.Graph.Tools.IGraph
where
( spinglass
, mkGraphUfromEdges
) where
import Data.Serialize
import Data.Singletons (SingI)
import Gargantext.Core.Viz.Graph.Index
import Graph.Types (ClusterNode(..))
import Data.Serialize (Serialize)
import IGraph hiding (mkGraph, neighbors, edges, nodes, Node, Graph)
import Protolude
import Gargantext.Prelude (saveAsFileDebug)
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified IGraph as IG
import qualified IGraph.Algorithms.Clique as IG
import qualified IGraph.Algorithms.Community as IG
import qualified IGraph.Algorithms.Structure as IG
import qualified IGraph.Random as IG
import qualified Data.Set as Set
------------------------------------------------------------------
-- | Main Types
type Graph_Undirected = IG.Graph 'U () ()
type Graph_Directed = IG.Graph 'D () ()
type Node = IG.Node
type Graph = IG.Graph
------------------------------------------------------------------
-- | Main Graph management Functions
neighbors :: IG.Graph d v e -> IG.Node -> [IG.Node]
neighbors = IG.neighbors
edges :: IG.Graph d v e -> [Edge]
edges = IG.edges
nodes :: IG.Graph d v e -> [IG.Node]
nodes = IG.nodes
------------------------------------------------------------------
-- | Partitions
maximalCliques :: IG.Graph d v e -> [[Int]]
maximalCliques g = IG.maximalCliques g (min',max')
where
min' = 0
max' = 0
------------------------------------------------------------------
type Seed = Int
spinglass :: Seed -> Map (Int, Int) Double -> IO [ClusterNode]
spinglass s g = toClusterNode
<$> map catMaybes
<$> map (map (\n -> Map.lookup n fromI))
<$> List.concat
<$> mapM (partitions_spinglass' s) g'
where
-- Not connected components of the graph make crash spinglass
g' = IG.decompose $ mkGraphUfromEdges
$ Map.keys
$ toIndex toI g
(toI, fromI) = createIndices g
spinglass' :: Seed -> Map (Int, Int) Double -> IO [Set Int]
spinglass' s g = map Set.fromList
<$> map catMaybes
<$> map (map (\n -> Map.lookup n fromI))
<$> List.concat
<$> mapM (partitions_spinglass' s) g'
where
-- Not connected components of the graph make crash spinglass
g' = IG.decompose $ mkGraphUfromEdges
$ Map.keys
$ toIndex toI g
(toI, fromI) = createIndices g
-- | Tools to analyze graphs
partitions_spinglass' :: (Serialize v, Serialize e)
=> Seed -> IG.Graph 'U v e -> IO [[Int]]
partitions_spinglass' s g = do
gen <- IG.withSeed s pure
res <- IG.findCommunity g Nothing Nothing IG.spinglass gen
-- res <- IG.findCommunity g Nothing Nothing IG.leiden gen
-- res <- IG.findCommunity g Nothing Nothing IG.infomap gen
saveAsFileDebug "/tmp/res" res
pure res
toClusterNode :: [[Int]] -> [ClusterNode]
toClusterNode ns = List.concat
$ map (\(cId, ns') -> map (\n -> ClusterNode n cId) ns')
$ List.zip [1..] ns
------------------------------------------------------------------
mkGraph :: (SingI d, Ord v,
Serialize v, Serialize e) =>
[v] -> [LEdge e] -> IG.Graph d v e
mkGraph = IG.mkGraph
------------------------------------------------------------------
mkGraphUfromEdges :: [(Int, Int)] -> Graph_Undirected
mkGraphUfromEdges es = mkGraph (List.replicate n ()) $ zip es $ repeat ()
-- | Cluster a graph using the Spinglass algorithm
-- Warning: Currently, this does not take the weights into account, all vertices
-- and edges are treated equally.
-- TODO Take the weights into account
spinglass :: Int -- ^ Random seed
-> Map (Int, Int) Double -- ^ Weight map of the graph
-> IO [Set Int] -- ^ A list of clusters, in the form of sets of vertex IDs
spinglass seed graph = graph
-- Non-connected graphs make Spinglass crash, so we
-- decompose the graph into connected components perform
-- the algorithm on each component, and then put the
-- clusterings together.
& Map.keys -- get all edges in the form of pairs of vertex IDs
& edgeList2UGraph -- turn that into an IGraph graph
& IG.decompose -- split the graph into connected components
& mapM (spinglassAux seed) -- perform Spinglass on each subgraph
<&> List.concat -- put all clusterings together
<&> map Set.fromList -- convert clusters from list to set
-- | Helper function for `spinglass`. Same as `spinglass`, except the input and
-- output are represented using different types
spinglassAux :: (Serialize v, Serialize e, Show v)
=> Int -- ^ Random seed
-> IG.Graph 'U v e -- ^ Input graph
-> IO [[v]] -- ^ List of clusters, in the form of lists of vertex labels
spinglassAux seed graph = IG.withSeed seed $ \gen -> do -- initialize random generator
rawClusters <- IG.findCommunity graph Nothing Nothing IG.spinglass gen -- perform clustering
-- The clusters we get are composed of vertex IDs corresponding to the internal
-- representation of IGraph graphs, so we need to retrieve the vertex labels:
let clusterLabels = (fmap . fmap) (IG.nodeLab graph) rawClusters
pure clusterLabels -- return the result
-- | Make an undirected IGraph graph from a list of edges between `Int`s.
-- The output graph's vertices are labeled with the original `Int`s, and the
-- edges are not labeled.
edgeList2UGraph :: [(Int, Int)] -> IG.Graph 'U Int ()
edgeList2UGraph edgeList =
-- We're not using `IG.mkGraph` because of the issue raised in the following ticket:
-- https://gitlab.iscpif.fr/gargantext/haskell-igraph/issues/4
IG.fromLabeledEdges $ fmap (\edge -> (edge, ())) $ edgeList
-- | Make an "anonymous" (i.e. without labels) graph out of a list of edges.
-- Warning: there is no guarantee, as far as I know, that the underlying
-- representation of the nodes corresponds to the original `Int`.
mkGraphUfromEdges :: [(Int, Int)] -> IG.Graph 'U () ()
mkGraphUfromEdges es = IG.mkGraph (List.replicate n ()) $ fmap makeLEdge es
where
(a,b) = List.unzip es
n = List.length (List.nub $ a <> b)
{-
mkGraphDfromEdges :: [(Int, Int)] -> Graph_Directed
mkGraphDfromEdges = undefined
-}
makeLEdge e = (e, ())
n = Set.size nodes
nodes = Set.fromList $ map fst es
......@@ -23,7 +23,7 @@ import Data.Matrix hiding (identity)
import Data.Set qualified as Set
import Data.Vector (Vector)
import Data.Vector qualified as Vector
import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
import Gargantext.Core.Text.Metrics.Count (countOccurrences)
import Gargantext.Prelude
------------------------------------------------------------------------
......@@ -81,14 +81,23 @@ edgesFilter m = Map.fromList $ catMaybes results
keys = Set.toList $ Set.fromList (x <> y)
(x,y) = unzip $ Map.keys m
nodesFilter :: (Show a, Show b, Ord a, Ord b, Num b) => (b -> Bool) -> Map (a,a) b -> (Map (a,a) b, Set a)
nodesFilter f m = (m', toKeep)
-- | Filter nodes depending on how many times they (co)occur.
-- More specifically, for a given value `x :: a`, this sums all entries in the
-- map that have `x` as a value, and then it filters by applying `f` to the sum.
-- Warning: This counts the value at `(x, x)` twice.
filterNodesByCount :: (Show node, Ord node)
=> (Int -> Bool) -- ^ Filtering function
-> Map (node, node) b -- ^ Input map
-> (Map (node, node) b, Set node)
-- ^ The map without the filtered out nodes, and the set of
-- remaining nodes
filterNodesByCount f m = (m', toKeep)
where
m' = Map.filterWithKey (\(a,b) _ -> Set.member a toKeep && Set.member b toKeep) m
toKeep = Set.fromList
$ Map.keys
$ Map.filter f
$ occurrencesWith identity
$ countOccurrences
$ tupleConcat
$ List.unzip
$ Map.keys m
......
......@@ -170,7 +170,7 @@
git: "https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git"
subdirs:
- .
- commit: a08ceed71b297a811f90cb86c3c61dc0b153036b
- commit: 316d48b6a89593faaf1f2102e9714cea7e416e56
git: "https://gitlab.iscpif.fr/gargantext/gargantext-graph.git"
subdirs:
- "gargantext-graph-core"
......@@ -190,7 +190,7 @@
git: "https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude"
subdirs:
- .
- commit: 9f8a2f4a014539826a4eab3215cc70c0813f20cb
- commit: 05e62da3aa466b7d0608d4918b030dc024119b32
git: "https://gitlab.iscpif.fr/gargantext/haskell-igraph.git"
subdirs:
- .
......
This diff is collapsed.
......@@ -15,14 +15,61 @@ commentary with @some markup@.
module Test.Graph.Distance where
{-
import Gargantext.Core.Methods.Matrix.Accelerate.Utils (cross', matrix)
import Data.HashMap.Strict qualified as HM
import Data.Map qualified as Map
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Methods.Similarities (Similarity(..))
import Gargantext.Core.Viz.Graph.Tools (doSimilarityMap)
import Gargantext.Core.Viz.Graph.Types (Strength(..))
import Gargantext.Prelude
import Test.Hspec (Spec, describe, it, shouldBe)
test :: IO ()
test = hspec $ do
describe "Cross" $ do
let result = cross' $ matrix 3 ([1,1..] :: [Double])
it "compare" $ do
shouldBe result (matrix 3 ([2,2..] :: [Double]))
-}
-- | The terms are composed of 4 food items: 2 "bases" ("feuille" and "pate")
-- and 2 "fillings" ("tomate" and "boeuf").
-- The corpus is composed of 4 recipes (one context = one recipe),
-- one for each possible pairing of a base and a filling.
recipeCooc :: HM.HashMap (NgramsTerm, NgramsTerm) Int
recipeCooc = HM.fromList
[ ((NgramsTerm "feuille", NgramsTerm "pate" ), 0)
, ((NgramsTerm "boeuf" , NgramsTerm "tomate" ), 0)
, ((NgramsTerm "boeuf" , NgramsTerm "pate" ), 1)
, ((NgramsTerm "tomate" , NgramsTerm "tomate" ), 2)
, ((NgramsTerm "pate" , NgramsTerm "tomate" ), 1)
, ((NgramsTerm "boeuf" , NgramsTerm "feuille"), 1)
, ((NgramsTerm "pate" , NgramsTerm "pate" ), 2)
, ((NgramsTerm "boeuf" , NgramsTerm "boeuf" ), 2)
, ((NgramsTerm "feuille", NgramsTerm "feuille"), 2)
, ((NgramsTerm "feuille", NgramsTerm "tomate" ), 1)
]
test :: Spec
test = do
describe "Recipe O2 Weight Map" $ do
let (recipeWeights, _coocs, dictionary) =
doSimilarityMap Distributional 0 Strong recipeCooc
it "has the right edges" $ do
-- Distributional weight map of the `recipeCooc` virtual corpus
let weights = do -- Maybe monad
-- Lookup nodes
feuilleID <- Map.lookup (NgramsTerm "feuille") dictionary
pateID <- Map.lookup (NgramsTerm "pate" ) dictionary
boeufID <- Map.lookup (NgramsTerm "boeuf" ) dictionary
tomateID <- Map.lookup (NgramsTerm "tomate" ) dictionary
-- Lookup edge weights, if any
let feuillePate = Map.lookup (feuilleID, pateID) recipeWeights
let boeufTomate = Map.lookup (boeufID, tomateID) recipeWeights
let feuilleTomate = Map.lookup (feuilleID, tomateID) recipeWeights
let feuilleBoeuf = Map.lookup (feuilleID, boeufID) recipeWeights
let pateTomate = Map.lookup (pateID, tomateID) recipeWeights
let pateBoeuf = Map.lookup (pateID, boeufID) recipeWeights
return [ feuillePate, boeufTomate, feuilleTomate
, feuilleBoeuf, pateTomate, pateBoeuf ]
-- We check that there are edges exactly where we expect there to be:
(fmap . fmap) positiveWeight weights `shouldBe`
Just [True, True, False, False, False, False]
-- | Checks whether `Map.lookup` outputs something, and that that something is
-- strictly positive, denoting the presence of an edge.
positiveWeight :: (Num a, Ord a) => Maybe a -> Bool
positiveWeight Nothing = False
positiveWeight (Just w) = w > 0
......@@ -20,7 +20,8 @@ import qualified Test.Core.Text.Corpus.Query as CorpusQuery
import qualified Test.Core.Text.Corpus.TSV as TSVParser
import qualified Test.Core.Utils as Utils
import qualified Test.Core.Worker as Worker
import qualified Test.Graph.Clustering as Graph
import qualified Test.Graph.Clustering as Clustering
import qualified Test.Graph.Distance as Distance
import qualified Test.Ngrams.Lang.Occurrences as Occurrences
import qualified Test.Ngrams.NLP as NLP
import qualified Test.Ngrams.Query as NgramsQuery
......@@ -48,7 +49,8 @@ protectStdoutBuffering action =
main :: IO ()
main = do
utilSpec <- testSpec "Utils" Utils.test
clusteringSpec <- testSpec "Graph Clustering" Graph.test
clusteringSpec <- testSpec "Graph Clustering" Clustering.test
distanceSpec <- testSpec "Graph Distance" Distance.test
dateSplitSpec <- testSpec "Date split" PD.testDateSplit
cryptoSpec <- testSpec "Crypto" Crypto.test
nlpSpec <- testSpec "NLP" NLP.test
......@@ -60,6 +62,7 @@ main = do
protectStdoutBuffering $ defaultMain $ testGroup "Gargantext"
[ utilSpec
, clusteringSpec
, distanceSpec
, dateSplitSpec
, cryptoSpec
, nlpSpec
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment