Commit 4499edc1 authored by Alexandre Delanoë

[FIX] Order 2 regression and split of clustering

parent 3025b9f6
......@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.6.9.4.6
synopsis: Search, map, share
description: Please see README.md
category: Data
......
......@@ -159,37 +159,24 @@ reIndexWith cId lId nt lts = do
<$> HashMap.toList
<$> getTermsWith identity [lId] nt lts
let orphans = ts {- List.concat
$ map (\t -> case HashMap.lookup t occs of
Nothing -> [t]
Just n -> if n <= 1 then [t] else [ ]
) ts
-}
printDebug "orphans" orphans
-- Get all documents of the corpus
docs <- selectDocNodes cId
-- printDebug "docs length" (List.length docs)
-- Checking Text documents where orphans match
-- TODO Tests here
let
-- fromListWith (<>)
ngramsByDoc = map (HashMap.fromList)
ngramsByDoc = map (HashMap.fromListWith (Map.unionWith (Map.unionWith (\(_a,b) (_a',b') -> (1,b+b')))))
$ map (map (\((k, cnt), v) -> (SimpleNgrams (text2ngrams k), over (traverse . traverse) (\p -> (p, cnt)) v)))
$ map (\doc -> List.zip
(termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) orphans)
$ Text.unlines $ catMaybes
[ doc ^. context_hyperdata . hd_title
, doc ^. context_hyperdata . hd_abstract
]
(termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) ts)
$ Text.unlines $ catMaybes
[ doc ^. context_hyperdata . hd_title
, doc ^. context_hyperdata . hd_abstract
]
)
(List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]])
) docs
printDebug "ngramsByDoc: " ngramsByDoc
-- printDebug "ngramsByDoc: " ngramsByDoc
-- Saving the indexation in database
_ <- mapM (saveDocNgramsWith lId) ngramsByDoc
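A minimal sketch of the merge used in ngramsByDoc above (hypothetical term and document id, nesting simplified to one map level): with fromListWith and unionWith, repeated extractions of the same term in the same document accumulate their counts instead of the last entry winning.
{-# LANGUAGE OverloadedStrings #-}
import Data.Text (Text)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Map.Strict as Map
mergeCounts :: HashMap.HashMap Text (Map.Map Int (Int, Int))
mergeCounts = HashMap.fromListWith (Map.unionWith (\(_a, b) (_a', b') -> (1, b + b')))
  [ ("patient", Map.singleton 42 (1, 2))   -- term seen twice in document 42
  , ("patient", Map.singleton 42 (1, 3)) ] -- and three more times
-- ==> fromList [("patient", fromList [(42, (1, 5))])]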
......@@ -224,7 +211,7 @@ postAsync' l (WithJsonFile m _) logStatus = do
, _scst_remaining = Just 2
, _scst_events = Just []
}
printDebug "New list as file" l
-- printDebug "New list as file" l
_ <- setList l m
-- printDebug "Done" r
......@@ -283,15 +270,15 @@ csvPost :: FlowCmdM env err m
-> Text
-> m Bool
csvPost l m = do
printDebug "[csvPost] l" l
-- printDebug "[csvPost] l" l
-- printDebug "[csvPost] m" m
-- status label forms
let lst = readCsvText m
let p = parseCsvData lst
--printDebug "[csvPost] lst" lst
printDebug "[csvPost] p" p
-- printDebug "[csvPost] p" p
_ <- setListNgrams l NgramsTerms p
printDebug "ReIndexing List" l
-- printDebug "ReIndexing List" l
corpus_node <- getNode l -- (Proxy :: Proxy HyperdataList)
let corpus_id = fromMaybe (panic "") (_node_parent_id corpus_node)
_ <- reIndexWith corpus_id l NgramsTerms (Set.fromList [MapTerm, CandidateTerm])
......@@ -301,10 +288,10 @@ csvPost l m = do
------------------------------------------------------------------------
csvPostAsync :: ServerT CSVAPI (GargM Env GargError)
csvPostAsync lId =
serveJobsAPI UpdateNgramsListJobCSV $ \f@(WithTextFile ft _ n) log' -> do
serveJobsAPI UpdateNgramsListJobCSV $ \f@(WithTextFile _ft _ _n) log' -> do
let log'' x = do
printDebug "[csvPostAsync] filetype" ft
printDebug "[csvPostAsync] name" n
-- printDebug "[csvPostAsync] filetype" ft
-- printDebug "[csvPostAsync] name" n
liftBase $ log' x
csvPostAsync' lId f log''
......
......@@ -192,6 +192,9 @@ dim m = n
matSumCol :: (Elt a, P.Num (Exp a)) => Dim -> Acc (Matrix a) -> Acc (Matrix a)
matSumCol r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
matSumLin :: (Elt a, P.Num (Exp a)) => Dim -> Acc (Matrix a) -> Acc (Matrix a)
matSumLin r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
matSumCol' :: (Elt a, P.Num (Exp a)) => Matrix a -> Matrix a
matSumCol' m = run $ matSumCol n m'
where
......@@ -210,6 +213,8 @@ matSumCol' m = run $ matSumCol n m'
matProba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
matProba d mat = zipWith (/) mat (matSumCol d mat)
-- | Diagonal of the matrix
--
-- >>> run $ diag (use $ matrix 3 ([1..] :: [Int]))
......@@ -242,11 +247,21 @@ divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All))
matMiniMax :: (Elt a, Ord a, P.Num a)
=> Acc (Matrix a)
-> Acc (Matrix a)
matMiniMax m = trace "matMiniMax" $ filterWith' miniMax' (constant 0) m
matMiniMax m = filterWith' (>=) miniMax' (constant 0) m
where
miniMax' = the $ minimum $ maximum m
matMaxMini :: (Elt a, Ord a, P.Num a)
=> Acc (Matrix a)
-> Acc (Matrix a)
matMaxMini m = filterWith' (>) miniMax' (constant 0) m
where
miniMax' = the $ maximum $ minimum m
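-- A small worked reading of the two filters above (hypothetical 2x2 input,
-- assuming the Accelerate folds reduce along the innermost dimension, i.e.
-- per row): for m = [[1,2],[3,4]], the row maxima are [2,4] and the row
-- minima are [1,3]; matMiniMax keeps x >= 2 (so 2, 3 and 4 survive, 1 is
-- zeroed) while matMaxMini keeps x > 3 (only 4 survives).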
-- | Filters the matrix with a constant
--
-- >>> run $ matFilter 5 $ use $ matrix 3 [1..]
......@@ -260,15 +275,12 @@ filter' t m = filterWith t 0 m
filterWith :: Double -> Double -> Acc (Matrix Double) -> Acc (Matrix Double)
filterWith t v m = map (\x -> ifThenElse (x > (constant t)) x (constant v)) (transpose m)
filterWith' :: (Elt a, Ord a) => Exp a -> Exp a -> Acc (Matrix a) -> Acc (Matrix a)
filterWith' t v m = map (\x -> ifThenElse (x > t) x v) m
filterWith' :: (Elt a, Ord a) => (Exp a -> Exp a -> Exp Bool) -> Exp a -> Exp a -> Acc (Matrix a) -> Acc (Matrix a)
filterWith' f t v m = map (\x -> ifThenElse (f x t) x v) m
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO use Lenses
data Direction = MatCol (Exp Int) | MatRow (Exp Int) | Diag
......
......@@ -17,13 +17,18 @@ module Gargantext.Core.Methods.Similarities
import Data.Aeson
import Data.Array.Accelerate (Matrix)
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.Core.Methods.Similarities.Accelerate.Conditional (measureConditional)
import Gargantext.Core.Methods.Similarities.Accelerate.Distributional (logDistributional)
import Gargantext.Prelude (Ord, Eq, Int, Double, Show)
-- import Gargantext.Core.Text.Metrics.Count (coocOn)
-- import Gargantext.Core.Viz.Graph.Index
import Gargantext.Prelude (Ord, Eq, Int, Double, Show, map)
import Prelude (Enum, Bounded, minBound, maxBound)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
-- import qualified Data.Map as Map
import qualified Data.Text as Text
------------------------------------------------------------------------
data Similarity = Conditional | Distributional
......@@ -31,9 +36,7 @@ data Similarity = Conditional | Distributional
measure :: Similarity -> Matrix Int -> Matrix Double
measure Conditional x = measureConditional x
measure Distributional x = y
where
y = logDistributional x
measure Distributional x = logDistributional x
------------------------------------------------------------------------
withMetric :: GraphMetric -> Similarity
......@@ -52,3 +55,32 @@ instance Arbitrary GraphMetric where
------------------------------------------------------------------------
-- Let's take 2 different forms to produce another one:
hello_words :: [[Text]]
hello_words = map (Text.splitOn "-") wrds
where
wrds = [ "bio-logie"
, "socio-logie"
-- , "ana-logie"
-- , "micro-scope"
-- , "micro-phone"
-- , "micro-cosme"
-- -- , "micro-biote"
-- , "tele-scope"
-- , "tele-phone"
-- , "tele-surveillance"
-- , "macro-scope"
-- , "macro-cosme"
-- , "macro-biote"
]
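-- e.g. with only the two uncommented forms above,
-- hello_words == [["bio","logie"],["socio","logie"]]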
{-
hello_matrix ms' = measureConditional
$ map2mat Square 0 (Map.size ti)
$ toIndex ti ms
where
ms = coocOn identity ms'
(ti, it) = createIndices ms
-}
......@@ -45,17 +45,20 @@ import qualified Gargantext.Prelude as P
--
-- The conditional metric is an absolute metric which reflects the
-- interactions of 2 terms in the corpus.
-- The result is filtered with MiniMax.
measureConditional :: Matrix Int -> Matrix Double
measureConditional m = run $ zipWith (/) m' (matSumCol d m')
measureConditional m = run $ x $ map fromIntegral $ use m
where
m' = map fromIntegral (use m)
d = dim m
x :: Acc (Matrix Double) -> Acc (Matrix Double)
x mat = matMiniMax $ matProba r mat
r :: Dim
r = dim m
-- *** Conditional distance (advanced)
-- | Conditional distance (advanced version)
--
-- | To filter the nodes
-- The conditional metric P(i|j) of 2 terms @i@ and @j@, also called
-- "confidence", is the maximum probability between @i@ and @j@ of seeing
-- @i@ in the same context as @j@, knowing @j@.
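--
-- A sketch of the definition, as computed in
-- Gargantext.Core.Methods.Similarities.Conditional further below: with
-- cooccurrence counts @n@ and the diagonal entries @n_jj@ read as occurrence
-- counts, P(i|j) = n_ij / n_jj and the metric keeps max (P(i|j), P(j|i)).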
......@@ -69,19 +72,25 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m
, run $ sg $ map fromIntegral $ use m
)
where
x :: Acc (Matrix Double) -> Acc (Matrix Double)
x mat = (matProba r mat)
xs :: Acc (Matrix Double) -> Acc (Matrix Double)
xs mat = let mat' = x mat in zipWith (-) (matSumLin r mat') mat'
ys :: Acc (Matrix Double) -> Acc (Matrix Double)
ys mat = let mat' = x mat in zipWith (-) (matSumCol r mat') mat'
ie :: Acc (Matrix Double) -> Acc (Matrix Double)
ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
ie mat = map (\x' -> x' / (2*(n-1))) $ zipWith (+) (xs mat) (ys mat)
sg :: Acc (Matrix Double) -> Acc (Matrix Double)
sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
sg mat = map (\x' -> x' / (2*(n-1))) $ zipWith (-) (xs mat) (ys mat)
r :: Dim
r = dim m
n :: Exp Double
n = P.fromIntegral r
r :: Dim
r = dim m
xs :: Acc (Matrix Double) -> Acc (Matrix Double)
xs mat = zipWith (-) (matSumCol r $ matProba r mat) (matProba r mat)
ys :: Acc (Matrix Double) -> Acc (Matrix Double)
ys mat = zipWith (-) (matSumCol r $ transpose $ matProba r mat) (matProba r mat)
......@@ -31,8 +31,54 @@ where $n_{ij}$ is the cooccurrence between term $i$ and term $j$
* MI=(M ./ O * D(M)) .* (M / D(M) * O )
* distr(i,j)=$\frac{|min(V'_i * (MI-D(MI)),V'_j * (MI-D(MI)))|_1}{|V'_i.(MI-D(MI))|_1}$
[Specifications written by David Chavalarias on Garg v4 shared NodeWrite, team Pyremiel 2020]
[Finally, we have used as convention the Distributional metric used in Legacy GarganText](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/50)
mi = defaultdict(lambda : defaultdict(int))
total_cooc = x.sum().sum()
for i in matrix.keys():
si = sum([matrix[i][j] for j in matrix[i].keys() if i != j])
for j in matrix[i].keys():
sj = sum([matrix[j][k] for k in matrix[j].keys() if j != k])
if i!=j :
mi[i][j] = log( matrix[i][j] / ((si * sj) / total_cooc) )
r = defaultdict(lambda : defaultdict(int))
for i in matrix.keys():
for j in matrix.keys():
sumMin = sum(
[
min(mi[i][k], mi[j][k])
for k in matrix.keys()
if i != j and k != i and k != j and mi[i][k] > 0
]
)
sumMi = sum(
[
mi[i][k]
for k in matrix.keys()
if k != i and k != j and mi[i][k] > 0
]
)
try:
r[i][j] = sumMin / sumMi
except Exception as error:
r[i][j] = 0
# Need to filter the weak links, automatic threshold here
minmax = min([ max([ r[i][j] for i in r.keys()]) for j in r.keys()])
G = nx.DiGraph()
G.add_edges_from(
[
(i, j, {'weight': r[i][j]})
for i in r.keys() for j in r.keys()
if i != j and r[i][j] > minmax and r[i][j] > r[j][i]
]
)
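Restated (one reading of the pseudocode above, with $N$ = total_cooc and
$s_i = \sum_{k \neq i} n_{ik}$):
$mi_{ij} = \log\left(\frac{N \, n_{ij}}{s_i \, s_j}\right)$ for $i \neq j$, and
$distr(i,j) = \frac{\sum_{k} \min(mi_{ik}, mi_{jk})}{\sum_{k} mi_{ik}}$
where $k$ ranges over indices with $k \neq i$, $k \neq j$ and $mi_{ik} > 0$.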
-}
{-# LANGUAGE TypeFamilies #-}
......@@ -122,10 +168,10 @@ distributional m' = result
result = termDivNan z_1 z_2
logDistributional :: Matrix Int -> Matrix Double
logDistributional m = trace ("logDistributional, dim=" `mappend` show n) . run
logDistributional2 :: Matrix Int -> Matrix Double
logDistributional2 m = trace ("logDistributional, dim=" `mappend` show n) . run
$ diagNull n
$ matMiniMax
$ matMaxMini
$ logDistributional' n m
where
n = dim m
......@@ -216,8 +262,61 @@ logDistributional' n m' = trace ("logDistributional'") result
-- \[N_{m} = \sum_{i, i \neq j}^{m} \sum_{j, j \neq i}^{m} S_{ij}\]
--
logDistributional :: Matrix Int -> Matrix Double
logDistributional m' = run $ diagNull n result
where
m = map fromIntegral $ use m'
n = dim m'
-- Scalar. Sum of all elements of m.
to = the $ sum (flatten m)
-- Diagonal matrix with the diagonal of m.
d_m = (.*) m (matrixIdentity n)
-- Size n vector. s = [s_i]_i
s = sum ((.-) m d_m)
-- Matrix nxn. Vector s replicated as rows.
s_1 = replicate (constant (Z :. All :. n)) s
-- Matrix nxn. Vector s replicated as columns.
s_2 = replicate (constant (Z :. n :. All)) s
-- Matrix nxn. ss = [s_i * s_j]_{i,j}. Outer product of s with itself.
ss = (.*) s_1 s_2
-- Matrix nxn. mi = [m_{i,j}]_{i,j} where
-- m_{i,j} = 0 if n_{i,j} = 0 or i = j,
-- m_{i,j} = log(to * n_{i,j} / s_{i,j}) otherwise.
mi = (.*) (matrixEye n)
(map (lift1 (\x -> cond (x == 0) 0 (log (x * to)))) ((./) m ss))
-- Tensor nxnxn. Matrix mi replicated along the 2nd axis.
w_1 = replicate (constant (Z :. All :. n :. All)) mi
-- Tensor nxnxn. Matrix mi replicated along the 1st axis.
w_2 = replicate (constant (Z :. n :. All :. All)) mi
-- Tensor nxnxn.
w' = zipWith min w_1 w_2
-- A predicate that is true when the input (i, j, k) satisfies
-- k /= i AND k /= j
k_diff_i_and_j = lift1 (\(Z :. i :. j :. k) -> ((&&) ((/=) k i) ((/=) k j)))
-- Matrix nxn.
sumMin = sum (condOrDefault k_diff_i_and_j 0 w')
-- Matrix nxn. All columns are the same.
sumM = sum (condOrDefault k_diff_i_and_j 0 w_1)
result = termDivNan sumMin sumM
distributional'' :: Matrix Int -> Matrix Double
distributional'' m = -- run {- $ matMiniMax -}
distributional'' m = -- run {- $ matMaxMini -}
run $ diagNull n
$ rIJ n
$ filterWith 0 100
......@@ -255,7 +354,7 @@ distributional'' m = -- run {- $ matMiniMax -}
rIJ :: (Elt a, Ord a, P.Fractional (Exp a), P.Num a)
=> Dim -> Acc (Matrix a) -> Acc (Matrix a)
rIJ n m = matMiniMax $ divide a b
rIJ n m = matMaxMini $ divide a b
where
a = sumRowMin n m
b = sumColMin n m
......
......@@ -37,8 +37,8 @@ conditional :: (Ord a, Hashable a, NFData a)
conditional m' = Map.fromList $ ((catMaybes results') `using` parList rdeepseq)
where
results' = [ let
ij = (/) <$> Map.lookup (i,j) m <*> Map.lookup (i,i) m
ji = (/) <$> Map.lookup (j,i) m <*> Map.lookup (j,j) m
ij = (/) <$> Map.lookup (i,j) m <*> Map.lookup (j,j) m
ji = (/) <$> Map.lookup (j,i) m <*> Map.lookup (i,i) m
in getMax (i,j) ij ji
| i <- keys
......@@ -53,6 +53,3 @@ conditional m' = Map.fromList $ ((catMaybes results') `using` parList rdeepseq)
(x,y) = unzip $ Map.keys m
......@@ -23,19 +23,20 @@ TODO use Map LouvainNodeId (Map LouvainNodeId)
module Gargantext.Core.Viz.Graph.Bridgeness -- (bridgeness)
where
import Gargantext.Core.Methods.Similarities (Similarity(..))
-- import Data.IntMap (IntMap)
import Data.Map.Strict (Map, fromListWith, lookup, toList, mapWithKey, elems)
import Data.Maybe (catMaybes)
import Data.Ord (Down(..))
import Data.Set (Set)
import Data.Tuple.Extra (swap)
import Debug.Trace (trace)
import Gargantext.Core.Methods.Similarities (Similarity(..))
import Gargantext.Prelude
import Graph.Types (ClusterNode(..))
-- import qualified Data.IntMap as IntMap
import qualified Data.List as List
import qualified Data.Map.Strict as Map
-- import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.Tuple.Extra as Tuple
import qualified Data.IntMap as Dico
----------------------------------------------------------------------
type Partitions = Map (Int, Int) Double -> IO [ClusterNode]
......@@ -46,6 +47,43 @@ nodeId2comId (ClusterNode i1 i2) = (i1, i2)
type NodeId = Int
type CommunityId = Int
----------------------------------------------------------------------
----------------------------------------------------------------------
-- recursiveClustering: get more granularity out of a given clustering
-- tested with spinglass clustering only (WIP)
recursiveClustering :: Partitions -> Map (Int, Int) Double -> IO [ClusterNode]
recursiveClustering f mp = do
let
n :: Double
n = fromIntegral $ Set.size
$ Set.unions $ List.concat
$ map (\(k1,k2) -> map Set.singleton [k1, k2])
$ Map.keys mp
t :: Int
t = round $ (n / 2) * (sqrt n) / 100
(toSplit,others) <- List.span (\a -> Set.size a > t) <$> clusterNodes2sets <$> f mp
cls' <- mapM f $ map (\s -> removeNodes s mp) toSplit
pure $ setNodes2clusterNodes $ others <> (List.concat $ map clusterNodes2sets cls')
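-- Note on the threshold t above (rough order of magnitude): with n = 400
-- distinct nodes, t = round ((400 / 2) * sqrt 400 / 100) = round (200 * 20 / 100) = 40,
-- so only communities of more than 40 nodes are split and re-clustered.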
setNodes2clusterNodes :: [Set NodeId] -> [ClusterNode]
setNodes2clusterNodes ns = List.concat $ map (\(n,ns') -> toCluster n ns') $ zip [1..] ns
where
toCluster :: CommunityId -> Set NodeId -> [ClusterNode]
toCluster cId setNodeId = map (\n -> ClusterNode n cId) (Set.toList setNodeId)
removeNodes :: Set NodeId
-> Map (NodeId, NodeId) Double
-> Map (NodeId, NodeId) Double
removeNodes s = Map.filterWithKey (\(n1,n2) _v -> Set.member n1 s && Set.member n2 s)
clusterNodes2sets :: [ClusterNode] -> [Set NodeId]
clusterNodes2sets = Dico.elems
. Dico.fromListWith (<>)
. (map ((Tuple.second Set.singleton) . swap . nodeId2comId))
----------------------------------------------------------------------
----------------------------------------------------------------------
data Bridgeness = Bridgeness_Basic { bridgeness_partitions :: [ClusterNode]
......@@ -116,7 +154,7 @@ filterComs _b m = Map.filter (\n -> length n > 0) $ mapWithKey filter' m
filter' (c1,c2) a
| c1 == c2 = a
-- TODO use n here
| otherwise = take n $ List.sortOn (Down . snd) a
| otherwise = take (2*n) $ List.sortOn (Down . snd) a
where
n :: Int
n = round $ 100 * a' / t
......
......@@ -23,9 +23,9 @@ import GHC.Float (sin, cos)
import GHC.Generics (Generic)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
import Gargantext.Core.Methods.Similarities.Conditional (conditional)
-- import Gargantext.Core.Methods.Similarities.Conditional (conditional)
import Gargantext.Core.Statistics
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId)
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, recursiveClustering)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
......@@ -48,7 +48,6 @@ import qualified Graph.BAC.ProxemyOptim as BAC
import qualified IGraph as Igraph
import qualified IGraph.Algorithms.Layout as Layout
data PartitionMethod = Spinglass | Confluence | Infomap
deriving (Generic, Eq, Ord, Enum, Bounded, Show)
instance FromJSON PartitionMethod
......@@ -124,16 +123,27 @@ cooc2graphWith' doPartitions bridgenessMethod multi similarity threshold strengt
distanceMap `seq` diag `seq` ti `seq` return ()
--{- -- Debug
-- saveAsFileDebug "/tmp/distanceMap" distanceMap
-- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
-- To Work with Igraph
saveAsFileDebug "/tmp/distanceMap" ( List.intercalate ";"
$ Set.toList
$ Set.fromList
$ map (\(k1,k2) -> if k1 < k2
then show (k1+1) <> " " <> show (k2+1)
else show (k2+1) <> " " <> show (k1+1)
)
$ Map.keys
$ Map.filter (>0.005) distanceMap
)
saveAsFileDebug "/tmp/distanceMap.data" distanceMap
saveAsFileDebug "/tmp/distanceMap.cooc" myCooc
-- printDebug "similarities" similarities
--}
partitions <- if (Map.size distanceMap > 0)
then doPartitions distanceMap
then recursiveClustering doPartitions distanceMap
else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
, "Maybe you should add more Map Terms in your list"
, "Tutorial: link todo"
, "Tutorial: TODO"
]
length partitions `seq` return ()
......@@ -155,6 +165,33 @@ doSimilarityMap :: Similarity
, Map (Index, Index) Int
, Map NgramsTerm Index
)
doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
where
myCooc' = Map.fromList $ HashMap.toList myCooc
(_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
$ Map.fromList
$ HashMap.toList myCooc
(ti, _it) = createIndices theMatrix
tiSize = Map.size ti
similarities = (\m -> m `seq` m)
$ (\m -> m `seq` measure Conditional m)
$ (\m -> m `seq` map2mat Square 0 tiSize m)
$ theMatrix `seq` toIndex ti theMatrix
links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int))
distanceMap = Map.fromList
$ List.take links
$ (if strength == Weak then List.reverse else identity)
$ List.sortOn snd
$ Map.toList
$ Map.filter (> threshold)
-- $ conditional myCooc
$ similarities `seq` mat2map similarities
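-- e.g. with Map.size ti = 100 terms (log is the natural log from Prelude):
-- links = round (10 * 100 * (log 100)^2) ≈ 21208 candidate edges kept.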
doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
where
-- TODO remove below
......@@ -181,20 +218,6 @@ doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex
$ (\m -> m `seq` Map.filter (> threshold) m)
$ similarities `seq` mat2map similarities
doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
where
myCooc' = Map.fromList $ HashMap.toList myCooc
(ti, _it) = createIndices myCooc'
links = round (let n :: Double = fromIntegral (Map.size ti) in n * (log n)^(2::Int))
distanceMap = toIndex ti
$ Map.fromList
$ List.take links
$ (if strength == Weak then List.reverse else identity)
$ List.sortOn snd
$ HashMap.toList
$ HashMap.filter (> threshold)
$ conditional myCooc
----------------------------------------------------------
-- | From data to Graph
type Occurrences = Int
......
......@@ -21,6 +21,7 @@ import Gargantext.Core.Viz.Graph.Index
import Graph.Types (ClusterNode(..))
import IGraph hiding (mkGraph, neighbors, edges, nodes, Node, Graph)
import Protolude
import Gargantext.Prelude (saveAsFileDebug)
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified IGraph as IG
......@@ -79,7 +80,11 @@ partitions_spinglass' :: (Serialize v, Serialize e)
=> Seed -> IG.Graph 'U v e -> IO [[Int]]
partitions_spinglass' s g = do
gen <- IG.withSeed s pure
IG.findCommunity g Nothing Nothing IG.spinglass gen
res <- IG.findCommunity g Nothing Nothing IG.spinglass gen
-- res <- IG.findCommunity g Nothing Nothing IG.leiden gen
-- res <- IG.findCommunity g Nothing Nothing IG.infomap gen
saveAsFileDebug "/tmp/res" res
pure res
toClusterNode :: [[Int]] -> [ClusterNode]
......
......@@ -424,14 +424,15 @@ saveDocNgramsWith lId mapNgramsDocs' = do
--printDebug "saveDocNgramsWith" mapCgramsId
-- insertDocNgrams
_return <- insertContextNodeNgrams2
$ catMaybes [ ContextNodeNgrams2 <$> Just nId
let ngrams2insert = catMaybes [ ContextNodeNgrams2 <$> Just nId
<*> (getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms''))
<*> Just (fromIntegral w :: Double)
| (terms'', mapNgramsTypes) <- HashMap.toList mapNgramsDocs
, (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
, (nId, (w, _cnt)) <- Map.toList mapNodeIdWeight
, (nId, (w, _cnt)) <- Map.toList mapNodeIdWeight
]
printDebug "Ngrams2Insert" ngrams2insert
_return <- insertContextNodeNgrams2 ngrams2insert
-- to be removed
_ <- insertDocNgrams lId $ HashMap.mapKeys (indexNgrams terms2id) mapNgramsDocs
......
......@@ -7,8 +7,9 @@ extra-package-dbs: []
skip-ghc-check: true
packages:
- .
#- 'deps/gargantext-graph'
#- 'deps/haskell-opaleye'
# - 'deps/haskell-igraph'
# - 'deps/crawlers/arxiv-api'
# - 'deps/haskell-opaleye'
docker:
enable: false
......@@ -78,6 +79,8 @@ extra-deps:
commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b
- git: https://gitlab.iscpif.fr/gargantext/crawlers/arxiv-api.git
commit: 2d7e5753cbbce248b860b571a0e9885415c846f7
#- git: https://gitlab.iscpif.fr/gargantext/crawlers/arxiv-api.git
#commit: a2d78abeaec9315be765b90d5e51a4a50c48e7b8
#- git: https://gitlab.iscpif.fr/cgenie/arxiv-api.git
#- arxiv-0.0.3@sha256:02de1114091d11f1f3ab401d104d125ad4301260806feb7f63b3dcefc7db88cf,1588
......@@ -95,8 +98,10 @@ extra-deps:
# Graph libs
#- git: https://github.com/kaizhang/haskell-igraph.git
- git: https://github.com/alpmestan/haskell-igraph.git
commit: 9f55eb36639c8e0965c8bc539a57738869f33e9a
#- git: https://github.com/alpmestan/haskell-igraph.git
# commit: 9f55eb36639c8e0965c8bc539a57738869f33e9a
- git: https://gitlab.iscpif.fr/gargantext/haskell-igraph.git
commit: 03c8885d0166255ed7bf0b624a07610ea68ec02c
- git: https://gitlab.iscpif.fr/gargantext/haskell-infomap.git
commit: 6d1d60b952b9b2b272b58fc5539700fd8890ac88
......