Commit 639232be authored by Alp Mestanogullari's avatar Alp Mestanogullari

Fix confluence re-indexing bugs

parent b98ddbfc
Pipeline #3182 passed with stage
in 96 minutes and 26 seconds
......@@ -128,7 +128,6 @@ recomputeGraph :: FlowCmdM env err m
-> Bool
-> m Graph
recomputeGraph _uId nId method maybeDistance maybeStrength force = do
printDebug "recomputeGraph begins" (nId, method)
nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
let
graph = nodeGraph ^. node_hyperdata . hyperdataGraph
......@@ -150,22 +149,15 @@ recomputeGraph _uId nId method maybeDistance maybeStrength force = do
mcId <- getClosestParentIdByType nId NodeCorpus
let cId = maybe (panic "[G.V.G.API] Node has no parent") identity mcId
printDebug "recomputeGraph corpus" cId
listId <- defaultList cId
printDebug "recomputeGraph list" listId
repo <- getRepo [listId]
let v = repo ^. unNodeStory . at listId . _Just . a_version
printDebug "recomputeGraph got repo, version: " v
let computeG mt = do
printDebug "about to run computeGraph" ()
g <- computeGraph cId method similarity strength NgramsTerms repo
seq g $ printDebug "graph computed" ()
!g <- computeGraph cId method similarity strength NgramsTerms repo
let g' = set graph_metadata mt g
seq g' $ printDebug "computed graph with new metadata" ()
nentries <- updateHyperdata nId (HyperdataGraph (Just g') camera)
printDebug "graph hyperdata updated" ("entries" :: [Char], nentries)
_nentries <- updateHyperdata nId (HyperdataGraph (Just g') camera)
pure g'
case graph of
......@@ -189,11 +181,8 @@ computeGraph :: FlowCmdM env err m
-> NodeListStory
-> m Graph
computeGraph cId method d strength nt repo = do
printDebug "computeGraph" (cId, method, nt)
lId <- defaultList cId
printDebug "computeGraph got list id: " lId
lIds <- selectNodesWithUsername NodeList userMaster
printDebug "computeGraph got nodes with username: " userMaster
let ngs = filterListWithRoot [MapTerm]
$ mapTermListRoot [lId] nt repo
......@@ -201,10 +190,8 @@ computeGraph cId method d strength nt repo = do
<$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs
<$> getContextsByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
printDebug "computeGraph got coocs" (HashMap.size myCooc)
graph <- liftBase $ cooc2graphWith method d 0 strength myCooc
printDebug "computeGraph got graph" ()
--listNgrams <- getListNgrams [lId] nt
--let graph' = mergeGraphNgrams graph (Just listNgrams)
......@@ -265,7 +252,7 @@ graphRecompute u n logStatus = do
, _scst_remaining = Just 1
, _scst_events = Just []
}
_g <- trace (show u) $ recomputeGraph u n Spinglass Nothing Nothing False
_g <- recomputeGraph u n Spinglass Nothing Nothing False
pure JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 0
......
......@@ -9,13 +9,11 @@ Portability : POSIX
-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
module Gargantext.Core.Viz.Graph.Tools
where
import Debug.Trace
import Data.Aeson
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
......@@ -26,7 +24,6 @@ import GHC.Generics (Generic)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Methods.Distances (Distance(..), measure)
import Gargantext.Core.Methods.Distances.Conditional (conditional)
-- import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
import Gargantext.Core.Statistics
import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
......@@ -110,7 +107,7 @@ cooc2graphWith' :: ToComId a
-> IO Graph
cooc2graphWith' doPartitions distance threshold strength myCooc = do
let (distanceMap, diag, ti) = doDistanceMap distance threshold strength myCooc
distanceMap `seq` trace "distanceMap OK" diag `seq` trace "diag OK" ti `seq` printDebug "ti done" ()
distanceMap `seq` diag `seq` ti `seq` return ()
--{- -- Debug
-- saveAsFileDebug "/tmp/distanceMap" distanceMap
......@@ -124,21 +121,15 @@ cooc2graphWith' doPartitions distance threshold strength myCooc = do
, "Maybe you should add more Map Terms in your list"
, "Tutorial: link todo"
]
partitions `seq` printDebug "partitions done" ()
length partitions `seq` return ()
let
nodesApprox :: Int
nodesApprox = n'
where
(as, bs) = List.unzip $ Map.keys distanceMap
n' = Set.size $ Set.fromList $ as <> bs
bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
-- indices of bridgeness seem to start at 1, while computeConfluences
-- expects 0-based indexing.
-- ks = map (\(a, b) -> (a-1, b-1)) (Map.keys bridgeness')
confluence' = Map.empty -- Map.mapKeys (\(a, b) -> (a+1, b+1)) $ BAC.computeConfluences 3 ks True
-- confluence (Map.keys bridgeness') 3 True False
seq bridgeness' $ printDebug "bridgeness OK" ()
seq confluence' $ printDebug "confluence OK" ()
!bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
!confluence' = BAC.computeConfluences 3 (Map.keys bridgeness') True
pure $ data2graph ti diag bridgeness' confluence' partitions
type Reverse = Bool
......@@ -161,21 +152,21 @@ doDistanceMap Distributional threshold strength myCooc = (distanceMap, toIndex t
(ti, _it) = createIndices theMatrix
tiSize = Map.size ti
similarities = (\m -> m `seq` trace "measure done" m)
$ (\m -> m `seq` trace "map2mat done" (measure Distributional m))
$ (\m -> m `seq` trace "toIndex done" (map2mat Square 0 tiSize m))
$ theMatrix `seq` trace "theMatrix done" (toIndex ti theMatrix)
similarities = (\m -> m `seq` m)
$ (\m -> m `seq` measure Distributional m)
$ (\m -> m `seq` map2mat Square 0 tiSize m)
$ theMatrix `seq` toIndex ti theMatrix
links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
distanceMap = Map.fromList . trace "fromList" identity
distanceMap = Map.fromList
$ List.take links
$ (if strength == Weak then List.reverse else identity)
$ List.sortOn snd
$ Map.toList
$ edgesFilter
$ (\m -> m `seq` trace "map2map done" (Map.filter (> threshold) m))
$ similarities `seq` mat2map (trace "similarities done" similarities)
$ (\m -> m `seq` Map.filter (> threshold) m)
$ similarities `seq` mat2map similarities
doDistanceMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
where
......
......@@ -35,7 +35,7 @@ extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit: 08096a4913572cf22762fa77613340207ec6d9fd
- git: https://gitlab.iscpif.fr/gargantext/gargantext-graph.git
commit: 642b9ec7ffa59a5db7b2ec7b24436e07309dc097
commit: 588e104fe7593210956610cab0041fd16584a4ce
# Data Mining Libs
- git: https://github.com/delanoe/data-time-segment.git
commit: 10a416b9f6c443866b36479c3441ebb3bcdeb7ef
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment