Commit be444d2e authored by Alexandre Delanoë

[FIX] reindex ngrams with all documents and with all ngrams

parent 01be6e4a
......@@ -37,7 +37,6 @@ import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Action.Flow (saveDocNgramsWith)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getNode)
......@@ -145,16 +144,16 @@ reIndexWith cId lId nt lts = do
-- printDebug "ts" ts
-- Taking the ngrams with 0 occurrences only (orphans)
occs <- getOccByNgramsOnlyFast' cId lId nt ts
-- occs <- getOccByNgramsOnlyFast' cId lId nt ts
-- printDebug "occs" occs
let orphans = List.concat
let orphans = ts {- List.concat
$ map (\t -> case HashMap.lookup t occs of
Nothing -> [t]
Just n -> if n <= 1 then [t] else [ ]
) ts
-- printDebug "orphans" orphans
-- Get all documents of the corpus
......@@ -67,7 +67,7 @@ data UpdateNodeParams = UpdateNodeParamsList { methodList :: !Method }
| UpdateNodeParamsBoard { methodBoard :: !Charts }
| LinkNodeReq { nodeType :: !NodeType
, id :: !NodeId }
, id :: !NodeId }
| UpdateNodePhylo { config :: !PhyloSubConfig }
deriving (Generic)
......@@ -128,7 +128,7 @@ updateNode _uId nid1 (LinkNodeReq nt nid2) logStatus = do
NodeAnnuaire -> pairing nid2 nid1 Nothing -- defaultList
NodeCorpus -> pairing nid1 nid2 Nothing -- defaultList
_ -> panic $ "[G.API.N.Update.updateNode] NodeType not implemented"
<> cs (show nt)
<> cs (show nt <> " nid1: " <> show nid1 <> " nid2: " <> show nid2)
pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
......@@ -94,7 +94,7 @@ getGraph _uId nId = do
mcId <- getClosestParentIdByType nId NodeCorpus
let cId = maybe (panic "[G.V.G.API] Node has no parent") identity mcId
printDebug "[getGraph] getting list for cId" cId
-- printDebug "[getGraph] getting list for cId" cId
listId <- defaultList cId
repo <- getRepo' [listId]
......@@ -178,23 +178,18 @@ computeGraph cId method d nt repo = do
$ mapTermListRoot [lId] nt repo
myCooc <- HashMap.filter (>1) -- Removing the hapax (ngrams with 1 cooc)
-- <$> HashMap.filterWithKey (\(x,y) _ -> x /= y)
-- <$> getCoocByNgrams (if d == Conditional then Diagonal True else Diagonal False)
<$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs
<$> getContextsByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
-- printDebug "myCooc" myCooc
-- saveAsFileDebug "debug/my-cooc" myCooc
listNgrams <- getListNgrams [lId] nt
-- graph <- liftBase $ cooc2graphWith Confluence d 0 myCooc
-- graph <- liftBase $ cooc2graphWith Spinglass d 0 myCooc
graph <- liftBase $ cooc2graphWith method d 0 myCooc
-- saveAsFileDebug "debug/graph" graph
pure $ mergeGraphNgrams graph (Just listNgrams)
let graph' = mergeGraphNgrams graph (Just listNgrams)
-- saveAsFileDebug "/tmp/graphWithNodes" graph'
pure graph'
defaultGraphMetadata :: HasNodeError err
......@@ -293,7 +288,7 @@ graphVersions n nId = do
Just listId -> do
repo <- getRepo' [listId]
let v = repo ^. unNodeStory . at listId . _Just . a_version
printDebug "graphVersions" v
-- printDebug "graphVersions" v
pure $ GraphVersions { gv_graph = listVersion
, gv_repo = v }
......@@ -104,10 +104,11 @@ cooc2graphWith' doPartitions distance threshold myCooc = do
(distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
{- -- Debug
saveAsFileDebug "debug/distanceMap" distanceMap
printDebug "similarities" similarities
--{- -- Debug
saveAsFileDebug "/tmp/distanceMap" distanceMap
saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
-- printDebug "similarities" similarities
partitions <- if (Map.size distanceMap > 0)
then doPartitions distanceMap
......@@ -169,13 +170,13 @@ doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', t
myCooc' = Map.fromList $ HashMap.toList myCooc
(ti, _it) = createIndices myCooc'
tiSize = Map.size ti
-- tiSize = Map.size ti
links = round (let n :: Double = fromIntegral tiSize in n * log n)
-- links = round (let n :: Double = fromIntegral tiSize in n * log n)
distanceMap = toIndex ti
$ Map.fromList
$ List.take links
-- $ List.take links
$ List.sortOn snd
$ HashMap.toList
$ HashMap.filter (> threshold)
......@@ -209,17 +210,18 @@ data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = no
, node_attributes = Attributes { clust_default = fromMaybe 0
(Map.lookup n community_id_by_node_id)
, node_children = [] }
, node_children = []
| (l, n) <- labels
, Set.member n nodesWithScores
edges = [ Edge { edge_source = cs (show s)
, edge_target = cs (show t)
, edge_weight = weight
, edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
, edge_id = cs (show i)
, edge_target = cs (show t)
, edge_weight = weight
, edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
, edge_id = cs (show i)
| (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
, s /= t
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment