Commit be444d2e authored by Alexandre Delanoë

[FIX] reindex ngrams with all documents and with all ngrams

parent 01be6e4a
...@@ -37,7 +37,6 @@ import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText) ...@@ -37,7 +37,6 @@ import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText)
import Gargantext.Core.Types.Main (ListType(..)) import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Action.Flow (saveDocNgramsWith) import Gargantext.Database.Action.Flow (saveDocNgramsWith)
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
import Gargantext.Database.Admin.Types.Hyperdata.Document import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getNode) import Gargantext.Database.Query.Table.Node (getNode)
...@@ -145,16 +144,16 @@ reIndexWith cId lId nt lts = do ...@@ -145,16 +144,16 @@ reIndexWith cId lId nt lts = do
-- printDebug "ts" ts -- printDebug "ts" ts
-- Taking the ngrams with 0 occurrences only (orphans) -- Taking the ngrams with 0 occurrences only (orphans)
occs <- getOccByNgramsOnlyFast' cId lId nt ts -- occs <- getOccByNgramsOnlyFast' cId lId nt ts
-- printDebug "occs" occs -- printDebug "occs" occs
let orphans = List.concat let orphans = ts {- List.concat
$ map (\t -> case HashMap.lookup t occs of $ map (\t -> case HashMap.lookup t occs of
Nothing -> [t] Nothing -> [t]
Just n -> if n <= 1 then [t] else [ ] Just n -> if n <= 1 then [t] else [ ]
) ts ) ts
-}
-- printDebug "orphans" orphans -- printDebug "orphans" orphans
-- Get all documents of the corpus -- Get all documents of the corpus
......
...@@ -67,7 +67,7 @@ data UpdateNodeParams = UpdateNodeParamsList { methodList :: !Method } ...@@ -67,7 +67,7 @@ data UpdateNodeParams = UpdateNodeParamsList { methodList :: !Method }
| UpdateNodeParamsBoard { methodBoard :: !Charts } | UpdateNodeParamsBoard { methodBoard :: !Charts }
| LinkNodeReq { nodeType :: !NodeType | LinkNodeReq { nodeType :: !NodeType
, id :: !NodeId } , id :: !NodeId }
| UpdateNodePhylo { config :: !PhyloSubConfig } | UpdateNodePhylo { config :: !PhyloSubConfig }
deriving (Generic) deriving (Generic)
...@@ -128,7 +128,7 @@ updateNode _uId nid1 (LinkNodeReq nt nid2) logStatus = do ...@@ -128,7 +128,7 @@ updateNode _uId nid1 (LinkNodeReq nt nid2) logStatus = do
NodeAnnuaire -> pairing nid2 nid1 Nothing -- defaultList NodeAnnuaire -> pairing nid2 nid1 Nothing -- defaultList
NodeCorpus -> pairing nid1 nid2 Nothing -- defaultList NodeCorpus -> pairing nid1 nid2 Nothing -- defaultList
_ -> panic $ "[G.API.N.Update.updateNode] NodeType not implemented" _ -> panic $ "[G.API.N.Update.updateNode] NodeType not implemented"
<> cs (show nt) <> cs (show nt <> " nid1: " <> show nid1 <> " nid2: " <> show nid2)
pure JobLog { _scst_succeeded = Just 2 pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0 , _scst_failed = Just 0
......
...@@ -94,7 +94,7 @@ getGraph _uId nId = do ...@@ -94,7 +94,7 @@ getGraph _uId nId = do
mcId <- getClosestParentIdByType nId NodeCorpus mcId <- getClosestParentIdByType nId NodeCorpus
let cId = maybe (panic "[G.V.G.API] Node has no parent") identity mcId let cId = maybe (panic "[G.V.G.API] Node has no parent") identity mcId
printDebug "[getGraph] getting list for cId" cId -- printDebug "[getGraph] getting list for cId" cId
listId <- defaultList cId listId <- defaultList cId
repo <- getRepo' [listId] repo <- getRepo' [listId]
...@@ -178,23 +178,18 @@ computeGraph cId method d nt repo = do ...@@ -178,23 +178,18 @@ computeGraph cId method d nt repo = do
$ mapTermListRoot [lId] nt repo $ mapTermListRoot [lId] nt repo
myCooc <- HashMap.filter (>1) -- Removing the hapax (ngrams with 1 cooc) myCooc <- HashMap.filter (>1) -- Removing the hapax (ngrams with 1 cooc)
-- <$> HashMap.filterWithKey (\(x,y) _ -> x /= y)
-- <$> getCoocByNgrams (if d == Conditional then Diagonal True else Diagonal False)
<$> getCoocByNgrams (Diagonal True) <$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs <$> groupNodesByNgrams ngs
<$> getContextsByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs) <$> getContextsByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
-- printDebug "myCooc" myCooc
-- saveAsFileDebug "debug/my-cooc" myCooc
listNgrams <- getListNgrams [lId] nt listNgrams <- getListNgrams [lId] nt
-- graph <- liftBase $ cooc2graphWith Confluence d 0 myCooc
-- graph <- liftBase $ cooc2graphWith Spinglass d 0 myCooc
graph <- liftBase $ cooc2graphWith method d 0 myCooc graph <- liftBase $ cooc2graphWith method d 0 myCooc
-- saveAsFileDebug "debug/graph" graph
pure $ mergeGraphNgrams graph (Just listNgrams) let graph' = mergeGraphNgrams graph (Just listNgrams)
-- saveAsFileDebug "/tmp/graphWithNodes" graph'
pure graph'
defaultGraphMetadata :: HasNodeError err defaultGraphMetadata :: HasNodeError err
...@@ -293,7 +288,7 @@ graphVersions n nId = do ...@@ -293,7 +288,7 @@ graphVersions n nId = do
Just listId -> do Just listId -> do
repo <- getRepo' [listId] repo <- getRepo' [listId]
let v = repo ^. unNodeStory . at listId . _Just . a_version let v = repo ^. unNodeStory . at listId . _Just . a_version
printDebug "graphVersions" v -- printDebug "graphVersions" v
pure $ GraphVersions { gv_graph = listVersion pure $ GraphVersions { gv_graph = listVersion
, gv_repo = v } , gv_repo = v }
......
...@@ -104,10 +104,11 @@ cooc2graphWith' doPartitions distance threshold myCooc = do ...@@ -104,10 +104,11 @@ cooc2graphWith' doPartitions distance threshold myCooc = do
let let
(distanceMap, diag, ti) = doDistanceMap distance threshold myCooc (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
{- -- Debug --{- -- Debug
saveAsFileDebug "debug/distanceMap" distanceMap saveAsFileDebug "/tmp/distanceMap" distanceMap
printDebug "similarities" similarities saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
-} -- printDebug "similarities" similarities
--}
partitions <- if (Map.size distanceMap > 0) partitions <- if (Map.size distanceMap > 0)
then doPartitions distanceMap then doPartitions distanceMap
...@@ -169,13 +170,13 @@ doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', t ...@@ -169,13 +170,13 @@ doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', t
where where
myCooc' = Map.fromList $ HashMap.toList myCooc myCooc' = Map.fromList $ HashMap.toList myCooc
(ti, _it) = createIndices myCooc' (ti, _it) = createIndices myCooc'
tiSize = Map.size ti -- tiSize = Map.size ti
links = round (let n :: Double = fromIntegral tiSize in n * log n) -- links = round (let n :: Double = fromIntegral tiSize in n * log n)
distanceMap = toIndex ti distanceMap = toIndex ti
$ Map.fromList $ Map.fromList
$ List.take links -- $ List.take links
$ List.sortOn snd $ List.sortOn snd
$ HashMap.toList $ HashMap.toList
$ HashMap.filter (> threshold) $ HashMap.filter (> threshold)
...@@ -209,17 +210,18 @@ data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = no ...@@ -209,17 +210,18 @@ data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = no
, node_attributes = Attributes { clust_default = fromMaybe 0 , node_attributes = Attributes { clust_default = fromMaybe 0
(Map.lookup n community_id_by_node_id) (Map.lookup n community_id_by_node_id)
} }
, node_children = [] } , node_children = []
}
) )
| (l, n) <- labels | (l, n) <- labels
, Set.member n nodesWithScores , Set.member n nodesWithScores
] ]
edges = [ Edge { edge_source = cs (show s) edges = [ Edge { edge_source = cs (show s)
, edge_target = cs (show t) , edge_target = cs (show t)
, edge_weight = weight , edge_weight = weight
, edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
, edge_id = cs (show i) , edge_id = cs (show i)
} }
| (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
, s /= t , s /= t
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment