[FIX] Indexes for database and score counts

e608bb05 · Alexandre Delanoë · 262a4e7d · e608bb05 · e608bb05 · e608bb05
Commit e608bb05 authored Jan 26, 2022 by Alexandre Delanoë
9 changed files
--- a/bin/gargantext-upgrade/Main.hs
+++ b/bin/gargantext-upgrade/Main.hs
@@ -252,6 +252,35 @@ sqlSchema = do
        DROP TRIGGER if EXISTS trigger_insert_count     ON nodes_nodes;


+        -- Indexes needed to speed up the deletes: one per constraint trigger listed below.
+        -- Trigger for constraint node_ngrams_node_id_fkey
+        CREATE INDEX IF NOT EXISTS node_ngrams_node_id_idx       ON public.node_ngrams USING btree (node_id);
+
+        -- Trigger for constraint node_node_ngrams2_node_id_fkey
+        CREATE INDEX IF NOT EXISTS node_node_ngrams2_node_id_idx ON public.node_node_ngrams2 USING btree (node_id);
+
+        -- Trigger for constraint node_node_ngrams_node1_id_fkey
+        CREATE INDEX IF NOT EXISTS node_node_ngrams_node1_id_idx  ON public.node_node_ngrams USING btree (node1_id);
+
+        -- Trigger for constraint node_node_ngrams_node2_id_fkey
+        CREATE INDEX IF NOT EXISTS node_node_ngrams_node2_id_idx  ON public.node_node_ngrams USING btree (node2_id);
+
+        -- Trigger for constraint nodes_nodes_node1_id_fkey
+        CREATE INDEX IF NOT EXISTS nodes_nodes_node1_id_idx ON public.nodes_nodes USING btree (node1_id);
+        -- Trigger for constraint nodes_nodes_node2_id_fkey
+        CREATE INDEX IF NOT EXISTS nodes_nodes_node2_id_idx ON public.nodes_nodes USING btree (node2_id);
+
+        -- Trigger for constraint nodes_parent_id_fkey
+        CREATE INDEX IF NOT EXISTS nodes_parent_id_idx ON public.nodes USING btree (parent_id);
+
+        -- Trigger for constraint rights_node_id_fkey
+        CREATE INDEX IF NOT EXISTS rights_node_id_idx ON public.rights USING btree (node_id);
+
+        -- Trigger for constraint nodes_contexts_node_id_fkey
+        CREATE INDEX IF NOT EXISTS nodes_contexts_node_id_idx ON public.nodes_contexts USING btree (node_id);
+
+        -- Trigger for constraint context_node_ngrams_node_id_fkey (index named <table>_<column>_idx like the ones above)
+        CREATE INDEX IF NOT EXISTS context_node_ngrams_node_id_idx ON public.context_node_ngrams USING btree (node_id);

  |]


--- a/devops/postgres/create
+++ b/devops/postgres/create
 #!/bin/bash

 # sudo su postgres
-
 # postgresql://$USER:$PW@localhost/$DB

-PW="C8kdcUrAQy66U"
-DB="gargandb1"
-USER="gargantua"
+INIFILE=$1

-#psql -c "CREATE USER \"${USER}\""
-#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
+# getter KEY: print the value of KEY from the INI file named by $INIFILE.
+# Selects every line containing KEY and strips everything up to the last "= ".
+getter () {
+  # Quote both expansions so a path containing spaces (or an empty argument)
+  # is not word-split or glob-expanded; with an empty $INIFILE grep now fails
+  # with an error instead of silently waiting on stdin.
+  grep "$1" "$INIFILE" | sed "s/^.*= //"
+}

-psql -c "DROP DATABASE IF EXISTS \"${DB}\""
-createdb "${DB}"
-#psql "${DB}" < schema.sql
+USER=$(getter "DB_USER")
+NAME=$(getter "DB_NAME")
+PASS=$(getter "DB_PASS")
+HOST=$(getter "DB_HOST")
+PORT=$(getter "DB_PORT")

-../../bin/psql ../../gargantext.ini < gargandb.dump

-psql -c "ALTER DATABASE \"${DB}\" OWNER to \"${USER}\""
+#psql -c "CREATE USER \"${USER}\""
+#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PASS}'"
+
+psql -c "DROP DATABASE IF EXISTS \"${NAME}\""
+createdb "${NAME}"
+psql "${NAME}" <  extensions.sql
+#psql "${NAME}" < schema.sql


+#../../bin/psql ../../gargantext.ini < gargandb.dump

+psql -c "ALTER DATABASE \"${NAME}\" OWNER to \"${USER}\""


--- a/devops/postgres/extensions.sql
+++ b/devops/postgres/extensions.sql
+-- Extensions loaded into a freshly created database by devops/postgres/create.
+CREATE EXTENSION IF NOT EXISTS pgcrypto;
+CREATE EXTENSION IF NOT EXISTS tsm_system_rows;
+
--- a/src/Gargantext/API/Admin/Settings.hs
+++ b/src/Gargantext/API/Admin/Settings.hs
@@ -106,7 +106,7 @@ repoSnapshot repoDir = repoDir <> "/repo.cbor"
 repoSaverAction :: RepoDirFilePath -> Serialise a => a -> IO ()
 repoSaverAction repoDir a = do
  withTempFile repoDir "tmp-repo.cbor" $ \fp h -> do
-    printDebug "repoSaverAction" fp
+    -- printDebug "repoSaverAction" fp
    L.hPut h $ serialise a
    hClose h
    renameFile fp (repoSnapshot repoDir)

--- a/src/Gargantext/API/Ngrams/List.hs
+++ b/src/Gargantext/API/Ngrams/List.hs
@@ -41,7 +41,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document
 import Gargantext.Database.Admin.Types.Node
 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
 import Gargantext.Database.Schema.Ngrams
-import Gargantext.Database.Schema.Node
+import Gargantext.Database.Schema.Context
 import Gargantext.Database.Types (Indexed(..))
 import Gargantext.Prelude
 import Network.HTTP.Media ((//), (/:))
@@ -155,12 +155,12 @@ reIndexWith cId lId nt lts = do
     <$> HashMap.toList
     <$> getTermsWith identity [lId] nt lts
  
-  -- printDebug "ts" ts
+  printDebug "ts" ts

  -- Taking the ngrams with 0 occurrences only (orphans)
  occs <- getOccByNgramsOnlyFast' cId lId nt ts

-  -- printDebug "occs" occs
+  printDebug "occs" occs

  let orphans = List.concat 
              $ map (\t -> case HashMap.lookup t occs of
@@ -168,28 +168,28 @@ reIndexWith cId lId nt lts = do
                       Just n  -> if n <= 1 then [t] else [ ]
                       ) ts

-  -- printDebug "orphans" orphans
+  printDebug "orphans" orphans

  -- Get all documents of the corpus
  docs <- selectDocNodes cId
-  -- printDebug "docs length" (List.length docs)
+  printDebug "docs length" (List.length docs)

  -- Checking Text documents where orphans match
  -- TODO Tests here
  let
    ngramsByDoc = map (HashMap.fromList)
                $ map (map (\(k,v) -> (SimpleNgrams (text2ngrams k), v)))
-                $  map (\doc -> List.zip
+                $ map (\doc -> List.zip
                                (termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) orphans)
                                             $ Text.unlines $ catMaybes
-                                               [ doc ^. node_hyperdata . hd_title
-                                               , doc ^. node_hyperdata . hd_abstract
+                                               [ doc ^. context_hyperdata . hd_title
+                                               , doc ^. context_hyperdata . hd_abstract
                                               ]
                                 )
-                                (List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. node_id) 1 )]])
-                        ) (map context2node docs)
+                                (List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]])
+                        ) docs

-  -- printDebug "ngramsByDoc" ngramsByDoc
+  printDebug "ngramsByDoc" ngramsByDoc

  -- Saving the indexation in database
  _ <- mapM (saveDocNgramsWith lId) ngramsByDoc

--- a/src/Gargantext/Core/NodeStory.hs
+++ b/src/Gargantext/Core/NodeStory.hs
@@ -177,8 +177,8 @@ type NodeStoryDir = FilePath

 writeNodeStories :: NodeStoryDir -> NodeListStory -> IO ()
 writeNodeStories fp nls = do
-  done <- mapM (writeNodeStory fp) $ splitByNode nls
-  printDebug "[writeNodeStories]" done
+  _done <- mapM (writeNodeStory fp) $ splitByNode nls
+  -- printDebug "[writeNodeStories]" done
  pure ()

 writeNodeStory :: NodeStoryDir -> (NodeId, NodeListStory) -> IO ()
@@ -192,7 +192,7 @@ splitByNode (NodeStory m) =
 saverAction' :: Serialise a => NodeStoryDir -> NodeId -> a -> IO ()
 saverAction' repoDir nId a = do
  withTempFile repoDir ((cs $ show nId) <> "-tmp-repo.cbor") $ \fp h -> do
-    printDebug "[repoSaverAction]" fp
+    -- printDebug "[repoSaverAction]" fp
    DBL.hPut h $ serialise a
    hClose h
    renameFile fp (nodeStoryPath repoDir nId)

--- a/src/Gargantext/Database/Action/Flow.hs
+++ b/src/Gargantext/Database/Action/Flow.hs
@@ -319,6 +319,8 @@ saveDocNgramsWith :: ( FlowCmdM env err m)
                  -> m ()
 saveDocNgramsWith lId mapNgramsDocs' = do
  terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
+  printDebug "terms2id" terms2id
+
  let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'

  -- new
@@ -326,7 +328,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
               $ map (first _ngramsTerms . second Map.keys)
               $ HashMap.toList mapNgramsDocs

-  -- printDebug "saveDocNgramsWith" mapCgramsId
+  printDebug "saveDocNgramsWith" mapCgramsId
  -- insertDocNgrams
  _return <- insertContextNodeNgrams2
           $ catMaybes [ ContextNodeNgrams2 <$> Just nId

--- a/src/Gargantext/Database/Admin/Trigger/ContextNodeNgrams.hs
+++ b/src/Gargantext/Database/Admin/Trigger/ContextNodeNgrams.hs
@@ -35,7 +35,7 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
              RETURN NEW;
            END IF;
            IF TG_OP = 'INSERT' THEN
-                INSERT INTO context_node_ngrams (context_id, node_id, ngrams_id, ngrams_type, weight)
+                INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
                select n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type, count(*) from NEW as new0
                    INNER JOIN contexts n ON n.id  = new0.context_id
                    INNER JOIN nodes n2 ON n2.id = new0.node_id
@@ -43,8 +43,8 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
                      AND n.typename = ?   -- not mandatory
                      AND n.parent_id <> n2.id -- not mandatory
                    GROUP BY n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type
-                ON CONFLICT (context_id, node_id, ngrams_id, ngrams_type)
-                   DO UPDATE set weight = context_node_ngrams.weight + excluded.weight
+                ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
+                   DO UPDATE set weight = node_node_ngrams.weight + excluded.weight
                   ;
            END IF;


--- a/src/Gargantext/Database/Admin/Trigger/NodesContexts.hs
+++ b/src/Gargantext/Database/Admin/Trigger/NodesContexts.hs
@@ -42,7 +42,7 @@ triggerInsertCount lId = execPGSQuery query (lId, nodeTypeId NodeList)
                     , count(*)         AS weight
                FROM NEW as new1
                    INNER JOIN contexts            doc ON doc.id          = new1.context_id
-                    INNER JOIN nodes             lists ON lists.parent_id = lists.parent_id
+                    INNER JOIN nodes             lists ON lists.parent_id = new1.node_id
                    INNER JOIN context_node_ngrams cnn ON cnn.context_id  = doc.id
                    WHERE lists.id in (?, lists.id)
                      AND lists.typename = ?
@@ -76,9 +76,9 @@ triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList)
                    , cnn.ngrams_type        AS ngrams_type
                    , count(*)               AS fix_count
                       FROM NEW as new1
-                       INNER JOIN contexts    doc         ON doc.id         = new1.context_id
-                       INNER JOIN nodes       lists       ON new1.node_id   = lists.parent_id
-                       INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
+                       INNER JOIN contexts    doc         ON doc.id          = new1.context_id
+                       INNER JOIN nodes       lists       ON lists.parent_id = new1.node_id
+                       INNER JOIN context_node_ngrams cnn ON cnn.context_id  = doc.id
                      WHERE lists.id in (?, lists.id)  -- (masterList_id, userLists)
                        AND lists.typename = ?
                      GROUP BY node1_id, node2_id, ngrams_id, ngrams_type