Commit e608bb05 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] Indexes for database and score counts

parent 262a4e7d
......@@ -252,6 +252,35 @@ sqlSchema = do
DROP TRIGGER if EXISTS trigger_insert_count ON nodes_nodes;
-- Indexes needed to speed up the deletes
-- Trigger for constraint node_ngrams_node_id_fkey
CREATE INDEX IF NOT EXISTS node_ngrams_node_id_idx ON public.node_ngrams USING btree (node_id);
-- Trigger for constraint node_node_ngrams2_node_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams2_node_id_idx ON public.node_node_ngrams2 USING btree (node_id);
-- Trigger for constraint node_node_ngrams_node1_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams_node1_id_idx ON public.node_node_ngrams USING btree (node1_id);
-- Trigger for constraint node_node_ngrams_node2_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams_node2_id_idx ON public.node_node_ngrams USING btree (node2_id);
-- Trigger for constraint nodes_nodes_node1_id_fkey
CREATE INDEX IF NOT EXISTS nodes_nodes_node1_id_idx ON public.nodes_nodes USING btree (node1_id);
-- Trigger for constraint nodes_nodes_node2_id_fkey
CREATE INDEX IF NOT EXISTS nodes_nodes_node2_id_idx ON public.nodes_nodes USING btree (node2_id);
-- Trigger for constraint nodes_parent_id_fkey
CREATE INDEX IF NOT EXISTS nodes_parent_id_idx ON public.nodes USING btree (parent_id);
-- Trigger for constraint rights_node_id_fkey
CREATE INDEX IF NOT EXISTS rights_node_id_idx ON public.rights USING btree (node_id);
-- Trigger for constraint nodes_contexts_node_id_fkey
CREATE INDEX IF NOT EXISTS nodes_contexts_node_id_idx ON public.nodes_contexts USING btree (node_id);
-- Trigger for constraint context_node_ngrams_node_id_fkey
CREATE INDEX IF NOT EXISTS context_node_node_id_idx ON public.context_node_ngrams USING btree (node_id);
|]
......
#!/bin/bash
# sudo su postgres
# postgresql://$USER:$PW@localhost/$DB
PW="C8kdcUrAQy66U"
DB="gargandb1"
USER="gargantua"
INIFILE=$1
#psql -c "CREATE USER \"${USER}\""
#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
getter () {
grep $1 $INIFILE | sed "s/^.*= //"
}
psql -c "DROP DATABASE IF EXISTS \"${DB}\""
createdb "${DB}"
#psql "${DB}" < schema.sql
USER=$(getter "DB_USER")
NAME=$(getter "DB_NAME")
PASS=$(getter "DB_PASS")
HOST=$(getter "DB_HOST")
PORT=$(getter "DB_PORT")
../../bin/psql ../../gargantext.ini < gargandb.dump
psql -c "ALTER DATABASE \"${DB}\" OWNER to \"${USER}\""
#psql -c "CREATE USER \"${USER}\""
#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
psql -c "DROP DATABASE IF EXISTS \"${NAME}\""
createdb "${NAME}"
psql "${NAME}" < extensions.sql
#psql "${NAME}" < schema.sql
#../../bin/psql ../../gargantext.ini < gargandb.dump
psql -c "ALTER DATABASE \"${NAME}\" OWNER to \"${USER}\""
CREATE EXTENSION IF NOT EXISTS pgcrypto;
CREATE EXTENSION IF NOT EXISTS tsm_system_rows;
......@@ -106,7 +106,7 @@ repoSnapshot repoDir = repoDir <> "/repo.cbor"
repoSaverAction :: RepoDirFilePath -> Serialise a => a -> IO ()
repoSaverAction repoDir a = do
withTempFile repoDir "tmp-repo.cbor" $ \fp h -> do
printDebug "repoSaverAction" fp
-- printDebug "repoSaverAction" fp
L.hPut h $ serialise a
hClose h
renameFile fp (repoSnapshot repoDir)
......
......@@ -41,7 +41,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Context
import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude
import Network.HTTP.Media ((//), (/:))
......@@ -155,12 +155,12 @@ reIndexWith cId lId nt lts = do
<$> HashMap.toList
<$> getTermsWith identity [lId] nt lts
-- printDebug "ts" ts
printDebug "ts" ts
-- Taking the ngrams with 0 occurrences only (orphans)
occs <- getOccByNgramsOnlyFast' cId lId nt ts
-- printDebug "occs" occs
printDebug "occs" occs
let orphans = List.concat
$ map (\t -> case HashMap.lookup t occs of
......@@ -168,28 +168,28 @@ reIndexWith cId lId nt lts = do
Just n -> if n <= 1 then [t] else [ ]
) ts
-- printDebug "orphans" orphans
printDebug "orphans" orphans
-- Get all documents of the corpus
docs <- selectDocNodes cId
-- printDebug "docs length" (List.length docs)
printDebug "docs length" (List.length docs)
-- Checking Text documents where orphans match
-- TODO Tests here
let
ngramsByDoc = map (HashMap.fromList)
$ map (map (\(k,v) -> (SimpleNgrams (text2ngrams k), v)))
$ map (\doc -> List.zip
$ map (\doc -> List.zip
(termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) orphans)
$ Text.unlines $ catMaybes
[ doc ^. node_hyperdata . hd_title
, doc ^. node_hyperdata . hd_abstract
[ doc ^. context_hyperdata . hd_title
, doc ^. context_hyperdata . hd_abstract
]
)
(List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. node_id) 1 )]])
) (map context2node docs)
(List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]])
) docs
-- printDebug "ngramsByDoc" ngramsByDoc
printDebug "ngramsByDoc" ngramsByDoc
-- Saving the indexation in database
_ <- mapM (saveDocNgramsWith lId) ngramsByDoc
......
......@@ -177,8 +177,8 @@ type NodeStoryDir = FilePath
writeNodeStories :: NodeStoryDir -> NodeListStory -> IO ()
writeNodeStories fp nls = do
done <- mapM (writeNodeStory fp) $ splitByNode nls
printDebug "[writeNodeStories]" done
_done <- mapM (writeNodeStory fp) $ splitByNode nls
-- printDebug "[writeNodeStories]" done
pure ()
writeNodeStory :: NodeStoryDir -> (NodeId, NodeListStory) -> IO ()
......@@ -192,7 +192,7 @@ splitByNode (NodeStory m) =
saverAction' :: Serialise a => NodeStoryDir -> NodeId -> a -> IO ()
saverAction' repoDir nId a = do
withTempFile repoDir ((cs $ show nId) <> "-tmp-repo.cbor") $ \fp h -> do
printDebug "[repoSaverAction]" fp
-- printDebug "[repoSaverAction]" fp
DBL.hPut h $ serialise a
hClose h
renameFile fp (nodeStoryPath repoDir nId)
......
......@@ -319,6 +319,8 @@ saveDocNgramsWith :: ( FlowCmdM env err m)
-> m ()
saveDocNgramsWith lId mapNgramsDocs' = do
terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
printDebug "terms2id" terms2id
let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'
-- new
......@@ -326,7 +328,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
$ map (first _ngramsTerms . second Map.keys)
$ HashMap.toList mapNgramsDocs
-- printDebug "saveDocNgramsWith" mapCgramsId
printDebug "saveDocNgramsWith" mapCgramsId
-- insertDocNgrams
_return <- insertContextNodeNgrams2
$ catMaybes [ ContextNodeNgrams2 <$> Just nId
......
......@@ -35,7 +35,7 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO context_node_ngrams (context_id, node_id, ngrams_id, ngrams_type, weight)
INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
select n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type, count(*) from NEW as new0
INNER JOIN contexts n ON n.id = new0.context_id
INNER JOIN nodes n2 ON n2.id = new0.node_id
......@@ -43,8 +43,8 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
AND n.typename = ? -- not mandatory
AND n.parent_id <> n2.id -- not mandatory
GROUP BY n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type
ON CONFLICT (context_id, node_id, ngrams_id, ngrams_type)
DO UPDATE set weight = context_node_ngrams.weight + excluded.weight
ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
DO UPDATE set weight = node_node_ngrams.weight + excluded.weight
;
END IF;
......
......@@ -42,7 +42,7 @@ triggerInsertCount lId = execPGSQuery query (lId, nodeTypeId NodeList)
, count(*) AS weight
FROM NEW as new1
INNER JOIN contexts doc ON doc.id = new1.context_id
INNER JOIN nodes lists ON lists.parent_id = lists.parent_id
INNER JOIN nodes lists ON lists.parent_id = new1.node_id
INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
WHERE lists.id in (?, lists.id)
AND lists.typename = ?
......@@ -76,9 +76,9 @@ triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList)
, cnn.ngrams_type AS ngrams_type
, count(*) AS fix_count
FROM NEW as new1
INNER JOIN contexts doc ON doc.id = new1.context_id
INNER JOIN nodes lists ON new1.node_id = lists.parent_id
INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
INNER JOIN contexts doc ON doc.id = new1.context_id
INNER JOIN nodes lists ON lists.parent_id = new1.node_id
INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
WHERE lists.id in (?, lists.id) -- (masterList_id, userLists)
AND lists.typename = ?
GROUP BY node1_id, node2_id, ngrams_id, ngrams_type
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment