Commit e608bb05 authored by Alexandre Delanoë

[FIX] Indexes for database and score counts

parent 262a4e7d
...@@ -252,6 +252,35 @@ sqlSchema = do ...@@ -252,6 +252,35 @@ sqlSchema = do
DROP TRIGGER if EXISTS trigger_insert_count ON nodes_nodes; DROP TRIGGER if EXISTS trigger_insert_count ON nodes_nodes;
-- Indexes needed to speed up the deletes
-- Trigger for constraint node_ngrams_node_id_fkey
CREATE INDEX IF NOT EXISTS node_ngrams_node_id_idx ON public.node_ngrams USING btree (node_id);
-- Trigger for constraint node_node_ngrams2_node_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams2_node_id_idx ON public.node_node_ngrams2 USING btree (node_id);
-- Trigger for constraint node_node_ngrams_node1_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams_node1_id_idx ON public.node_node_ngrams USING btree (node1_id);
-- Trigger for constraint node_node_ngrams_node2_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams_node2_id_idx ON public.node_node_ngrams USING btree (node2_id);
-- Trigger for constraint nodes_nodes_node1_id_fkey
CREATE INDEX IF NOT EXISTS nodes_nodes_node1_id_idx ON public.nodes_nodes USING btree (node1_id);
-- Trigger for constraint nodes_nodes_node2_id_fkey
CREATE INDEX IF NOT EXISTS nodes_nodes_node2_id_idx ON public.nodes_nodes USING btree (node2_id);
-- Trigger for constraint nodes_parent_id_fkey
CREATE INDEX IF NOT EXISTS nodes_parent_id_idx ON public.nodes USING btree (parent_id);
-- Trigger for constraint rights_node_id_fkey
CREATE INDEX IF NOT EXISTS rights_node_id_idx ON public.rights USING btree (node_id);
-- Trigger for constraint nodes_contexts_node_id_fkey
CREATE INDEX IF NOT EXISTS nodes_contexts_node_id_idx ON public.nodes_contexts USING btree (node_id);
-- Trigger for constraint context_node_ngrams_node_id_fkey
CREATE INDEX IF NOT EXISTS context_node_node_id_idx ON public.context_node_ngrams USING btree (node_id);
|] |]
......
#!/bin/bash #!/bin/bash
# sudo su postgres # sudo su postgres
# postgresql://$USER:$PW@localhost/$DB # postgresql://$USER:$PW@localhost/$DB
PW="C8kdcUrAQy66U" INIFILE=$1
DB="gargandb1"
USER="gargantua"
#psql -c "CREATE USER \"${USER}\"" getter () {
#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'" grep $1 $INIFILE | sed "s/^.*= //"
}
psql -c "DROP DATABASE IF EXISTS \"${DB}\"" USER=$(getter "DB_USER")
createdb "${DB}" NAME=$(getter "DB_NAME")
#psql "${DB}" < schema.sql PASS=$(getter "DB_PASS")
HOST=$(getter "DB_HOST")
PORT=$(getter "DB_PORT")
../../bin/psql ../../gargantext.ini < gargandb.dump
psql -c "ALTER DATABASE \"${DB}\" OWNER to \"${USER}\"" #psql -c "CREATE USER \"${USER}\""
#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
psql -c "DROP DATABASE IF EXISTS \"${NAME}\""
createdb "${NAME}"
psql "${NAME}" < extensions.sql
#psql "${NAME}" < schema.sql
#../../bin/psql ../../gargantext.ini < gargandb.dump
psql -c "ALTER DATABASE \"${NAME}\" OWNER to \"${USER}\""
CREATE EXTENSION IF NOT EXISTS pgcrypto;
CREATE EXTENSION IF NOT EXISTS tsm_system_rows;
...@@ -106,7 +106,7 @@ repoSnapshot repoDir = repoDir <> "/repo.cbor" ...@@ -106,7 +106,7 @@ repoSnapshot repoDir = repoDir <> "/repo.cbor"
repoSaverAction :: RepoDirFilePath -> Serialise a => a -> IO () repoSaverAction :: RepoDirFilePath -> Serialise a => a -> IO ()
repoSaverAction repoDir a = do repoSaverAction repoDir a = do
withTempFile repoDir "tmp-repo.cbor" $ \fp h -> do withTempFile repoDir "tmp-repo.cbor" $ \fp h -> do
printDebug "repoSaverAction" fp -- printDebug "repoSaverAction" fp
L.hPut h $ serialise a L.hPut h $ serialise a
hClose h hClose h
renameFile fp (repoSnapshot repoDir) renameFile fp (repoSnapshot repoDir)
......
...@@ -41,7 +41,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document ...@@ -41,7 +41,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes) import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Ngrams import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node import Gargantext.Database.Schema.Context
import Gargantext.Database.Types (Indexed(..)) import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude import Gargantext.Prelude
import Network.HTTP.Media ((//), (/:)) import Network.HTTP.Media ((//), (/:))
...@@ -155,12 +155,12 @@ reIndexWith cId lId nt lts = do ...@@ -155,12 +155,12 @@ reIndexWith cId lId nt lts = do
<$> HashMap.toList <$> HashMap.toList
<$> getTermsWith identity [lId] nt lts <$> getTermsWith identity [lId] nt lts
-- printDebug "ts" ts printDebug "ts" ts
-- Taking the ngrams with 0 occurrences only (orphans) -- Taking the ngrams with 0 occurrences only (orphans)
occs <- getOccByNgramsOnlyFast' cId lId nt ts occs <- getOccByNgramsOnlyFast' cId lId nt ts
-- printDebug "occs" occs printDebug "occs" occs
let orphans = List.concat let orphans = List.concat
$ map (\t -> case HashMap.lookup t occs of $ map (\t -> case HashMap.lookup t occs of
...@@ -168,28 +168,28 @@ reIndexWith cId lId nt lts = do ...@@ -168,28 +168,28 @@ reIndexWith cId lId nt lts = do
Just n -> if n <= 1 then [t] else [ ] Just n -> if n <= 1 then [t] else [ ]
) ts ) ts
-- printDebug "orphans" orphans printDebug "orphans" orphans
-- Get all documents of the corpus -- Get all documents of the corpus
docs <- selectDocNodes cId docs <- selectDocNodes cId
-- printDebug "docs length" (List.length docs) printDebug "docs length" (List.length docs)
-- Checking Text documents where orphans match -- Checking Text documents where orphans match
-- TODO Tests here -- TODO Tests here
let let
ngramsByDoc = map (HashMap.fromList) ngramsByDoc = map (HashMap.fromList)
$ map (map (\(k,v) -> (SimpleNgrams (text2ngrams k), v))) $ map (map (\(k,v) -> (SimpleNgrams (text2ngrams k), v)))
$ map (\doc -> List.zip $ map (\doc -> List.zip
(termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) orphans) (termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) orphans)
$ Text.unlines $ catMaybes $ Text.unlines $ catMaybes
[ doc ^. node_hyperdata . hd_title [ doc ^. context_hyperdata . hd_title
, doc ^. node_hyperdata . hd_abstract , doc ^. context_hyperdata . hd_abstract
] ]
) )
(List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. node_id) 1 )]]) (List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]])
) (map context2node docs) ) docs
-- printDebug "ngramsByDoc" ngramsByDoc printDebug "ngramsByDoc" ngramsByDoc
-- Saving the indexation in database -- Saving the indexation in database
_ <- mapM (saveDocNgramsWith lId) ngramsByDoc _ <- mapM (saveDocNgramsWith lId) ngramsByDoc
......
...@@ -177,8 +177,8 @@ type NodeStoryDir = FilePath ...@@ -177,8 +177,8 @@ type NodeStoryDir = FilePath
writeNodeStories :: NodeStoryDir -> NodeListStory -> IO () writeNodeStories :: NodeStoryDir -> NodeListStory -> IO ()
writeNodeStories fp nls = do writeNodeStories fp nls = do
done <- mapM (writeNodeStory fp) $ splitByNode nls _done <- mapM (writeNodeStory fp) $ splitByNode nls
printDebug "[writeNodeStories]" done -- printDebug "[writeNodeStories]" done
pure () pure ()
writeNodeStory :: NodeStoryDir -> (NodeId, NodeListStory) -> IO () writeNodeStory :: NodeStoryDir -> (NodeId, NodeListStory) -> IO ()
...@@ -192,7 +192,7 @@ splitByNode (NodeStory m) = ...@@ -192,7 +192,7 @@ splitByNode (NodeStory m) =
saverAction' :: Serialise a => NodeStoryDir -> NodeId -> a -> IO () saverAction' :: Serialise a => NodeStoryDir -> NodeId -> a -> IO ()
saverAction' repoDir nId a = do saverAction' repoDir nId a = do
withTempFile repoDir ((cs $ show nId) <> "-tmp-repo.cbor") $ \fp h -> do withTempFile repoDir ((cs $ show nId) <> "-tmp-repo.cbor") $ \fp h -> do
printDebug "[repoSaverAction]" fp -- printDebug "[repoSaverAction]" fp
DBL.hPut h $ serialise a DBL.hPut h $ serialise a
hClose h hClose h
renameFile fp (nodeStoryPath repoDir nId) renameFile fp (nodeStoryPath repoDir nId)
......
...@@ -319,6 +319,8 @@ saveDocNgramsWith :: ( FlowCmdM env err m) ...@@ -319,6 +319,8 @@ saveDocNgramsWith :: ( FlowCmdM env err m)
-> m () -> m ()
saveDocNgramsWith lId mapNgramsDocs' = do saveDocNgramsWith lId mapNgramsDocs' = do
terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs' terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
printDebug "terms2id" terms2id
let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs' let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'
-- new -- new
...@@ -326,7 +328,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do ...@@ -326,7 +328,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
$ map (first _ngramsTerms . second Map.keys) $ map (first _ngramsTerms . second Map.keys)
$ HashMap.toList mapNgramsDocs $ HashMap.toList mapNgramsDocs
-- printDebug "saveDocNgramsWith" mapCgramsId printDebug "saveDocNgramsWith" mapCgramsId
-- insertDocNgrams -- insertDocNgrams
_return <- insertContextNodeNgrams2 _return <- insertContextNodeNgrams2
$ catMaybes [ ContextNodeNgrams2 <$> Just nId $ catMaybes [ ContextNodeNgrams2 <$> Just nId
......
...@@ -35,7 +35,7 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList) ...@@ -35,7 +35,7 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
RETURN NEW; RETURN NEW;
END IF; END IF;
IF TG_OP = 'INSERT' THEN IF TG_OP = 'INSERT' THEN
INSERT INTO context_node_ngrams (context_id, node_id, ngrams_id, ngrams_type, weight) INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
select n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type, count(*) from NEW as new0 select n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type, count(*) from NEW as new0
INNER JOIN contexts n ON n.id = new0.context_id INNER JOIN contexts n ON n.id = new0.context_id
INNER JOIN nodes n2 ON n2.id = new0.node_id INNER JOIN nodes n2 ON n2.id = new0.node_id
...@@ -43,8 +43,8 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList) ...@@ -43,8 +43,8 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
AND n.typename = ? -- not mandatory AND n.typename = ? -- not mandatory
AND n.parent_id <> n2.id -- not mandatory AND n.parent_id <> n2.id -- not mandatory
GROUP BY n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type GROUP BY n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type
ON CONFLICT (context_id, node_id, ngrams_id, ngrams_type) ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
DO UPDATE set weight = context_node_ngrams.weight + excluded.weight DO UPDATE set weight = node_node_ngrams.weight + excluded.weight
; ;
END IF; END IF;
......
...@@ -42,7 +42,7 @@ triggerInsertCount lId = execPGSQuery query (lId, nodeTypeId NodeList) ...@@ -42,7 +42,7 @@ triggerInsertCount lId = execPGSQuery query (lId, nodeTypeId NodeList)
, count(*) AS weight , count(*) AS weight
FROM NEW as new1 FROM NEW as new1
INNER JOIN contexts doc ON doc.id = new1.context_id INNER JOIN contexts doc ON doc.id = new1.context_id
INNER JOIN nodes lists ON lists.parent_id = lists.parent_id INNER JOIN nodes lists ON lists.parent_id = new1.node_id
INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
WHERE lists.id in (?, lists.id) WHERE lists.id in (?, lists.id)
AND lists.typename = ? AND lists.typename = ?
...@@ -76,9 +76,9 @@ triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList) ...@@ -76,9 +76,9 @@ triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList)
, cnn.ngrams_type AS ngrams_type , cnn.ngrams_type AS ngrams_type
, count(*) AS fix_count , count(*) AS fix_count
FROM NEW as new1 FROM NEW as new1
INNER JOIN contexts doc ON doc.id = new1.context_id INNER JOIN contexts doc ON doc.id = new1.context_id
INNER JOIN nodes lists ON new1.node_id = lists.parent_id INNER JOIN nodes lists ON lists.parent_id = new1.node_id
INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
WHERE lists.id in (?, lists.id) -- (masterList_id, userLists) WHERE lists.id in (?, lists.id) -- (masterList_id, userLists)
AND lists.typename = ? AND lists.typename = ?
GROUP BY node1_id, node2_id, ngrams_id, ngrams_type GROUP BY node1_id, node2_id, ngrams_id, ngrams_type
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment