[ngrams] remove materialized view, some refactorings

parent e42661a8
Pipeline #4159 passed with stages
in 74 minutes and 53 seconds
...@@ -68,8 +68,8 @@ main = do ...@@ -68,8 +68,8 @@ main = do
let _secret = _gc_secretkey cfg let _secret = _gc_secretkey cfg
withDevEnv iniPath $ \env -> do withDevEnv iniPath $ \env -> do
_ <- runCmdDev env addIndex -- _ <- runCmdDev env addIndex
_ <- runCmdDev env refreshIndex -- _ <- runCmdDev env refreshIndex
___ ___
...@@ -77,34 +77,34 @@ main = do ...@@ -77,34 +77,34 @@ main = do
___ ___
pure () pure ()
refreshIndex :: Cmd'' DevEnv IOException () -- refreshIndex :: Cmd'' DevEnv IOException ()
refreshIndex = do -- refreshIndex = do
_ <- execPGSQuery [sql| REFRESH MATERIALIZED VIEW CONCURRENTLY context_node_ngrams_view; |] () -- _ <- execPGSQuery [sql| REFRESH MATERIALIZED VIEW CONCURRENTLY context_node_ngrams_view; |] ()
pure () -- pure ()
addIndex :: Cmd'' DevEnv IOException Int64 -- addIndex :: Cmd'' DevEnv IOException Int64
addIndex = do -- addIndex = do
execPGSQuery query () -- execPGSQuery query ()
where -- where
query = [sql| -- query = [sql|
CREATE MATERIALIZED VIEW IF NOT EXISTS context_node_ngrams_view AS -- CREATE MATERIALIZED VIEW IF NOT EXISTS context_node_ngrams_view AS
SELECT DISTINCT context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id -- SELECT DISTINCT context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id
FROM nodes_contexts -- FROM nodes_contexts
JOIN context_node_ngrams -- JOIN context_node_ngrams
ON context_node_ngrams.context_id = nodes_contexts.context_id; -- ON context_node_ngrams.context_id = nodes_contexts.context_id;
CREATE INDEX IF NOT EXISTS context_node_ngrams_context_id_ngrams_id_idx -- CREATE INDEX IF NOT EXISTS context_node_ngrams_context_id_ngrams_id_idx
ON context_node_ngrams(context_id, ngrams_id); -- ON context_node_ngrams(context_id, ngrams_id);
CREATE INDEX IF NOT EXISTS context_node_ngrams_view_context_id_idx -- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_context_id_idx
ON context_node_ngrams_view(context_id); -- ON context_node_ngrams_view(context_id);
CREATE INDEX IF NOT EXISTS context_node_ngrams_view_ngrams_id_idx -- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_ngrams_id_idx
ON context_node_ngrams_view(ngrams_id); -- ON context_node_ngrams_view(ngrams_id);
CREATE INDEX IF NOT EXISTS context_node_ngrams_view_node_id_idx -- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_node_id_idx
ON context_node_ngrams_view(node_id); -- ON context_node_ngrams_view(node_id);
CREATE UNIQUE INDEX IF NOT EXISTS context_node_ngrams_view_context_ngrams_node_uniq_idx -- CREATE UNIQUE INDEX IF NOT EXISTS context_node_ngrams_view_context_ngrams_node_uniq_idx
ON context_node_ngrams_view (context_id, ngrams_id, node_id); -- ON context_node_ngrams_view (context_id, ngrams_id, node_id);
CREATE INDEX IF NOT EXISTS node_stories_ngrams_id_idx -- CREATE INDEX IF NOT EXISTS node_stories_ngrams_id_idx
ON node_stories(ngrams_id); -- ON node_stories(ngrams_id);
|] -- |]
...@@ -337,23 +337,23 @@ CREATE OR REPLACE function node_pos(int, int) returns bigint ...@@ -337,23 +337,23 @@ CREATE OR REPLACE function node_pos(int, int) returns bigint
--create index node_by_pos on nodes using btree(node_pos(id,typename)); --create index node_by_pos on nodes using btree(node_pos(id,typename));
-- Optimization for Ngrams Table View -- Optimization for Ngrams Table View
CREATE MATERIALIZED VIEW IF NOT EXISTS context_node_ngrams_view AS -- CREATE MATERIALIZED VIEW IF NOT EXISTS context_node_ngrams_view AS
SELECT DISTINCT context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id -- SELECT DISTINCT context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id
FROM nodes_contexts -- FROM nodes_contexts
JOIN context_node_ngrams -- JOIN context_node_ngrams
ON context_node_ngrams.context_id = nodes_contexts.context_id; -- ON context_node_ngrams.context_id = nodes_contexts.context_id;
-- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_context_id_idx
-- ON context_node_ngrams_view(context_id);
-- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_ngrams_id_idx
-- ON context_node_ngrams_view(ngrams_id);
-- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_node_id_idx
-- ON context_node_ngrams_view(node_id);
-- CREATE UNIQUE INDEX IF NOT EXISTS context_node_ngrams_view_context_ngrams_node_uniq_idx
-- ON context_node_ngrams_view (context_id, ngrams_id, node_id);
CREATE INDEX IF NOT EXISTS context_node_ngrams_context_id_ngrams_id_idx CREATE INDEX IF NOT EXISTS context_node_ngrams_context_id_ngrams_id_idx
ON context_node_ngrams(context_id, ngrams_id); ON context_node_ngrams(context_id, ngrams_id);
CREATE INDEX IF NOT EXISTS context_node_ngrams_view_context_id_idx
ON context_node_ngrams_view(context_id);
CREATE INDEX IF NOT EXISTS context_node_ngrams_view_ngrams_id_idx
ON context_node_ngrams_view(ngrams_id);
CREATE INDEX IF NOT EXISTS context_node_ngrams_view_node_id_idx
ON context_node_ngrams_view(node_id);
CREATE UNIQUE INDEX IF NOT EXISTS context_node_ngrams_view_context_ngrams_node_uniq_idx
ON context_node_ngrams_view (context_id, ngrams_id, node_id);
CREATE INDEX IF NOT EXISTS node_stories_ngrams_id_idx CREATE INDEX IF NOT EXISTS node_stories_ngrams_id_idx
ON node_stories(ngrams_id); ON node_stories(ngrams_id);
-- create materialized view if not exists context_node_ngrams_view as
-- select context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id
-- from nodes_contexts
-- join context_node_ngrams
-- on context_node_ngrams.context_id = nodes_contexts.context_id;
-- create index if not exists context_node_ngrams_view_context_id_idx on context_node_ngrams_view(context_id);
create materialized view if not exists context_node_ngrams_view as -- create index if not exists context_node_ngrams_view_ngrams_id_idx on context_node_ngrams_view(ngrams_id);
select context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id -- create index if not exists context_node_ngrams_view_node_id_idx on context_node_ngrams_view(node_id);
from nodes_contexts
join context_node_ngrams
on context_node_ngrams.context_id = nodes_contexts.context_id;
create index if not exists context_node_ngrams_context_id_ngrams_id_idx on context_node_ngrams(context_id, ngrams_id); create index if not exists context_node_ngrams_context_id_ngrams_id_idx on context_node_ngrams(context_id, ngrams_id);
create index if not exists context_node_ngrams_view_context_id_idx on context_node_ngrams_view(context_id);
create index if not exists context_node_ngrams_view_ngrams_id_idx on context_node_ngrams_view(ngrams_id);
create index if not exists context_node_ngrams_view_node_id_idx on context_node_ngrams_view(node_id);
create index if not exists node_stories_ngrams_id_idx on node_stories(ngrams_id); create index if not exists node_stories_ngrams_id_idx on node_stories(ngrams_id);
...@@ -138,6 +138,7 @@ dbNodeContext context_id node_id = do ...@@ -138,6 +138,7 @@ dbNodeContext context_id node_id = do
c <- lift $ getNodeContext (NodeId context_id) (NodeId node_id) c <- lift $ getNodeContext (NodeId context_id) (NodeId node_id)
pure $ toNodeContextGQL <$> [c] pure $ toNodeContextGQL <$> [c]
-- | Returns list of `ContextGQL` for given ngrams in given corpus id.
dbContextForNgrams dbContextForNgrams
:: (CmdCommon env) :: (CmdCommon env)
=> Int -> [Text] -> GqlM e env [ContextGQL] => Int -> [Text] -> GqlM e env [ContextGQL]
...@@ -146,6 +147,7 @@ dbContextForNgrams node_id ngrams_terms = do ...@@ -146,6 +147,7 @@ dbContextForNgrams node_id ngrams_terms = do
--lift $ printDebug "[dbContextForNgrams] contextsForNgramsTerms" contextsForNgramsTerms --lift $ printDebug "[dbContextForNgrams] contextsForNgramsTerms" contextsForNgramsTerms
pure $ toContextGQL <$> contextsForNgramsTerms pure $ toContextGQL <$> contextsForNgramsTerms
-- | Fetch ngrams matching given context in a given list id.
dbContextNgrams dbContextNgrams
:: (CmdCommon env) :: (CmdCommon env)
=> Int -> Int -> GqlM e env [Text] => Int -> Int -> GqlM e env [Text]
......
...@@ -138,7 +138,7 @@ filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m ...@@ -138,7 +138,7 @@ filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
isMapTerm (l, maybeRoot) = case maybeRoot of isMapTerm (l, maybeRoot) = case maybeRoot of
Nothing -> l == lt Nothing -> l == lt
Just r -> case HM.lookup r m of Just r -> case HM.lookup r m of
Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterListWithRootHashMap, unknown key: " <> unNgramsTerm r
Just (l',_) -> l' == lt Just (l',_) -> l' == lt
filterListWithRoot :: [ListType] filterListWithRoot :: [ListType]
...@@ -149,7 +149,7 @@ filterListWithRoot lt m = snd <$> HM.filter isMapTerm m ...@@ -149,7 +149,7 @@ filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
isMapTerm (l, maybeRoot) = case maybeRoot of isMapTerm (l, maybeRoot) = case maybeRoot of
Nothing -> elem l lt Nothing -> elem l lt
Just r -> case HM.lookup r m of Just r -> case HM.lookup r m of
Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterListWithRoot, unknown key: " <> unNgramsTerm r
Just (l',_) -> elem l' lt Just (l',_) -> elem l' lt
groupNodesByNgrams :: ( At root_map groupNodesByNgrams :: ( At root_map
......
...@@ -18,7 +18,7 @@ module Gargantext.Database.Action.Metrics.NgramsByContext ...@@ -18,7 +18,7 @@ module Gargantext.Database.Action.Metrics.NgramsByContext
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
--import Data.Map.Strict.Patch (PatchMap, Replace, diff) --import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Control.Monad (void) -- import Control.Monad (void)
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
import Data.Set (Set) import Data.Set (Set)
...@@ -30,7 +30,7 @@ import Gargantext.API.Ngrams.Types (NgramsTerm(..)) ...@@ -30,7 +30,7 @@ import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core import Gargantext.Core
import Gargantext.Data.HashMap.Strict.Utils as HM import Gargantext.Data.HashMap.Strict.Utils as HM
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId, MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId) import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId, MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery, execPGSQuery) import Gargantext.Database.Prelude (Cmd, runPGSQuery) -- , execPGSQuery)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..)) import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM import qualified Data.HashMap.Strict as HM
...@@ -428,9 +428,9 @@ queryNgramsByContextMaster' = [sql| ...@@ -428,9 +428,9 @@ queryNgramsByContextMaster' = [sql|
-- - at reindex stage -- - at reindex stage
-- - at the end of each text flow -- - at the end of each text flow
refreshNgramsMaterialized :: Cmd err () -- refreshNgramsMaterialized :: Cmd err ()
refreshNgramsMaterialized = void $ execPGSQuery refreshNgramsMaterializedQuery () -- refreshNgramsMaterialized = void $ execPGSQuery refreshNgramsMaterializedQuery ()
where -- where
refreshNgramsMaterializedQuery :: DPS.Query -- refreshNgramsMaterializedQuery :: DPS.Query
refreshNgramsMaterializedQuery = -- refreshNgramsMaterializedQuery =
[sql| REFRESH MATERIALIZED VIEW CONCURRENTLY context_node_ngrams_view; |] -- [sql| REFRESH MATERIALIZED VIEW CONCURRENTLY context_node_ngrams_view; |]
...@@ -194,7 +194,10 @@ getContextsForNgramsTerms cId ngramsTerms = do ...@@ -194,7 +194,10 @@ getContextsForNgramsTerms cId ngramsTerms = do
-- | Query the `context_node_ngrams` table and return ngrams for given -- | Query the `context_node_ngrams` table and return ngrams for given
-- `context_id` and `list_id`. -- `context_id` and `list_id`.
-- WARNING: `context_node_ngrams` can be outdated. -- WARNING: `context_node_ngrams` can be outdated. This is because it
-- is expensive to keep all ngrams matching a given context and if
-- someone adds an ngram, we need to recompute its m2m relation to all
-- existing documents.
getContextNgrams :: HasNodeError err getContextNgrams :: HasNodeError err
=> NodeId => NodeId
-> NodeId -> NodeId
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment