[ngrams] remove materialized view, some refactorings

parent e42661a8
Pipeline #4159 passed with stages
in 74 minutes and 53 seconds
......@@ -68,8 +68,8 @@ main = do
let _secret = _gc_secretkey cfg
withDevEnv iniPath $ \env -> do
_ <- runCmdDev env addIndex
_ <- runCmdDev env refreshIndex
-- _ <- runCmdDev env addIndex
-- _ <- runCmdDev env refreshIndex
___
......@@ -77,34 +77,34 @@ main = do
___
pure ()
refreshIndex :: Cmd'' DevEnv IOException ()
refreshIndex = do
_ <- execPGSQuery [sql| REFRESH MATERIALIZED VIEW CONCURRENTLY context_node_ngrams_view; |] ()
pure ()
addIndex :: Cmd'' DevEnv IOException Int64
addIndex = do
execPGSQuery query ()
where
query = [sql|
CREATE MATERIALIZED VIEW IF NOT EXISTS context_node_ngrams_view AS
SELECT DISTINCT context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id
FROM nodes_contexts
JOIN context_node_ngrams
ON context_node_ngrams.context_id = nodes_contexts.context_id;
CREATE INDEX IF NOT EXISTS context_node_ngrams_context_id_ngrams_id_idx
ON context_node_ngrams(context_id, ngrams_id);
CREATE INDEX IF NOT EXISTS context_node_ngrams_view_context_id_idx
ON context_node_ngrams_view(context_id);
CREATE INDEX IF NOT EXISTS context_node_ngrams_view_ngrams_id_idx
ON context_node_ngrams_view(ngrams_id);
CREATE INDEX IF NOT EXISTS context_node_ngrams_view_node_id_idx
ON context_node_ngrams_view(node_id);
CREATE UNIQUE INDEX IF NOT EXISTS context_node_ngrams_view_context_ngrams_node_uniq_idx
ON context_node_ngrams_view (context_id, ngrams_id, node_id);
CREATE INDEX IF NOT EXISTS node_stories_ngrams_id_idx
ON node_stories(ngrams_id);
|]
-- refreshIndex :: Cmd'' DevEnv IOException ()
-- refreshIndex = do
-- _ <- execPGSQuery [sql| REFRESH MATERIALIZED VIEW CONCURRENTLY context_node_ngrams_view; |] ()
-- pure ()
-- addIndex :: Cmd'' DevEnv IOException Int64
-- addIndex = do
-- execPGSQuery query ()
-- where
-- query = [sql|
-- CREATE MATERIALIZED VIEW IF NOT EXISTS context_node_ngrams_view AS
-- SELECT DISTINCT context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id
-- FROM nodes_contexts
-- JOIN context_node_ngrams
-- ON context_node_ngrams.context_id = nodes_contexts.context_id;
-- CREATE INDEX IF NOT EXISTS context_node_ngrams_context_id_ngrams_id_idx
-- ON context_node_ngrams(context_id, ngrams_id);
-- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_context_id_idx
-- ON context_node_ngrams_view(context_id);
-- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_ngrams_id_idx
-- ON context_node_ngrams_view(ngrams_id);
-- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_node_id_idx
-- ON context_node_ngrams_view(node_id);
-- CREATE UNIQUE INDEX IF NOT EXISTS context_node_ngrams_view_context_ngrams_node_uniq_idx
-- ON context_node_ngrams_view (context_id, ngrams_id, node_id);
-- CREATE INDEX IF NOT EXISTS node_stories_ngrams_id_idx
-- ON node_stories(ngrams_id);
-- |]
......@@ -337,23 +337,23 @@ CREATE OR REPLACE function node_pos(int, int) returns bigint
--create index node_by_pos on nodes using btree(node_pos(id,typename));
-- Optimization for Ngrams Table View
CREATE MATERIALIZED VIEW IF NOT EXISTS context_node_ngrams_view AS
SELECT DISTINCT context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id
FROM nodes_contexts
JOIN context_node_ngrams
ON context_node_ngrams.context_id = nodes_contexts.context_id;
-- CREATE MATERIALIZED VIEW IF NOT EXISTS context_node_ngrams_view AS
-- SELECT DISTINCT context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id
-- FROM nodes_contexts
-- JOIN context_node_ngrams
-- ON context_node_ngrams.context_id = nodes_contexts.context_id;
-- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_context_id_idx
-- ON context_node_ngrams_view(context_id);
-- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_ngrams_id_idx
-- ON context_node_ngrams_view(ngrams_id);
-- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_node_id_idx
-- ON context_node_ngrams_view(node_id);
-- CREATE UNIQUE INDEX IF NOT EXISTS context_node_ngrams_view_context_ngrams_node_uniq_idx
-- ON context_node_ngrams_view (context_id, ngrams_id, node_id);
CREATE INDEX IF NOT EXISTS context_node_ngrams_context_id_ngrams_id_idx
ON context_node_ngrams(context_id, ngrams_id);
CREATE INDEX IF NOT EXISTS context_node_ngrams_view_context_id_idx
ON context_node_ngrams_view(context_id);
CREATE INDEX IF NOT EXISTS context_node_ngrams_view_ngrams_id_idx
ON context_node_ngrams_view(ngrams_id);
CREATE INDEX IF NOT EXISTS context_node_ngrams_view_node_id_idx
ON context_node_ngrams_view(node_id);
CREATE UNIQUE INDEX IF NOT EXISTS context_node_ngrams_view_context_ngrams_node_uniq_idx
ON context_node_ngrams_view (context_id, ngrams_id, node_id);
ON context_node_ngrams(context_id, ngrams_id);
CREATE INDEX IF NOT EXISTS node_stories_ngrams_id_idx
ON node_stories(ngrams_id);
-- create materialized view if not exists context_node_ngrams_view as
-- select context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id
-- from nodes_contexts
-- join context_node_ngrams
-- on context_node_ngrams.context_id = nodes_contexts.context_id;
create materialized view if not exists context_node_ngrams_view as
select context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id
from nodes_contexts
join context_node_ngrams
on context_node_ngrams.context_id = nodes_contexts.context_id;
-- create index if not exists context_node_ngrams_view_context_id_idx on context_node_ngrams_view(context_id);
-- create index if not exists context_node_ngrams_view_ngrams_id_idx on context_node_ngrams_view(ngrams_id);
-- create index if not exists context_node_ngrams_view_node_id_idx on context_node_ngrams_view(node_id);
create index if not exists context_node_ngrams_context_id_ngrams_id_idx on context_node_ngrams(context_id, ngrams_id);
create index if not exists context_node_ngrams_view_context_id_idx on context_node_ngrams_view(context_id);
create index if not exists context_node_ngrams_view_ngrams_id_idx on context_node_ngrams_view(ngrams_id);
create index if not exists context_node_ngrams_view_node_id_idx on context_node_ngrams_view(node_id);
create index if not exists node_stories_ngrams_id_idx on node_stories(ngrams_id);
......@@ -138,6 +138,7 @@ dbNodeContext context_id node_id = do
c <- lift $ getNodeContext (NodeId context_id) (NodeId node_id)
pure $ toNodeContextGQL <$> [c]
-- | Returns list of `ContextGQL` for given ngrams in given corpus id.
dbContextForNgrams
:: (CmdCommon env)
=> Int -> [Text] -> GqlM e env [ContextGQL]
......@@ -146,6 +147,7 @@ dbContextForNgrams node_id ngrams_terms = do
--lift $ printDebug "[dbContextForNgrams] contextsForNgramsTerms" contextsForNgramsTerms
pure $ toContextGQL <$> contextsForNgramsTerms
-- | Fetch ngrams matching given context in a given list id.
dbContextNgrams
:: (CmdCommon env)
=> Int -> Int -> GqlM e env [Text]
......
......@@ -138,7 +138,7 @@ filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
isMapTerm (l, maybeRoot) = case maybeRoot of
Nothing -> l == lt
Just r -> case HM.lookup r m of
Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterListWithRootHashMap, unknown key: " <> unNgramsTerm r
Just (l',_) -> l' == lt
filterListWithRoot :: [ListType]
......@@ -149,7 +149,7 @@ filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
isMapTerm (l, maybeRoot) = case maybeRoot of
Nothing -> elem l lt
Just r -> case HM.lookup r m of
Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterListWithRoot, unknown key: " <> unNgramsTerm r
Just (l',_) -> elem l' lt
groupNodesByNgrams :: ( At root_map
......
......@@ -18,7 +18,7 @@ module Gargantext.Database.Action.Metrics.NgramsByContext
-- import Debug.Trace (trace)
--import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Control.Monad (void)
-- import Control.Monad (void)
import Data.HashMap.Strict (HashMap)
import Data.Map.Strict (Map)
import Data.Set (Set)
......@@ -30,7 +30,7 @@ import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core
import Gargantext.Data.HashMap.Strict.Utils as HM
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId, MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery, execPGSQuery)
import Gargantext.Database.Prelude (Cmd, runPGSQuery) -- , execPGSQuery)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM
......@@ -428,9 +428,9 @@ queryNgramsByContextMaster' = [sql|
-- - at reindex stage
-- - at the end of each text flow
refreshNgramsMaterialized :: Cmd err ()
refreshNgramsMaterialized = void $ execPGSQuery refreshNgramsMaterializedQuery ()
where
refreshNgramsMaterializedQuery :: DPS.Query
refreshNgramsMaterializedQuery =
[sql| REFRESH MATERIALIZED VIEW CONCURRENTLY context_node_ngrams_view; |]
-- refreshNgramsMaterialized :: Cmd err ()
-- refreshNgramsMaterialized = void $ execPGSQuery refreshNgramsMaterializedQuery ()
-- where
-- refreshNgramsMaterializedQuery :: DPS.Query
-- refreshNgramsMaterializedQuery =
-- [sql| REFRESH MATERIALIZED VIEW CONCURRENTLY context_node_ngrams_view; |]
......@@ -194,7 +194,10 @@ getContextsForNgramsTerms cId ngramsTerms = do
-- | Query the `context_node_ngrams` table and return ngrams for given
-- `context_id` and `list_id`.
-- WARNING: `context_node_ngrams` can be outdated.
-- WARNING: `context_node_ngrams` can be outdated. This is because it
-- is expensive to keep all ngrams matching a given context and if
-- someone adds an ngram, we need to recompute its m2m relation to all
-- existing documents.
getContextNgrams :: HasNodeError err
=> NodeId
-> NodeId
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment