Commit b0c4cf28 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[DB][Optim] Cooc without groups (wip).

parent e728a20d
Pipeline #679 failed with stage
...@@ -50,7 +50,7 @@ main = do ...@@ -50,7 +50,7 @@ main = do
initMaster = do initMaster = do
(masterUserId, masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus userMaster (Left corpusMasterName) (Nothing :: Maybe HyperdataCorpus) (masterUserId, masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus userMaster (Left corpusMasterName) (Nothing :: Maybe HyperdataCorpus)
masterListId <- getOrMkList masterCorpusId masterUserId masterListId <- getOrMkList masterCorpusId masterUserId
_ <- initTriggers masterListId _triggers <- initTriggers masterListId
pure (masterUserId, masterRootId, masterCorpusId, masterListId) pure (masterUserId, masterRootId, masterCorpusId, masterListId)
withDevEnv iniPath $ \env -> do withDevEnv iniPath $ \env -> do
......
...@@ -110,9 +110,9 @@ ALTER TABLE public.node_node_ngrams OWNER TO gargantua; ...@@ -110,9 +110,9 @@ ALTER TABLE public.node_node_ngrams OWNER TO gargantua;
CREATE TABLE public.node_node_ngrams2 ( CREATE TABLE public.node_node_ngrams2 (
node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE, node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
node_ngrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE, nodengrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
weight double precision, weight double precision,
PRIMARY KEY (node_id, node_ngrams_id, ngrams_field) PRIMARY KEY (node_id, nodengrams_id)
); );
ALTER TABLE public.node_node_ngrams2 OWNER TO gargantua; ALTER TABLE public.node_node_ngrams2 OWNER TO gargantua;
...@@ -164,7 +164,8 @@ CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id, ...@@ -164,7 +164,8 @@ CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id,
CREATE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id); CREATE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id);
CREATE INDEX ON public.node_node_ngrams USING btree (ngrams_id, node2_id); CREATE INDEX ON public.node_node_ngrams USING btree (ngrams_id, node2_id);
CREATE INDEX ON public.node_nodengrams_nodengrams USING btree (node_id, node_ngrams1_id, node_ngrams2_id);
CREATE INDEX ON public.node_node_ngrams2 USING btree (node_id, nodengrams_id);
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Ngrams Full DB Extraction Optim -- Ngrams Full DB Extraction Optim
-- TODO remove hard parameter and move elsewhere -- TODO remove hard parameter and move elsewhere
......
...@@ -37,8 +37,10 @@ module Gargantext.Database.Flow -- (flowDatabase, ngrams2list) ...@@ -37,8 +37,10 @@ module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
, flowAnnuaire , flowAnnuaire
) )
where where
import Prelude (String) import Prelude (String)
import Data.Either import Data.Either
import Data.Tuple.Extra (first, second)
import Data.Traversable (traverse) import Data.Traversable (traverse)
import Debug.Trace (trace) import Debug.Trace (trace)
import Control.Lens ((^.), view, _Just) import Control.Lens ((^.), view, _Just)
...@@ -63,7 +65,8 @@ import Gargantext.Database.Root (getRoot) ...@@ -63,7 +65,8 @@ import Gargantext.Database.Root (getRoot)
import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId) import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import Gargantext.Database.Schema.Node -- (mkRoot, mkCorpus, getOrMkList, mkGraph, {-mkPhylo,-} mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError) import Gargantext.Database.Schema.Node -- (mkRoot, mkCorpus, getOrMkList, mkGraph, {-mkPhylo,-} mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError)
import Gargantext.Database.Schema.NodeNgrams (listInsertDb, getCgramsId)
import Gargantext.Database.Schema.NodeNodeNgrams2 -- (NodeNodeNgrams2, insertNodeNodeNgrams2)
import Gargantext.Database.Schema.User (getUser, UserLight(..)) import Gargantext.Database.Schema.User (getUser, UserLight(..))
import Gargantext.Database.TextSearch (searchInDatabase) import Gargantext.Database.TextSearch (searchInDatabase)
import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId) import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
...@@ -266,11 +269,27 @@ insertMasterDocs c lang hs = do ...@@ -266,11 +269,27 @@ insertMasterDocs c lang hs = do
maps <- mapNodeIdNgrams maps <- mapNodeIdNgrams
<$> documentIdWithNgrams (extractNgramsT $ withLang lang documentsWithId) documentsWithId <$> documentIdWithNgrams (extractNgramsT $ withLang lang documentsWithId) documentsWithId
lId <- getOrMkList masterCorpusId masterUserId
terms2id <- insertNgrams $ Map.keys maps terms2id <- insertNgrams $ Map.keys maps
-- to be removed
let indexedNgrams = Map.mapKeys (indexNgrams terms2id) maps let indexedNgrams = Map.mapKeys (indexNgrams terms2id) maps
-- new
lId <- getOrMkList masterCorpusId masterUserId
mapCgramsId <- listInsertDb lId toNodeNgramsW'
$ map (first _ngramsTerms . second Map.keys)
$ Map.toList maps
-- insertDocNgrams
_return <- insertNodeNodeNgrams2
$ catMaybes [ NodeNodeNgrams2 <$> Just nId
<*> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms)
<*> Just (fromIntegral w :: Double)
| (terms, mapNgramsTypes) <- Map.toList maps
, (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
, (nId, w) <- Map.toList mapNodeIdWeight
]
_cooc <- mkNode NodeListCooc lId masterUserId _cooc <- mkNode NodeListCooc lId masterUserId
-- to be removed
_ <- insertDocNgrams lId indexedNgrams _ <- insertDocNgrams lId indexedNgrams
pure ids' pure ids'
......
...@@ -23,11 +23,13 @@ Portability : POSIX ...@@ -23,11 +23,13 @@ Portability : POSIX
module Gargantext.Database.Flow.List module Gargantext.Database.Flow.List
where where
import Data.Text (Text)
import Control.Monad (mapM_) import Control.Monad (mapM_)
import Data.Map (Map, toList) import Data.Map (Map, toList)
import Data.Maybe (Maybe(..), catMaybes) import Data.Maybe (Maybe(..), catMaybes)
import Gargantext.API.Ngrams (NgramsElement(..), putListNgrams) import Gargantext.API.Ngrams (NgramsElement(..), putListNgrams)
import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId) import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import Gargantext.Core.Types.Main (ListType(CandidateTerm))
import Gargantext.Database.Schema.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb, getCgramsId) import Gargantext.Database.Schema.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb, getCgramsId)
import Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams -- (insert_Node_NodeNgrams_NodeNgrams, Node_NodeNgrams_NodeNgrams(..)) import Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams -- (insert_Node_NodeNgrams_NodeNgrams, Node_NodeNgrams_NodeNgrams(..))
import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId) import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
...@@ -74,16 +76,25 @@ flowList_DbRepo lId ngs = do ...@@ -74,16 +76,25 @@ flowList_DbRepo lId ngs = do
toNodeNgramsW :: ListId toNodeNgramsW :: ListId
-> [(NgramsType, [NgramsElement])] -> [(NgramsType, [NgramsElement])]
-> [NodeNgramsW] -> [NodeNgramsW]
toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW' l) ngs toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW'' l) ngs
where where
toNodeNgramsW' :: ListId toNodeNgramsW'' :: ListId
-> (NgramsType, [NgramsElement]) -> (NgramsType, [NgramsElement])
-> [NodeNgramsW] -> [NodeNgramsW]
toNodeNgramsW' l' (ngrams_type, elms) = toNodeNgramsW'' l' (ngrams_type, elms) =
[ NodeNgrams Nothing l' list_type ngrams_terms' ngrams_type Nothing Nothing Nothing 0 | [ NodeNgrams Nothing l' list_type ngrams_terms' ngrams_type Nothing Nothing Nothing 0 |
(NgramsElement ngrams_terms' _size list_type _occ _root _parent _children) <- elms (NgramsElement ngrams_terms' _size list_type _occ _root _parent _children) <- elms
] ]
toNodeNgramsW' :: ListId
-> [(Text, [NgramsType])]
-> [NodeNgramsW]
toNodeNgramsW' l'' ngs = [ NodeNgrams Nothing l'' CandidateTerm terms ngrams_type Nothing Nothing Nothing 0
| (terms, ngrams_types) <- ngs
, ngrams_type <- ngrams_types
]
listInsert :: FlowCmdM env err m listInsert :: FlowCmdM env err m
=> ListId => ListId
-> Map NgramsType [NgramsElement] -> Map NgramsType [NgramsElement]
......
...@@ -24,17 +24,19 @@ import Gargantext.Database.Utils (Cmd) ...@@ -24,17 +24,19 @@ import Gargantext.Database.Utils (Cmd)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Database.Triggers.Nodes (triggerSearchUpdate) import Gargantext.Database.Triggers.Nodes (triggerSearchUpdate)
import Gargantext.Database.Triggers.NodesNodes (triggerDeleteCount, triggerInsertCount, triggerUpdateAdd, triggerUpdateDel, MasterListId) import Gargantext.Database.Triggers.NodesNodes (triggerDeleteCount, triggerInsertCount, triggerUpdateAdd, triggerUpdateDel, MasterListId)
import Gargantext.Database.Triggers.NodeNodeNgrams (triggerCountInsert) import Gargantext.Database.Triggers.NodeNodeNgrams (triggerCountInsert, triggerCountInsert2, triggerCoocInsert)
------------------------------------------------------------------------ ------------------------------------------------------------------------
initTriggers :: MasterListId -> Cmd err [Int64] initTriggers :: MasterListId -> Cmd err [Int64]
initTriggers lId = do initTriggers lId = do
t0 <- triggerSearchUpdate t0 <- triggerSearchUpdate
t1 <- triggerCountInsert t1 <- triggerCountInsert
t1' <- triggerCountInsert2
t1'' <- triggerCoocInsert
t2 <- triggerDeleteCount lId t2 <- triggerDeleteCount lId
t3 <- triggerInsertCount lId t3 <- triggerInsertCount lId
t4 <- triggerUpdateAdd lId t4 <- triggerUpdateAdd lId
t5 <- triggerUpdateDel lId t5 <- triggerUpdateDel lId
pure [t0, t1,t2,t3,t4,t5] pure [t0,t1,t1',t1'',t2,t3,t4,t5]
...@@ -43,6 +43,8 @@ import Gargantext.Database.Utils ...@@ -43,6 +43,8 @@ import Gargantext.Database.Utils
import Gargantext.Database.Schema.Ngrams (NgramsType, ngramsTypeId, fromNgramsTypeId) import Gargantext.Database.Schema.Ngrams (NgramsType, ngramsTypeId, fromNgramsTypeId)
import Gargantext.Prelude import Gargantext.Prelude
type NodeNgramsId = Int
data NodeNgramsPoly id data NodeNgramsPoly id
node_id' node_id'
node_subtype node_subtype
......
{-|
Module : Gargantext.Database.Schema.NodeNodeNgrams
Description : TODO: remove this module and table in database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.NodeNodeNgrams2
where
import Prelude
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLenses)
import Gargantext.Database.Utils (Cmd, mkCmd)
import Gargantext.Database.Schema.NodeNgrams (NodeNgramsId)
import Gargantext.Database.Schema.Node (pgNodeId)
import Gargantext.Database.Types.Node
import Opaleye
data NodeNodeNgrams2Poly node_id nodengrams_id w
= NodeNodeNgrams2 { _nnng2_node_id :: node_id
, _nnng2_nodengrams_id :: nodengrams_id
, _nnng2_weight :: w
} deriving (Show)
type NodeNodeNgrams2Write =
NodeNodeNgrams2Poly (Column PGInt4 )
(Column PGInt4 )
(Column PGFloat8)
type NodeNodeNgrams2Read =
NodeNodeNgrams2Poly (Column PGInt4 )
(Column PGInt4 )
(Column PGFloat8)
type NodeNodeNgrams2ReadNull =
NodeNodeNgrams2Poly (Column (Nullable PGInt4 ))
(Column (Nullable PGInt4 ))
(Column (Nullable PGFloat8))
type NodeNodeNgrams2 =
NodeNodeNgrams2Poly DocId NodeNgramsId Double
$(makeAdaptorAndInstance "pNodeNodeNgrams2" ''NodeNodeNgrams2Poly)
makeLenses ''NodeNodeNgrams2Poly
nodeNodeNgrams2Table :: Table NodeNodeNgrams2Write NodeNodeNgrams2Read
nodeNodeNgrams2Table = Table "node_node_ngrams2"
( pNodeNodeNgrams2 NodeNodeNgrams2
{ _nnng2_node_id = required "node_id"
, _nnng2_nodengrams_id = required "nodengrams_id"
, _nnng2_weight = required "weight"
}
)
queryNodeNodeNgrams2Table :: Query NodeNodeNgrams2Read
queryNodeNodeNgrams2Table = queryTable nodeNodeNgrams2Table
-- | Insert utils
insertNodeNodeNgrams2 :: [NodeNodeNgrams2] -> Cmd err Int
insertNodeNodeNgrams2 = insertNodeNodeNgrams2W
. map (\(NodeNodeNgrams2 n1 n2 w) ->
NodeNodeNgrams2 (pgNodeId n1)
(pgInt4 n2)
(pgDouble w)
)
insertNodeNodeNgrams2W :: [NodeNodeNgrams2Write] -> Cmd err Int
insertNodeNodeNgrams2W nnnw =
mkCmd $ \c -> fromIntegral <$> runInsert_ c insertNothing
where
insertNothing = (Insert { iTable = nodeNodeNgrams2Table
, iRows = nnnw
, iReturning = rCount
, iOnConflict = (Just DoNothing)
})
...@@ -63,4 +63,92 @@ triggerCountInsert = execPGSQuery query (nodeTypeId NodeDocument, nodeTypeId Nod ...@@ -63,4 +63,92 @@ triggerCountInsert = execPGSQuery query (nodeTypeId NodeDocument, nodeTypeId Nod
EXECUTE PROCEDURE set_ngrams_global_count(); EXECUTE PROCEDURE set_ngrams_global_count();
|] |]
triggerCountInsert2 :: Cmd err Int64
triggerCountInsert2 = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId NodeDocument, nodeTypeId NodeList)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION set_ngrams_global_count2() RETURNS trigger AS $$
BEGIN
IF pg_trigger_depth() <> 1 THEN
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO node_node_ngrams2 (node_id, nodengrams_id, weight)
SELECT corpus.id, nng.id, count(*) from NEW as new1
INNER JOIN node_ngrams nng ON nng.id = new1.nodengrams_id
INNER JOIN nodes list ON list.id = nng.node_id
INNER JOIN nodes_nodes nn ON nn.node2_id = new1.node_id
INNER JOIN nodes corpus ON corpus.id = nn.node1_id
INNER JOIN nodes doc ON doc.id = nn.node2_id
WHERE corpus.typename = ? -- 30 -- corpus
AND doc.typename = ? -- 4 -- maybe not mandatory
AND list.typename = ? -- 5 -- list
GROUP BY corpus.id, nng.id
ON CONFLICT (node_id, nodengrams_id)
DO UPDATE set weight = node_node_ngrams2.weight + excluded.weight
;
END IF;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_count_insert2 on node_node_ngrams2;
CREATE TRIGGER trigger_count_insert2 AFTER INSERT on node_node_ngrams2
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE PROCEDURE set_ngrams_global_count2();
|]
-- TODO add the groups
triggerCoocInsert :: Cmd err Int64
triggerCoocInsert = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId NodeDocument, nodeTypeId NodeList)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION set_cooc() RETURNS trigger AS $$
BEGIN
IF pg_trigger_depth() <> 1 THEN
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO node_nodengrams_nodengrams (node_id, node_ngrams1_id, node_ngrams2_id, weight)
SELECT corpus.id, nng1.id, nng2.id, count(*) from NEW as new1
INNER JOIN node_ngrams nng1 ON nng1.id = new1.nodengrams_id
INNER JOIN nodes list ON list.id = nng1.node_id
INNER JOIN nodes_nodes nn ON nn.node2_id = new1.node_id
INNER JOIN nodes corpus ON corpus.id = nn.node1_id
INNER JOIN nodes doc ON doc.id = nn.node2_id
INNER JOIN node_node_ngrams2 nnng2 ON nnng2.node_id = doc.id
INNER JOIN node_ngrams nng2 ON nng2.id = nnng2.nodengrams_id
WHERE corpus.typename = ? -- 30 -- corpus
AND doc.typename = ? -- 4 -- maybe not mandatory
AND list.typename = ? -- 5 -- list
AND nng2.node_id = list.id
-- AND nng1.id <> nng2.id
GROUP BY corpus.id, nng1.id, nng2.id
ON CONFLICT (node_id, node_ngrams1_id, node_ngrams2_id)
DO UPDATE set weight = node_nodengrams_nodengrams.weight + excluded.weight
;
END IF;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_cooc on node_node_ngrams2;
CREATE TRIGGER trigger_cooc_insert AFTER INSERT on node_node_ngrams2
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE PROCEDURE set_cooc();
|]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment