Commit b37bd924 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[DB][Optim] Cooc without groups (wip).

parent f9c86cac
......@@ -50,7 +50,7 @@ main = do
initMaster = do
(masterUserId, masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus userMaster (Left corpusMasterName) (Nothing :: Maybe HyperdataCorpus)
masterListId <- getOrMkList masterCorpusId masterUserId
_ <- initTriggers masterListId
_triggers <- initTriggers masterListId
pure (masterUserId, masterRootId, masterCorpusId, masterListId)
withDevEnv iniPath $ \env -> do
......
......@@ -110,9 +110,9 @@ ALTER TABLE public.node_node_ngrams OWNER TO gargantua;
CREATE TABLE public.node_node_ngrams2 (
node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
node_ngrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
nodengrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
weight double precision,
PRIMARY KEY (node_id, node_ngrams_id, ngrams_field)
PRIMARY KEY (node_id, nodengrams_id)
);
ALTER TABLE public.node_node_ngrams2 OWNER TO gargantua;
......@@ -164,7 +164,8 @@ CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id,
CREATE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id);
CREATE INDEX ON public.node_node_ngrams USING btree (ngrams_id, node2_id);
CREATE INDEX ON public.node_nodengrams_nodengrams USING btree (node_id, node_ngrams1_id, node_ngrams2_id);
CREATE INDEX ON public.node_node_ngrams2 USING btree (node_id, nodengrams_id);
------------------------------------------------------------------------
-- Ngrams Full DB Extraction Optim
-- TODO remove hard parameter and move elsewhere
......
......@@ -37,8 +37,10 @@ module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
, flowAnnuaire
)
where
import Prelude (String)
import Data.Either
import Data.Tuple.Extra (first, second)
import Data.Traversable (traverse)
import Debug.Trace (trace)
import Control.Lens ((^.), view, _Just)
......@@ -63,7 +65,8 @@ import Gargantext.Database.Root (getRoot)
import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import Gargantext.Database.Schema.Node -- (mkRoot, mkCorpus, getOrMkList, mkGraph, {-mkPhylo,-} mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError)
import Gargantext.Database.Schema.NodeNgrams (listInsertDb, getCgramsId)
import Gargantext.Database.Schema.NodeNodeNgrams2 -- (NodeNodeNgrams2, insertNodeNodeNgrams2)
import Gargantext.Database.Schema.User (getUser, UserLight(..))
import Gargantext.Database.TextSearch (searchInDatabase)
import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
......@@ -266,11 +269,27 @@ insertMasterDocs c lang hs = do
maps <- mapNodeIdNgrams
<$> documentIdWithNgrams (extractNgramsT $ withLang lang documentsWithId) documentsWithId
lId <- getOrMkList masterCorpusId masterUserId
terms2id <- insertNgrams $ Map.keys maps
-- to be removed
let indexedNgrams = Map.mapKeys (indexNgrams terms2id) maps
-- new
lId <- getOrMkList masterCorpusId masterUserId
mapCgramsId <- listInsertDb lId toNodeNgramsW'
$ map (first _ngramsTerms . second Map.keys)
$ Map.toList maps
-- insertDocNgrams
_return <- insertNodeNodeNgrams2
$ catMaybes [ NodeNodeNgrams2 <$> Just nId
<*> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms)
<*> Just (fromIntegral w :: Double)
| (terms, mapNgramsTypes) <- Map.toList maps
, (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
, (nId, w) <- Map.toList mapNodeIdWeight
]
_cooc <- mkNode NodeListCooc lId masterUserId
-- to be removed
_ <- insertDocNgrams lId indexedNgrams
pure ids'
......
......@@ -23,11 +23,13 @@ Portability : POSIX
module Gargantext.Database.Flow.List
where
import Data.Text (Text)
import Control.Monad (mapM_)
import Data.Map (Map, toList)
import Data.Maybe (Maybe(..), catMaybes)
import Gargantext.API.Ngrams (NgramsElement(..), putListNgrams)
import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import Gargantext.Core.Types.Main (ListType(CandidateTerm))
import Gargantext.Database.Schema.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb, getCgramsId)
import Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams -- (insert_Node_NodeNgrams_NodeNgrams, Node_NodeNgrams_NodeNgrams(..))
import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
......@@ -74,16 +76,25 @@ flowList_DbRepo lId ngs = do
toNodeNgramsW :: ListId
-> [(NgramsType, [NgramsElement])]
-> [NodeNgramsW]
toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW' l) ngs
toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW'' l) ngs
where
toNodeNgramsW' :: ListId
toNodeNgramsW'' :: ListId
-> (NgramsType, [NgramsElement])
-> [NodeNgramsW]
toNodeNgramsW' l' (ngrams_type, elms) =
toNodeNgramsW'' l' (ngrams_type, elms) =
[ NodeNgrams Nothing l' list_type ngrams_terms' ngrams_type Nothing Nothing Nothing 0 |
(NgramsElement ngrams_terms' _size list_type _occ _root _parent _children) <- elms
]
toNodeNgramsW' :: ListId
-> [(Text, [NgramsType])]
-> [NodeNgramsW]
toNodeNgramsW' l'' ngs = [ NodeNgrams Nothing l'' CandidateTerm terms ngrams_type Nothing Nothing Nothing 0
| (terms, ngrams_types) <- ngs
, ngrams_type <- ngrams_types
]
listInsert :: FlowCmdM env err m
=> ListId
-> Map NgramsType [NgramsElement]
......
......@@ -24,17 +24,19 @@ import Gargantext.Database.Utils (Cmd)
import Gargantext.Prelude
import Gargantext.Database.Triggers.Nodes (triggerSearchUpdate)
import Gargantext.Database.Triggers.NodesNodes (triggerDeleteCount, triggerInsertCount, triggerUpdateAdd, triggerUpdateDel, MasterListId)
import Gargantext.Database.Triggers.NodeNodeNgrams (triggerCountInsert)
import Gargantext.Database.Triggers.NodeNodeNgrams (triggerCountInsert, triggerCountInsert2, triggerCoocInsert)
------------------------------------------------------------------------
initTriggers :: MasterListId -> Cmd err [Int64]
initTriggers lId = do
t0 <- triggerSearchUpdate
t1 <- triggerCountInsert
t1' <- triggerCountInsert2
t1'' <- triggerCoocInsert
t2 <- triggerDeleteCount lId
t3 <- triggerInsertCount lId
t4 <- triggerUpdateAdd lId
t5 <- triggerUpdateDel lId
pure [t0, t1,t2,t3,t4,t5]
pure [t0,t1,t1',t1'',t2,t3,t4,t5]
......@@ -43,6 +43,8 @@ import Gargantext.Database.Utils
import Gargantext.Database.Schema.Ngrams (NgramsType, ngramsTypeId, fromNgramsTypeId)
import Gargantext.Prelude
type NodeNgramsId = Int
data NodeNgramsPoly id
node_id'
node_subtype
......
{-|
Module : Gargantext.Database.Schema.NodeNodeNgrams
Description : TODO: remove this module and table in database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.NodeNodeNgrams2
where
import Prelude
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLenses)
import Gargantext.Database.Utils (Cmd, mkCmd)
import Gargantext.Database.Schema.NodeNgrams (NodeNgramsId)
import Gargantext.Database.Schema.Node (pgNodeId)
import Gargantext.Database.Types.Node
import Opaleye
data NodeNodeNgrams2Poly node_id nodengrams_id w
= NodeNodeNgrams2 { _nnng2_node_id :: node_id
, _nnng2_nodengrams_id :: nodengrams_id
, _nnng2_weight :: w
} deriving (Show)
type NodeNodeNgrams2Write =
NodeNodeNgrams2Poly (Column PGInt4 )
(Column PGInt4 )
(Column PGFloat8)
type NodeNodeNgrams2Read =
NodeNodeNgrams2Poly (Column PGInt4 )
(Column PGInt4 )
(Column PGFloat8)
type NodeNodeNgrams2ReadNull =
NodeNodeNgrams2Poly (Column (Nullable PGInt4 ))
(Column (Nullable PGInt4 ))
(Column (Nullable PGFloat8))
type NodeNodeNgrams2 =
NodeNodeNgrams2Poly DocId NodeNgramsId Double
$(makeAdaptorAndInstance "pNodeNodeNgrams2" ''NodeNodeNgrams2Poly)
makeLenses ''NodeNodeNgrams2Poly
nodeNodeNgrams2Table :: Table NodeNodeNgrams2Write NodeNodeNgrams2Read
nodeNodeNgrams2Table = Table "node_node_ngrams2"
( pNodeNodeNgrams2 NodeNodeNgrams2
{ _nnng2_node_id = required "node_id"
, _nnng2_nodengrams_id = required "nodengrams_id"
, _nnng2_weight = required "weight"
}
)
queryNodeNodeNgrams2Table :: Query NodeNodeNgrams2Read
queryNodeNodeNgrams2Table = queryTable nodeNodeNgrams2Table
-- | Insert utils
insertNodeNodeNgrams2 :: [NodeNodeNgrams2] -> Cmd err Int
insertNodeNodeNgrams2 = insertNodeNodeNgrams2W
. map (\(NodeNodeNgrams2 n1 n2 w) ->
NodeNodeNgrams2 (pgNodeId n1)
(pgInt4 n2)
(pgDouble w)
)
insertNodeNodeNgrams2W :: [NodeNodeNgrams2Write] -> Cmd err Int
insertNodeNodeNgrams2W nnnw =
mkCmd $ \c -> fromIntegral <$> runInsert_ c insertNothing
where
insertNothing = (Insert { iTable = nodeNodeNgrams2Table
, iRows = nnnw
, iReturning = rCount
, iOnConflict = (Just DoNothing)
})
......@@ -63,4 +63,91 @@ triggerCountInsert = execPGSQuery query (nodeTypeId NodeDocument, nodeTypeId Nod
EXECUTE PROCEDURE set_ngrams_global_count();
|]
triggerCountInsert2 :: Cmd err Int64
triggerCountInsert2 = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId NodeDocument, nodeTypeId NodeList)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION set_ngrams_global_count2() RETURNS trigger AS $$
BEGIN
IF pg_trigger_depth() <> 1 THEN
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO node_node_ngrams2 (node_id, nodengrams_id, weight)
SELECT corpus.id, nng.id, count(*) from NEW as new1
INNER JOIN node_ngrams nng ON nng.id = new1.nodengrams_id
INNER JOIN nodes list ON list.id = nng.node_id
INNER JOIN nodes_nodes nn ON nn.node2_id = new1.node_id
INNER JOIN nodes corpus ON corpus.id = nn.node1_id
INNER JOIN nodes doc ON doc.id = nn.node2_id
WHERE corpus.typename = ? -- 30 -- corpus
AND doc.typename = ? -- 4 -- maybe not mandatory
AND list.typename = ? -- 5 -- list
GROUP BY corpus.id, nng.id
ON CONFLICT (node_id, nodengrams_id)
DO UPDATE set weight = node_node_ngrams2.weight + excluded.weight
;
END IF;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_count_insert2 on node_node_ngrams2;
CREATE TRIGGER trigger_count_insert2 AFTER INSERT on node_node_ngrams2
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE PROCEDURE set_ngrams_global_count2();
|]
-- TODO add the groups
triggerCoocInsert :: Cmd err Int64
triggerCoocInsert = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId NodeDocument, nodeTypeId NodeList)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION set_cooc() RETURNS trigger AS $$
BEGIN
IF pg_trigger_depth() <> 1 THEN
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO node_nodengrams_nodengrams (node_id, node_ngrams1_id, node_ngrams2_id, weight)
SELECT corpus.id, nng1.id, nng2.id, count(*) from NEW as new1
INNER JOIN node_ngrams nng1 ON nng1.id = new1.nodengrams_id
INNER JOIN nodes list ON list.id = nng1.node_id
INNER JOIN nodes_nodes nn ON nn.node2_id = new1.node_id
INNER JOIN nodes corpus ON corpus.id = nn.node1_id
INNER JOIN nodes doc ON doc.id = nn.node2_id
INNER JOIN node_node_ngrams2 nnng2 ON nnng2.node_id = doc.id
INNER JOIN node_ngrams nng2 ON nng2.id = nnng2.nodengrams_id
WHERE corpus.typename = ? -- 30 -- corpus
AND doc.typename = ? -- 4 -- maybe not mandatory
AND list.typename = ? -- 5 -- list
AND nng2.node_id = list.id
-- AND nng1.id <> nng2.id
GROUP BY corpus.id, nng1.id, nng2.id
ON CONFLICT (node_id, node_ngrams1_id, node_ngrams2_id)
DO UPDATE set weight = node_nodengrams_nodengrams.weight + excluded.weight
;
END IF;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_cooc on node_node_ngrams2;
CREATE TRIGGER trigger_cooc_insert AFTER INSERT on node_node_ngrams2
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE PROCEDURE set_cooc();
|]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment