1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;
COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';
CREATE EXTENSION IF NOT EXISTS tsm_system_rows;
CREATE EXTENSION pgcrypto;
-----------------------------------------------------------------
CREATE TABLE public.auth_user (
id SERIAL,
password CHARACTER varying(128) NOT NULL,
last_login TIMESTAMP with time zone,
is_superuser BOOLEAN NOT NULL,
username CHARACTER varying(150) NOT NULL,
first_name CHARACTER varying(30) NOT NULL,
last_name CHARACTER varying(30) NOT NULL,
email CHARACTER varying(254) NOT NULL,
is_staff BOOLEAN NOT NULL,
is_active BOOLEAN NOT NULL,
date_joined TIMESTAMP with time zone DEFAULT now() NOT NULL,
forgot_password_uuid TEXT,
PRIMARY KEY (id)
);
ALTER TABLE public.auth_user OWNER TO gargantua;
-----------------------------------------------------------------
-- TODO add publication_date
-- TODO typename -> type_id
CREATE TABLE public.nodes (
id SERIAL,
hash_id CHARACTER varying(66) DEFAULT ''::character varying NOT NULL,
typename INTEGER NOT NULL,
user_id INTEGER NOT NULL,
parent_id INTEGER REFERENCES public.nodes(id) ON DELETE CASCADE ,
name CHARACTER varying(255) DEFAULT ''::character varying NOT NULL,
date TIMESTAMP with time zone DEFAULT now() NOT NULL,
hyperdata jsonb DEFAULT '{}'::jsonb NOT NULL,
search tsvector,
PRIMARY KEY (id),
FOREIGN KEY (user_id) REFERENCES public.auth_user(id) ON DELETE CASCADE
);
ALTER TABLE public.nodes OWNER TO gargantua;
--------------------------------------------------------------
-- TODO add publication_date
-- TODO typename -> type_id
CREATE TABLE public.contexts (
id SERIAL,
hash_id CHARACTER varying(66) DEFAULT ''::character varying NOT NULL,
typename INTEGER NOT NULL,
user_id INTEGER NOT NULL,
parent_id INTEGER REFERENCES public.contexts(id) ON DELETE CASCADE ,
name CHARACTER varying(255) DEFAULT ''::character varying NOT NULL,
date TIMESTAMP with time zone DEFAULT now() NOT NULL,
hyperdata jsonb DEFAULT '{}'::jsonb NOT NULL,
search tsvector,
PRIMARY KEY (id),
FOREIGN KEY (user_id) REFERENCES public.auth_user(id) ON DELETE CASCADE
);
ALTER TABLE public.contexts OWNER TO gargantua;
--------------------------------------------------------------
-- | Ngrams
CREATE TABLE public.ngrams (
id SERIAL,
terms CHARACTER varying(255),
n INTEGER,
PRIMARY KEY (id)
);
ALTER TABLE public.ngrams OWNER TO gargantua;
-- | Ngrams PosTag
CREATE TABLE public.ngrams_postag (
id SERIAL ,
lang_id INTEGER ,
algo_id INTEGER ,
postag CHARACTER varying(5) ,
ngrams_id INTEGER NOT NULL ,
lemm_id INTEGER NOT NULL ,
score INTEGER DEFAULT 1 ::integer NOT NULL ,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE,
FOREIGN KEY (lemm_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.ngrams_postag OWNER TO gargantua;
--------------------------------------------------------------
-- Node here should have type NodeList
CREATE TABLE public.node_ngrams (
id SERIAL ,
node_id INTEGER NOT NULL ,
node_subtype INTEGER ,
ngrams_id INTEGER NOT NULL ,
ngrams_type INTEGER , -- change to ngrams_field? (no for pedagogic reason)
ngrams_field INTEGER ,
ngrams_tag INTEGER ,
ngrams_class INTEGER ,
weight double precision ,
PRIMARY KEY (id) ,
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE ,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.node_ngrams OWNER TO gargantua;
--CREATE TABLE public.context_nodengrams_nodengrams (
-- context_id INTEGER NOT NULL ,
-- node_ngrams1_id INTEGER NOT NULL ,
-- node_ngrams2_id INTEGER NOT NULL ,
-- weight double precision ,
-- FOREIGN KEY (node_id) REFERENCES public.contexts(id) ON DELETE CASCADE ,
-- FOREIGN KEY (node_ngrams1_id) REFERENCES public.node_ngrams(id) ON DELETE CASCADE,
-- FOREIGN KEY (node_ngrams2_id) REFERENCES public.node_ngrams(id) ON DELETE CASCADE,
-- PRIMARY KEY (node_id, node_ngrams1_id, node_ngrams2_id)
--);
--ALTER TABLE public.context_nodengrams_nodengrams OWNER TO gargantua;
--------------------------------------------------------------
--------------------------------------------------------------
--
--CREATE TABLE public.nodes_ngrams_ngrams (
-- node_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
-- ngram1_id integer NOT NULL REFERENCES public.ngrams(id) ON DELETE CASCADE,
-- ngram2_id integer NOT NULL REFERENCES public.ngrams(id) ON DELETE CASCADE,
-- weight double precision,
-- PRIMARY KEY (node_id,ngram1_id,ngram2_id)
--);
--
--ALTER TABLE public.nodes_ngrams_ngrams OWNER TO gargantua;
---------------------------------------------------------------
-- TODO nodes_nodes(node1_id int, node2_id int, edge_type int , weight real)
CREATE TABLE public.nodes_nodes (
node1_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
node2_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
score REAL ,
category INTEGER ,
PRIMARY KEY (node1_id, node2_id)
);
ALTER TABLE public.nodes_nodes OWNER TO gargantua;
-- To attach contexts to a Corpus
CREATE TABLE public.nodes_contexts (
id SERIAL ,
node_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
context_id INTEGER NOT NULL REFERENCES public.contexts(id) ON DELETE CASCADE,
score REAL ,
category INTEGER ,
PRIMARY KEY (id)
);
ALTER TABLE public.nodes_contexts OWNER TO gargantua;
CREATE TABLE public.nodescontexts_nodescontexts (
nodescontexts1 INTEGER NOT NULL REFERENCES public.nodes_contexts(id) ON DELETE CASCADE,
nodescontexts2 INTEGER NOT NULL REFERENCES public.nodes_contexts(id) ON DELETE CASCADE,
PRIMARY KEY (nodescontexts1, nodescontexts2)
);
ALTER TABLE public.nodescontexts_nodescontexts OWNER TO gargantua;
---------------------------------------------------------------
CREATE TABLE public.context_node_ngrams (
context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
ngrams_type INTEGER ,
weight double precision,
doc_count INTEGER DEFAULT 0,
PRIMARY KEY (context_id, node_id, ngrams_id, ngrams_type)
);
ALTER TABLE public.context_node_ngrams OWNER TO gargantua;
CREATE TABLE public.context_node_ngrams2 (
context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
nodengrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
weight double precision,
PRIMARY KEY (context_id, nodengrams_id)
);
ALTER TABLE public.context_node_ngrams2 OWNER TO gargantua;
--------------------------------------------------------------------
CREATE TABLE public.node_node_ngrams (
node1_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
node2_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
ngrams_type INTEGER,
weight double precision,
PRIMARY KEY (node1_id, node2_id, ngrams_id, ngrams_type)
);
ALTER TABLE public.node_node_ngrams OWNER TO gargantua;
CREATE TABLE public.node_node_ngrams2 (
node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
nodengrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
weight double precision,
PRIMARY KEY (node_id, nodengrams_id)
);
ALTER TABLE public.node_node_ngrams2 OWNER TO gargantua;
--------------------------------------------------------------
--CREATE TABLE public.nodes_ngrams_repo (
-- version integer NOT NULL,
-- patches jsonb DEFAULT '{}'::jsonb NOT NULL,
-- PRIMARY KEY (version)
--);
--ALTER TABLE public.nodes_ngrams_repo OWNER TO gargantua;
---------------------------------------------------------
-- If needed for rights management at row level
-- CREATE EXTENSION IF NOT EXISTS acl WITH SCHEMA public;
CREATE TABLE public.rights (
user_id INTEGER NOT NULL REFERENCES public.auth_user(id) ON DELETE CASCADE,
node_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
rights INTEGER NOT NULL,
PRIMARY KEY (user_id, node_id)
);
ALTER TABLE public.rights OWNER TO gargantua;
------------------------------------------------------------
-- Node Story
CREATE TABLE public.node_stories (
id SERIAL,
node_id INTEGER NOT NULL,
version INTEGER NOT NULL,
ngrams_type_id INTEGER NOT NULL,
ngrams_id INTEGER NOT NULL,
--children TEXT[],
ngrams_repo_element jsonb DEFAULT '{}'::jsonb NOT NULL,
PRIMARY KEY (id),
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.node_stories OWNER TO gargantua;
CREATE UNIQUE INDEX ON public.node_stories USING btree (node_id, ngrams_type_id, ngrams_id);
create table public.node_story_archive_history (
id SERIAL,
node_id INTEGER NOT NULL,
ngrams_type_id INTEGER NOT NULL,
ngrams_id INTEGER NOT NULL,
patch jsonb DEFAULT '{}'::jsonb NOT NULL,
version INTEGER NOT NULL DEFAULT 0,
PRIMARY KEY (id),
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.node_story_archive_history OWNER TO gargantua;
------------------------------------------------------------
-- INDEXES
CREATE INDEX ON public.auth_user USING btree (username varchar_pattern_ops);
CREATE UNIQUE INDEX ON public.auth_user USING btree (username);
CREATE INDEX ON public.rights USING btree (user_id,node_id);
CREATE INDEX ON public.nodes USING gin (hyperdata);
CREATE INDEX ON public.nodes USING btree (user_id, typename, parent_id);
CREATE INDEX ON public.nodes USING btree (id, typename, date ASC);
CREATE INDEX ON public.nodes USING btree (id, typename, date DESC);
CREATE INDEX ON public.nodes USING btree (typename, id);
CREATE UNIQUE INDEX IF NOT EXISTS ON public.nodes USING btree (hash_id);
CREATE INDEX ON public.contexts USING gin (hyperdata);
CREATE INDEX ON public.contexts USING btree (user_id, typename, parent_id);
CREATE INDEX ON public.contexts USING btree (id, typename, date ASC);
CREATE INDEX ON public.contexts USING btree (id, typename, date DESC);
CREATE INDEX ON public.contexts USING btree (typename, id);
CREATE UNIQUE INDEX IF NOT EXISTS ON public.contexts USING btree (hash_id);
CREATE INDEX ON public.nodescontexts_nodescontexts USING btree (nodescontexts1, nodescontexts2);
-- CREATE UNIQUE INDEX ON public.nodes USING btree (((hyperdata ->> 'uniqId'::text)));
-- CREATE UNIQUE INDEX ON public.nodes USING btree (((hyperdata ->> 'uniqIdBdd'::text)));
-- CREATE UNIQUE INDEX ON public.nodes USING btree (typename, parent_id, ((hyperdata ->> 'uniqId'::text)));
CREATE UNIQUE INDEX ON public.ngrams (terms); -- TEST GIN
CREATE INDEX ON public.ngrams USING btree (id, terms);
CREATE UNIQUE INDEX ON public.ngrams_postag (lang_id,algo_id,postag,ngrams_id,lemm_id);
-- To save the Node Ngrams Repo
CREATE INDEX ON public.node_ngrams USING btree (node_id,node_subtype);
CREATE UNIQUE INDEX ON public.node_ngrams USING btree (node_id,node_subtype, ngrams_id);
-- To make the links between Nodes in Tree/Forest
CREATE UNIQUE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id);
CREATE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id, category);
-- To make the links between Corpus Node and its contexts
CREATE UNIQUE INDEX ON public.nodes_contexts USING btree (node_id, context_id);
CREATE INDEX ON public.nodes_contexts USING btree (node_id, context_id, category);
------------------------------------------------------------------------
CREATE UNIQUE INDEX ON public.context_node_ngrams USING btree (context_id, node_id, ngrams_id, ngrams_type);
CREATE INDEX ON public.context_node_ngrams USING btree (context_id, node_id);
CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_id, node_id);
CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_type);
CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id);
CREATE INDEX ON public.context_node_ngrams2 USING btree (nodengrams_id);
CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id, nodengrams_id);
CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id, ngrams_id, ngrams_type);
CREATE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id);
CREATE INDEX ON public.node_node_ngrams USING btree (ngrams_id, node2_id);
CREATE INDEX ON public.node_node_ngrams USING btree (ngrams_type);
CREATE INDEX ON public.node_node_ngrams2 USING btree (node_id);
CREATE INDEX ON public.node_node_ngrams2 USING btree (nodengrams_id);
CREATE INDEX ON public.node_node_ngrams2 USING btree (node_id, nodengrams_id);
-- CREATE INDEX ON public.context_nodengrams_nodengrams USING btree (context_id, node_ngrams1_id, node_ngrams2_id);
-- CREATE INDEX ON public.context_nodengrams_nodengrams USING btree (node_ngrams1_id);
-- CREATE INDEX ON public.context_nodengrams_nodengrams USING btree (node_ngrams2_id);
------------------------------------------------------------------------
-- Ngrams Full DB Extraction Optim
-- TODO remove hard parameter and move elsewhere
CREATE OR REPLACE function node_pos(int, int) returns bigint
AS 'SELECT count(id) from nodes
WHERE id < $1
AND typename = $2
'
LANGUAGE SQL immutable;
--drop index node_by_pos;
--create index node_by_pos on nodes using btree(node_pos(id,typename));
-- Optimization for Ngrams Table View
-- CREATE MATERIALIZED VIEW IF NOT EXISTS context_node_ngrams_view AS
-- SELECT DISTINCT context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id
-- FROM nodes_contexts
-- JOIN context_node_ngrams
-- ON context_node_ngrams.context_id = nodes_contexts.context_id;
-- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_context_id_idx
-- ON context_node_ngrams_view(context_id);
-- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_ngrams_id_idx
-- ON context_node_ngrams_view(ngrams_id);
-- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_node_id_idx
-- ON context_node_ngrams_view(node_id);
-- CREATE UNIQUE INDEX IF NOT EXISTS context_node_ngrams_view_context_ngrams_node_uniq_idx
-- ON context_node_ngrams_view (context_id, ngrams_id, node_id);
CREATE INDEX IF NOT EXISTS context_node_ngrams_context_id_ngrams_id_idx
ON context_node_ngrams(context_id, ngrams_id);
CREATE INDEX IF NOT EXISTS node_stories_ngrams_id_idx
ON node_stories(ngrams_id);
----
-- Version 0.0.6.9.9.6.4.sql
-- ADD triggers
CREATE OR REPLACE FUNCTION check_node_stories_json()
RETURNS TRIGGER AS $$
DECLARE
missing_ngrams_exist boolean;
BEGIN
WITH child_ngrams as
(SELECT jsonb_array_elements_text(NEW.ngrams_repo_element->'children') AS term),
parent_ngrams AS
(SELECT NEW.ngrams_repo_element->>'root' AS term),
ngrams_child_parent AS
(SELECT child_ngrams.term, ngrams.terms
FROM child_ngrams
LEFT JOIN ngrams ON child_ngrams.term = ngrams.terms
WHERE ngrams.terms IS NULL
UNION
SELECT parent_ngrams.term, ngrams.terms
FROM parent_ngrams
LEFT JOIN ngrams ON parent_ngrams.term = ngrams.terms
WHERE ngrams.terms IS NULL
AND parent_ngrams.term IS NOT NULL)
SELECT EXISTS(SELECT * FROM ngrams_child_parent) INTO missing_ngrams_exist;
IF missing_ngrams_exist THEN
RAISE EXCEPTION 'node_stories: ngrams are missing: %', row_to_json(NEW);
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE TRIGGER check_node_stories_json_trg
AFTER INSERT OR UPDATE
ON node_stories
FOR EACH ROW
EXECUTE PROCEDURE check_node_stories_json();
CREATE OR REPLACE FUNCTION check_ngrams_json()
RETURNS TRIGGER AS $$
DECLARE
missing_ngrams_exist boolean;
BEGIN
WITH child_ngrams as
(SELECT jsonb_array_elements_text(ngrams_repo_element->'children') AS term
FROM node_stories
WHERE term = OLD.terms),
parent_ngrams AS
(SELECT ngrams_repo_element->>'root' AS term
FROM node_stories
WHERE term = OLD.terms),
child_parent_ngrams AS
(SELECT * FROM child_ngrams
UNION SELECT * FROM parent_ngrams)
SELECT EXISTS(SELECT * FROM child_parent_ngrams) INTO missing_ngrams_exist;
IF missing_ngrams_exist THEN
RAISE EXCEPTION 'ngrams are missing: %', row_to_json(OLD);
END IF;
RETURN OLD;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE TRIGGER check_ngrams_json_trg
AFTER DELETE
ON ngrams
FOR EACH ROW
EXECUTE PROCEDURE check_ngrams_json();