schema.sql 17.3 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;
COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';

CREATE EXTENSION IF NOT EXISTS tsm_system_rows;
CREATE EXTENSION pgcrypto;

-----------------------------------------------------------------
CREATE TABLE public.auth_user (
    id SERIAL,
    password     CHARACTER varying(128) NOT NULL,
    last_login   TIMESTAMP with time zone,
    is_superuser BOOLEAN NOT NULL,
    username     CHARACTER varying(150) NOT NULL,
    first_name   CHARACTER varying(30) NOT NULL,
    last_name    CHARACTER varying(30) NOT NULL,
    email        CHARACTER varying(254) NOT NULL,
    is_staff     BOOLEAN NOT NULL,
    is_active    BOOLEAN NOT NULL,
    date_joined  TIMESTAMP with time zone DEFAULT now() NOT NULL,
    forgot_password_uuid TEXT,
    PRIMARY KEY (id)
);
ALTER TABLE public.auth_user OWNER TO gargantua;
-----------------------------------------------------------------

-- TODO add publication_date
-- TODO typename -> type_id
CREATE TABLE public.nodes (
    id        SERIAL,
    hash_id   CHARACTER varying(66) DEFAULT ''::character varying NOT NULL,
    typename  INTEGER NOT NULL,
    user_id   INTEGER NOT NULL,
    parent_id INTEGER REFERENCES public.nodes(id) ON DELETE CASCADE ,
    name      CHARACTER varying(255) DEFAULT ''::character varying NOT NULL,
    date      TIMESTAMP with time zone DEFAULT now() NOT NULL,
    hyperdata jsonb DEFAULT '{}'::jsonb NOT NULL,
    search tsvector,
    PRIMARY KEY (id),
    FOREIGN KEY (user_id)  REFERENCES public.auth_user(id) ON DELETE CASCADE
);
ALTER TABLE public.nodes OWNER TO gargantua;
--------------------------------------------------------------

-- TODO add publication_date
-- TODO typename -> type_id
CREATE TABLE public.contexts (
    id        SERIAL,
    hash_id   CHARACTER varying(66) DEFAULT ''::character varying NOT NULL,
    typename  INTEGER NOT NULL,
    user_id   INTEGER NOT NULL,
    parent_id INTEGER REFERENCES public.contexts(id) ON DELETE CASCADE ,
    name      CHARACTER varying(255) DEFAULT ''::character varying NOT NULL,
    date      TIMESTAMP with time zone DEFAULT now() NOT NULL,
    hyperdata jsonb DEFAULT '{}'::jsonb NOT NULL,
    search tsvector,
    PRIMARY KEY (id),
    FOREIGN KEY (user_id)  REFERENCES public.auth_user(id) ON DELETE CASCADE
);
ALTER TABLE public.contexts OWNER TO gargantua;

--------------------------------------------------------------
-- | Ngrams
CREATE TABLE public.ngrams (
    id SERIAL,
    terms CHARACTER varying(255),
    n INTEGER,
    PRIMARY KEY (id)
);
ALTER TABLE public.ngrams OWNER TO gargantua;

-- | Ngrams PosTag
CREATE TABLE public.ngrams_postag (
    id        SERIAL                  ,
    lang_id   INTEGER                 ,
    algo_id   INTEGER                 ,
    postag    CHARACTER varying(5)    ,
    ngrams_id INTEGER NOT NULL        ,
    lemm_id   INTEGER NOT NULL        ,
    score     INTEGER DEFAULT 1 ::integer NOT NULL                        ,
    FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE,
    FOREIGN KEY (lemm_id)   REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.ngrams_postag OWNER TO gargantua;

--------------------------------------------------------------
-- Node here should have type NodeList
CREATE TABLE public.node_ngrams (
    id SERIAL                          ,
    node_id        INTEGER NOT NULL    ,
    node_subtype   INTEGER             ,
    ngrams_id      INTEGER NOT NULL    ,
    ngrams_type    INTEGER             , -- change to ngrams_field? (no for pedagogic reason)
    ngrams_field   INTEGER             ,
    ngrams_tag     INTEGER             ,
    ngrams_class   INTEGER             ,
    weight double precision            ,
    PRIMARY KEY (id)                   ,
    FOREIGN KEY (node_id)   REFERENCES public.nodes(id)  ON DELETE CASCADE ,
    FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.node_ngrams OWNER TO gargantua;

--CREATE TABLE public.context_nodengrams_nodengrams (
--    context_id         INTEGER NOT NULL   ,
--    node_ngrams1_id INTEGER NOT NULL   ,
--    node_ngrams2_id INTEGER NOT NULL   ,
--    weight double   precision          ,
--    FOREIGN KEY (node_id) REFERENCES public.contexts(id) ON DELETE CASCADE              ,
--    FOREIGN KEY (node_ngrams1_id) REFERENCES public.node_ngrams(id) ON DELETE CASCADE,
--    FOREIGN KEY (node_ngrams2_id) REFERENCES public.node_ngrams(id) ON DELETE CASCADE,
--    PRIMARY KEY (node_id, node_ngrams1_id, node_ngrams2_id)
--);
--ALTER TABLE public.context_nodengrams_nodengrams OWNER TO gargantua;

--------------------------------------------------------------
--------------------------------------------------------------
--
--CREATE TABLE public.nodes_ngrams_ngrams (
--    node_id   integer NOT NULL REFERENCES public.nodes(id)  ON DELETE CASCADE,
--    ngram1_id integer NOT NULL REFERENCES public.ngrams(id) ON DELETE CASCADE,
--    ngram2_id integer NOT NULL REFERENCES public.ngrams(id) ON DELETE CASCADE,
--    weight double precision,
--    PRIMARY KEY (node_id,ngram1_id,ngram2_id)
--);
--
--ALTER TABLE public.nodes_ngrams_ngrams OWNER TO gargantua;
---------------------------------------------------------------
-- TODO nodes_nodes(node1_id int, node2_id int, edge_type int , weight real)
CREATE TABLE public.nodes_nodes (
    node1_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
    node2_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
    score    REAL    ,
    category INTEGER ,
    PRIMARY KEY (node1_id, node2_id)
);
ALTER TABLE public.nodes_nodes OWNER TO gargantua;



-- To attach contexts to a Corpus
CREATE TABLE public.nodes_contexts (
    id SERIAL                          ,
    node_id    INTEGER NOT NULL REFERENCES public.nodes(id)    ON DELETE CASCADE,
    context_id INTEGER NOT NULL REFERENCES public.contexts(id) ON DELETE CASCADE,
    score    REAL    ,
    category INTEGER ,
    PRIMARY KEY (id)
);
ALTER TABLE public.nodes_contexts OWNER TO gargantua;

CREATE TABLE public.nodescontexts_nodescontexts (
    nodescontexts1 INTEGER NOT NULL REFERENCES public.nodes_contexts(id) ON DELETE CASCADE,
    nodescontexts2 INTEGER NOT NULL REFERENCES public.nodes_contexts(id) ON DELETE CASCADE,

    PRIMARY KEY (nodescontexts1, nodescontexts2)
);
ALTER TABLE public.nodescontexts_nodescontexts OWNER TO gargantua;


---------------------------------------------------------------
CREATE TABLE public.context_node_ngrams (
    context_id    INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
    node_id       INTEGER NOT NULL REFERENCES public.nodes    (id) ON DELETE CASCADE,
    ngrams_id     INTEGER NOT NULL REFERENCES public.ngrams   (id) ON DELETE CASCADE,
    ngrams_type   INTEGER  ,
    weight double precision,
    doc_count     INTEGER DEFAULT 0,
    PRIMARY KEY (context_id, node_id, ngrams_id, ngrams_type)
  );
ALTER TABLE public.context_node_ngrams OWNER TO gargantua;

CREATE TABLE public.context_node_ngrams2 (
    context_id      INTEGER NOT NULL REFERENCES public.contexts     (id) ON DELETE CASCADE,
    nodengrams_id   INTEGER NOT NULL REFERENCES public.node_ngrams  (id) ON DELETE CASCADE,
    weight double   precision,
    PRIMARY KEY (context_id, nodengrams_id)
);
ALTER TABLE public.context_node_ngrams2 OWNER TO gargantua;


--------------------------------------------------------------------
CREATE TABLE public.node_node_ngrams (
node1_id   INTEGER NOT NULL REFERENCES public.nodes  (id) ON DELETE CASCADE,
node2_id   INTEGER NOT NULL REFERENCES public.nodes  (id) ON DELETE CASCADE,
ngrams_id  INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
ngrams_type INTEGER,
weight double precision,
PRIMARY KEY (node1_id, node2_id, ngrams_id, ngrams_type)
);
ALTER TABLE public.node_node_ngrams OWNER TO gargantua;

CREATE TABLE public.node_node_ngrams2 (
node_id         INTEGER NOT NULL REFERENCES public.nodes  (id) ON DELETE CASCADE,
nodengrams_id   INTEGER NOT NULL REFERENCES public.node_ngrams  (id) ON DELETE CASCADE,
weight double precision,
PRIMARY KEY (node_id, nodengrams_id)
);
ALTER TABLE public.node_node_ngrams2 OWNER TO gargantua;


--------------------------------------------------------------

--CREATE TABLE public.nodes_ngrams_repo (
--    version integer NOT NULL,
--    patches jsonb DEFAULT '{}'::jsonb NOT NULL,
--    PRIMARY KEY (version)
--);
--ALTER TABLE public.nodes_ngrams_repo OWNER TO gargantua;

---------------------------------------------------------

-- If needed for rights management at row level
-- CREATE EXTENSION IF NOT EXISTS acl WITH SCHEMA public;
CREATE TABLE public.rights (
  user_id INTEGER NOT NULL REFERENCES public.auth_user(id) ON DELETE CASCADE,
  node_id INTEGER NOT NULL REFERENCES public.nodes(id)     ON DELETE CASCADE,
  rights  INTEGER NOT NULL,
  PRIMARY KEY (user_id, node_id)
);
ALTER TABLE public.rights OWNER TO gargantua;

------------------------------------------------------------
-- Node Story

CREATE TABLE public.node_stories (
  id SERIAL,
  node_id INTEGER NOT NULL,
  version INTEGER NOT NULL,
  ngrams_type_id INTEGER NOT NULL,
  ngrams_id INTEGER NOT NULL,
  --children TEXT[],
  ngrams_repo_element jsonb DEFAULT '{}'::jsonb NOT NULL,
  PRIMARY KEY (id),
  FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
  FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.node_stories OWNER TO gargantua;

CREATE UNIQUE INDEX ON public.node_stories USING btree (node_id, ngrams_type_id, ngrams_id);


create table public.node_story_archive_history (
  id SERIAL,
  node_id INTEGER NOT NULL,
  ngrams_type_id INTEGER NOT NULL,
  ngrams_id INTEGER NOT NULL,
  patch jsonb DEFAULT '{}'::jsonb NOT NULL,
  version INTEGER NOT NULL DEFAULT 0,
  PRIMARY KEY (id),
  FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
  FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.node_story_archive_history OWNER TO gargantua;


------------------------------------------------------------
-- INDEXES
CREATE INDEX        ON public.auth_user USING btree (username varchar_pattern_ops);
CREATE UNIQUE INDEX ON public.auth_user USING btree (username);

CREATE INDEX        ON public.rights USING btree (user_id,node_id);

CREATE INDEX        ON public.nodes USING gin (hyperdata);
CREATE INDEX        ON public.nodes USING btree (user_id, typename, parent_id);
CREATE INDEX        ON public.nodes USING btree (id, typename, date ASC);
CREATE INDEX        ON public.nodes USING btree (id, typename, date DESC);
CREATE INDEX        ON public.nodes USING btree (typename, id);
CREATE UNIQUE INDEX IF NOT EXISTS ON public.nodes USING btree (hash_id);

CREATE INDEX        ON public.contexts USING gin (hyperdata);
CREATE INDEX        ON public.contexts USING btree (user_id, typename, parent_id);
CREATE INDEX        ON public.contexts USING btree (id, typename, date ASC);
CREATE INDEX        ON public.contexts USING btree (id, typename, date DESC);
CREATE INDEX        ON public.contexts USING btree (typename, id);
CREATE UNIQUE INDEX IF NOT EXISTS ON public.contexts USING btree (hash_id);

CREATE INDEX ON public.nodescontexts_nodescontexts USING btree (nodescontexts1, nodescontexts2);
-- CREATE UNIQUE INDEX ON public.nodes USING btree (((hyperdata ->> 'uniqId'::text)));
-- CREATE UNIQUE INDEX ON public.nodes USING btree (((hyperdata ->> 'uniqIdBdd'::text)));
-- CREATE UNIQUE INDEX ON public.nodes USING btree (typename, parent_id, ((hyperdata ->> 'uniqId'::text)));

CREATE UNIQUE INDEX ON public.ngrams (terms); -- TEST GIN
CREATE        INDEX ON public.ngrams USING btree (id, terms);
CREATE UNIQUE INDEX ON public.ngrams_postag (lang_id,algo_id,postag,ngrams_id,lemm_id);

-- To save the Node Ngrams Repo
CREATE        INDEX ON public.node_ngrams USING btree (node_id,node_subtype);
CREATE UNIQUE INDEX ON public.node_ngrams USING btree (node_id,node_subtype, ngrams_id);



-- To make the links between Nodes in Tree/Forest
CREATE UNIQUE INDEX ON public.nodes_nodes  USING btree (node1_id, node2_id);
CREATE INDEX        ON public.nodes_nodes  USING btree (node1_id, node2_id, category);


-- To make the links between Corpus Node and its contexts
CREATE UNIQUE INDEX ON public.nodes_contexts  USING btree (node_id, context_id);
CREATE INDEX        ON public.nodes_contexts  USING btree (node_id, context_id, category);


------------------------------------------------------------------------
CREATE UNIQUE INDEX ON public.context_node_ngrams USING btree (context_id, node_id, ngrams_id, ngrams_type);
CREATE        INDEX ON public.context_node_ngrams USING btree (context_id,  node_id);
CREATE        INDEX ON public.context_node_ngrams USING btree (ngrams_id, node_id);
CREATE        INDEX ON public.context_node_ngrams USING btree (ngrams_type);

CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id);
CREATE INDEX ON public.context_node_ngrams2 USING btree (nodengrams_id);
CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id, nodengrams_id);


CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id, ngrams_id, ngrams_type);
CREATE        INDEX ON public.node_node_ngrams USING btree (node1_id,  node2_id);
CREATE        INDEX ON public.node_node_ngrams USING btree (ngrams_id, node2_id);
CREATE        INDEX ON public.node_node_ngrams USING btree (ngrams_type);
CREATE INDEX ON public.node_node_ngrams2 USING btree (node_id);
CREATE INDEX ON public.node_node_ngrams2 USING btree (nodengrams_id);
CREATE INDEX ON public.node_node_ngrams2 USING btree (node_id, nodengrams_id);

-- CREATE INDEX ON public.context_nodengrams_nodengrams USING btree (context_id, node_ngrams1_id, node_ngrams2_id);
-- CREATE INDEX ON public.context_nodengrams_nodengrams USING btree (node_ngrams1_id);
-- CREATE INDEX ON public.context_nodengrams_nodengrams USING btree (node_ngrams2_id);


------------------------------------------------------------------------
-- Ngrams Full DB Extraction Optim
-- TODO remove hard parameter and move elsewhere
CREATE OR REPLACE function node_pos(int, int) returns bigint
   AS 'SELECT count(id) from nodes
      WHERE  id < $1
      AND typename = $2
      '
   LANGUAGE SQL immutable;

--drop index node_by_pos;
--create index node_by_pos on nodes using btree(node_pos(id,typename));

-- Optimization for Ngrams Table View
-- CREATE MATERIALIZED VIEW IF NOT EXISTS context_node_ngrams_view AS
--   SELECT DISTINCT context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id
--   FROM nodes_contexts
--   JOIN context_node_ngrams
--   ON context_node_ngrams.context_id = nodes_contexts.context_id;

-- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_context_id_idx
--   ON context_node_ngrams_view(context_id);
-- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_ngrams_id_idx
--   ON context_node_ngrams_view(ngrams_id);
-- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_node_id_idx
--   ON context_node_ngrams_view(node_id);
-- CREATE UNIQUE INDEX IF NOT EXISTS context_node_ngrams_view_context_ngrams_node_uniq_idx
--   ON context_node_ngrams_view (context_id, ngrams_id, node_id);

CREATE INDEX IF NOT EXISTS context_node_ngrams_context_id_ngrams_id_idx
ON context_node_ngrams(context_id, ngrams_id);

CREATE INDEX IF NOT EXISTS node_stories_ngrams_id_idx
  ON node_stories(ngrams_id);
----



-- Version 0.0.6.9.9.6.4.sql
-- ADD triggers
 CREATE OR REPLACE FUNCTION check_node_stories_json()
  RETURNS TRIGGER AS $$
  DECLARE
    missing_ngrams_exist boolean;
  BEGIN
      WITH child_ngrams as
        (SELECT jsonb_array_elements_text(NEW.ngrams_repo_element->'children') AS term),
        parent_ngrams AS
        (SELECT NEW.ngrams_repo_element->>'root' AS term),

        ngrams_child_parent AS
            (SELECT child_ngrams.term, ngrams.terms
         FROM child_ngrams
         LEFT JOIN ngrams ON child_ngrams.term = ngrams.terms
         WHERE ngrams.terms IS NULL

        UNION

        SELECT parent_ngrams.term, ngrams.terms
        FROM parent_ngrams
        LEFT JOIN ngrams ON parent_ngrams.term = ngrams.terms
        WHERE ngrams.terms IS NULL
        AND parent_ngrams.term IS NOT NULL)

            SELECT EXISTS(SELECT * FROM ngrams_child_parent) INTO missing_ngrams_exist;

            IF missing_ngrams_exist THEN
              RAISE EXCEPTION 'node_stories: ngrams are missing: %', row_to_json(NEW);
        END IF;

            RETURN NEW;
  END;
  $$ LANGUAGE plpgsql;

CREATE OR REPLACE TRIGGER check_node_stories_json_trg
AFTER INSERT OR UPDATE
ON node_stories
FOR EACH ROW
EXECUTE PROCEDURE check_node_stories_json();


   CREATE OR REPLACE FUNCTION check_ngrams_json()
    RETURNS TRIGGER AS $$
    DECLARE
      missing_ngrams_exist boolean;
    BEGIN
        WITH child_ngrams as
          (SELECT jsonb_array_elements_text(ngrams_repo_element->'children') AS term
           FROM node_stories
           WHERE term = OLD.terms),
          parent_ngrams AS
          (SELECT ngrams_repo_element->>'root' AS term
           FROM node_stories
           WHERE term = OLD.terms),
          child_parent_ngrams AS
          (SELECT * FROM child_ngrams
           UNION SELECT * FROM parent_ngrams)

       SELECT EXISTS(SELECT * FROM child_parent_ngrams) INTO missing_ngrams_exist;

       IF missing_ngrams_exist THEN
          RAISE EXCEPTION 'ngrams are missing: %', row_to_json(OLD);
       END IF;

       RETURN OLD;
    END;
    $$ LANGUAGE plpgsql;

  CREATE OR REPLACE TRIGGER check_ngrams_json_trg
  AFTER DELETE
  ON ngrams
  FOR EACH ROW
  EXECUTE PROCEDURE check_ngrams_json();