1. 05 Apr, 2017 1 commit
  2. 07 Feb, 2017 3 commits
  3. 06 Feb, 2017 3 commits
  4. 31 Jan, 2017 4 commits
  5. 30 Jan, 2017 3 commits
  6. 20 Jan, 2017 4 commits
  7. 18 Jan, 2017 16 commits
    • CHANGELOG · 2f24d6a8
      delanoe committed
    • [TAG] Version 3.0.6.4 · a0995ba2
      delanoe committed
    • [FIX] SQL query for cooc fix. · f549dcb3
      delanoe committed
    • [SQL] reset mem option. · 994ca0e4
      delanoe committed
    • [FIX] Using OS datetime. · 3b62b24d
      delanoe committed
    • [FIX] Cooc opti. · 85a1d373
      delanoe committed
    • [FIX] Cooc SQL. · 65a988b7
      delanoe committed
    • [TAG] Version 3.0.6.3 · e70cbd2f
      delanoe committed
    • updating changelog · 160beb76
      delanoe committed
    • [FIX] 2 MAJOR BUGS on COOC SQL QUERY · 96d847ef
          - [OLD] Performance regression
              -> lengthening and slowing the toolchain queue
              -> 2 secondes on 21 Europresse, documents is too much for instance)
          - [OLD] Some ngrams included whereas there are not in the corpus
          + [NEW] Clarity in the query
          + [NEW] Improved: 2000 ms before less than 500 ms after (factor 4
          optimization on a very small corpus); should be ok in bigger corpora
      
      New behavior of the query tested with real corpus and this simple
      example; copy paste these lines in test.sql and run it in a test
      database (createdb test).
      
      -- let be:
      
      drop table nodes_ngrams;
      drop table synonyms;
      drop table ngrams;
      drop table nodes;
      
      create table nodes (
          id serial PRIMARY KEY not null
      );
      
      create table ngrams (
          id serial PRIMARY KEY not null,
          text varchar(50)
      );
      
      create table synonyms (
          id serial PRIMARY KEY not null,
          node_id INTEGER not null,
          ngram1_id INTEGER not null references ngrams(id),
          ngram2_id INTEGER not null references ngrams(id)
      );
      
      create table nodes_ngrams (
          id serial PRIMARY KEY not null,
          node_id INTEGER not null references nodes(id),
          ngram_id INTEGER not null references ngrams(id)
      );
      
      insert into nodes (id) values(1);
      insert into nodes (id) values(2);
      insert into nodes (id) values(3);
      
      insert into ngrams (text) values('object');
      insert into ngrams (text) values('table');
      insert into ngrams (text) values('animal');
      insert into ngrams (text) values('cat');
      insert into ngrams (text) values('dog');
      insert into ngrams (text) values('other');
      insert into ngrams (text) values('abc');
      insert into ngrams (text) values('xyz');
      --select * from ngrams;
      ---- id |  text
      --------+--------
      ----  1 | object
      ----  2 | table
      ----  3 | animal
      ----  4 | cat
      ----  5 | dog
      ----  6 | other
      ----  7 | abc
      ----  8 | xyz
      
      insert into synonyms (node_id,ngram1_id,ngram2_id) values(1,1,2);
      insert into synonyms (node_id,ngram1_id,ngram2_id) values(1,3,4);
      insert into synonyms (node_id,ngram1_id,ngram2_id) values(1,3,5);
      --select * from synonyms;
      -- id | node_id | ngram1_id | ngram2_id
      ------+---------+-----------+-----------
      --  1 |       1 |         1 |         2
      --  2 |       1 |         3 |         4
      --  3 |       1 |         3 |         5
      
      insert into nodes_ngrams (node_id, ngram_id) values(1,1);
      insert into nodes_ngrams (node_id, ngram_id) values(1,6);
      insert into nodes_ngrams (node_id, ngram_id) values(1,2);
      insert into nodes_ngrams (node_id, ngram_id) values(2,4);
      insert into nodes_ngrams (node_id, ngram_id) values(2,5);
      insert into nodes_ngrams (node_id, ngram_id) values(3,4);
      insert into nodes_ngrams (node_id, ngram_id) values(3,5);
      insert into nodes_ngrams (node_id, ngram_id) values(3,6);
      --select * from nodes_ngrams;
      -- id | node_id | ngram_id
      ------+---------+----------
      --  1 |       1 |        1
      --  2 |       1 |        6
      --  3 |       1 |        2
      --  4 |       2 |        4
      --  5 |       2 |        5
      --  6 |       3 |        4
      --  7 |       3 |        5
      --  8 |       3 |        6
      
      select n1.ngram_id, n2.ngram_id, count(*)
      from nodes n
      INNER JOIN nodes_ngrams n1 ON n1.node_id = n.id
      INNER JOIN nodes_ngrams n2 ON n2.node_id = n.id
      where
      n1.ngram_id <= n2.ngram_id
      --AND
      --n1.node_id = n2.node_id
      group by 1,2
      order BY n1.ngram_id ASC
      ;
      -- ngram_id | ngram_id | count
      ------------+----------+-------
      --        5 |        6 |     1
      --        1 |        6 |     1
      --        4 |        6 |     1
      --        2 |        2 |     1
      --        4 |        4 |     2
      --        1 |        1 |     1
      --        1 |        2 |     1
      --        6 |        6 |     2
      --        2 |        6 |     1
      --        4 |        5 |     2
      --        5 |        5 |     2
      --(11 lignes)
      
      select coalesce(n11.id, n1.ngram_id), coalesce(n22.id,n2.ngram_id), count(*)
      from nodes n
      INNER JOIN nodes_ngrams n1 ON n1.node_id = n.id
      LEFT JOIN synonyms s1 on n1.ngram_id = s1.ngram2_id AND s1.node_id=1
      LEFT JOIN ngrams n11 on s1.ngram1_id = n11.id
      
      INNER JOIN nodes_ngrams n2 ON n2.node_id = n.id
      LEFT JOIN synonyms s2 on n2.ngram_id = s2.ngram2_id AND s2.node_id=1
      LEFT JOIN ngrams n22 on s2.ngram1_id = n22.id
      where
      n1.ngram_id <= n2.ngram_id
      AND
      n1.node_id = n2.node_id
      group by 1,2
      ;
      -- coalesce | coalesce | count
      ------------+----------+-------
      --        1 |        6 |     2
      --        3 |        3 |     6
      --        1 |        1 |     3
      --        3 |        6 |     2
      --        6 |        6 |     2
      --(5 lignes)
      --> les sommes comptées correspondent
      delanoe committed
  8. 30 Nov, 2016 2 commits
  9. 29 Nov, 2016 4 commits