- 18 Jan, 2017 9 commits
-
-
delanoe authored
-
delanoe authored
-
delanoe authored
-
delanoe authored
-
delanoe authored
-
delanoe authored
-
delanoe authored
-
delanoe authored
-
delanoe authored
- [OLD] Performance regression -> lengthening and slowing the toolchain queue (2 seconds on 21 Europresse documents is too much, for instance)
- [OLD] Some ngrams were included whereas they are not in the corpus
+ [NEW] Clarity in the query
+ [NEW] Improved: 2000 ms before, less than 500 ms after (factor 4 optimization on a very small corpus); should be ok in bigger corpora

New behavior of the query tested with a real corpus and this simple example; copy-paste these lines into test.sql and run it in a test database (createdb test).

-- let be:
drop table nodes_ngrams;
drop table synonyms;
drop table ngrams;
drop table nodes;

create table nodes (
    id serial PRIMARY KEY not null
);

create table ngrams (
    id serial PRIMARY KEY not null,
    text varchar(50)
);

create table synonyms (
    id serial PRIMARY KEY not null,
    node_id INTEGER not null,
    ngram1_id INTEGER not null references ngrams(id),
    ngram2_id INTEGER not null references ngrams(id)
);

create table nodes_ngrams (
    id serial PRIMARY KEY not null,
    node_id INTEGER not null references nodes(id),
    ngram_id INTEGER not null references ngrams(id)
);

insert into nodes (id) values(1);
insert into nodes (id) values(2);
insert into nodes (id) values(3);

insert into ngrams (text) values('object');
insert into ngrams (text) values('table');
insert into ngrams (text) values('animal');
insert into ngrams (text) values('cat');
insert into ngrams (text) values('dog');
insert into ngrams (text) values('other');
insert into ngrams (text) values('abc');
insert into ngrams (text) values('xyz');

--select * from ngrams;
---- id |  text
--------+--------
----  1 | object
----  2 | table
----  3 | animal
----  4 | cat
----  5 | dog
----  6 | other
----  7 | abc
----  8 | xyz

insert into synonyms (node_id,ngram1_id,ngram2_id) values(1,1,2);
insert into synonyms (node_id,ngram1_id,ngram2_id) values(1,3,4);
insert into synonyms (node_id,ngram1_id,ngram2_id) values(1,3,5);

--select * from synonyms;
-- id | node_id | ngram1_id | ngram2_id
------+---------+-----------+-----------
--  1 |       1 |         1 |         2
--  2 |       1 |         3 |         4
--  3 |       1 |         3 |         5

insert into nodes_ngrams (node_id, ngram_id) values(1,1);
insert into nodes_ngrams (node_id, ngram_id) values(1,6);
insert into nodes_ngrams (node_id, ngram_id) values(1,2);
insert into nodes_ngrams (node_id, ngram_id) values(2,4);
insert into nodes_ngrams (node_id, ngram_id) values(2,5);
insert into nodes_ngrams (node_id, ngram_id) values(3,4);
insert into nodes_ngrams (node_id, ngram_id) values(3,5);
insert into nodes_ngrams (node_id, ngram_id) values(3,6);

--select * from nodes_ngrams;
-- id | node_id | ngram_id
------+---------+----------
--  1 |       1 |        1
--  2 |       1 |        6
--  3 |       1 |        2
--  4 |       2 |        4
--  5 |       2 |        5
--  6 |       3 |        4
--  7 |       3 |        5
--  8 |       3 |        6

select n1.ngram_id, n2.ngram_id, count(*)
from nodes n
INNER JOIN nodes_ngrams n1 ON n1.node_id = n.id
INNER JOIN nodes_ngrams n2 ON n2.node_id = n.id
where n1.ngram_id <= n2.ngram_id
--AND
--n1.node_id = n2.node_id
group by 1,2
order BY n1.ngram_id ASC
;

-- ngram_id | ngram_id | count
------------+----------+-------
--        5 |        6 |     1
--        1 |        6 |     1
--        4 |        6 |     1
--        2 |        2 |     1
--        4 |        4 |     2
--        1 |        1 |     1
--        1 |        2 |     1
--        6 |        6 |     2
--        2 |        6 |     1
--        4 |        5 |     2
--        5 |        5 |     2
--(11 lignes)

select coalesce(n11.id, n1.ngram_id), coalesce(n22.id,n2.ngram_id), count(*)
from nodes n
INNER JOIN nodes_ngrams n1 ON n1.node_id = n.id
LEFT JOIN synonyms s1 on n1.ngram_id = s1.ngram2_id AND s1.node_id=1
LEFT JOIN ngrams n11 on s1.ngram1_id = n11.id
INNER JOIN nodes_ngrams n2 ON n2.node_id = n.id
LEFT JOIN synonyms s2 on n2.ngram_id = s2.ngram2_id AND s2.node_id=1
LEFT JOIN ngrams n22 on s2.ngram1_id = n22.id
where n1.ngram_id <= n2.ngram_id
AND n1.node_id = n2.node_id
group by 1,2
;

-- coalesce | coalesce | count
------------+----------+-------
--        1 |        6 |     2
--        3 |        3 |     6
--        1 |        1 |     3
--        3 |        6 |     2
--        6 |        6 |     2
--(5 lignes)
--> les sommes comptées correspondent (the counted sums match)
-
- 30 Nov, 2016 1 commit
-
-
delanoe authored
-
- 29 Nov, 2016 3 commits
- 28 Nov, 2016 1 commit
-
-
delanoe authored
[COMMUNITY] Adding Code of Conduct (each developer participating in the development has to sign it).
-
- 17 Nov, 2016 1 commit
-
-
delanoe authored
-
- 15 Nov, 2016 4 commits
- 09 Nov, 2016 2 commits
- 08 Nov, 2016 5 commits
- 03 Nov, 2016 4 commits
- 02 Nov, 2016 1 commit
-
-
c24b authored
-
- 28 Oct, 2016 9 commits