Commit 281cfe0d authored by Administrator's avatar Administrator

nouveau fichier : init/sql/mainList.sql

Raw SQL to create lists.

	nouveau fichier : init/sql/cooccurrences.sql
Raw SQL to create cooccurrence from lists.

All of these SQL queries need to be checked before production.
parent f6a14f94
---- BASIQUE calcul des cooccurrences en ne tenant pas compte des stems équivalents
--
-- SELECT
-- -- %d as node_id,
-- ngX.id,
-- ngY.id,
-- COUNT(*) AS score
--FROM
-- node_node AS n -- the nodes who are direct children of the corpus
--
--INNER JOIN
-- node_node_ngram AS nngX ON nngX.node_id = n.id -- list of ngrams contained in the node
--INNER JOIN
-- node_node_ngram AS mainlistX ON mainlistX.ngram_id = nngX.ngram_id -- list of ngrams contained in the mainlist and in the node
--INNER JOIN
-- node_ngram AS ngX ON ngX.id = mainlistX.ngram_id -- ngrams which are in both
--
--INNER JOIN
-- node_node_ngram AS nngY ON nngY.node_id = n.id
--INNER JOIN
-- node_node_ngram AS mainlistY ON mainlistY.ngram_id = nngY.ngram_id
--INNER JOIN
-- node_ngram AS ngY ON ngY.id = mainlistY.ngram_id
--
--WHERE
-- n.parent_id = 1298
--AND
-- n.type_id = 5
--AND
-- mainlistX.node_id = 1382
--AND
-- mainlistY.node_id = 1382
--AND
-- nngX.ngram_id < nngY.ngram_id -- so we only get distinct pairs of ngrams
--
--GROUP BY
-- ngX.id,
-- ngX.terms,
-- ngY.id,
-- ngY.terms
--
--ORDER BY score DESC
--LIMIT 3
--;
--
-- Cooccurrence computation that takes equivalent stems into account.
--
-- For each node that is a direct child of the corpus (n.parent_id = 1298,
-- n.type_id = 5), every pair (ngX, ngY) of ngrams attached to that node and
-- also present in the main list (node 1382) produces joined rows; score is
-- the number of joined rows per pair. Only the 3 best-scoring pairs are kept.
--
-- Stem handling (stem list = node 94 in node_nodengramngram):
--   * nggXX / nggYX attach the stem row of ngX / ngY, if any;
--   * nggXY / nggYY look for another ngram sharing the same ngramy_id with a
--     larger ngramx_id, so that "IS NULL" on them can keep a single
--     representative per group of equivalent ngrams.
--
-- FIX: the nggXY join used to compare nggXY with itself
-- (ngramy_id = ngramy_id, ngramx_id < ngramx_id), so it could never match.
-- It now compares against nggXX, mirroring the nggYX / nggYY pair.
--
-- NOTE(review): the two "IS NULL" filters below are still disabled, as in the
-- original. While they are disabled, every LEFT JOIN match multiplies the
-- rows counted by COUNT(*); confirm the intended scoring before production.
SELECT
    -- %d as node_id,
    ngX.id,
    ngY.id,
    COUNT(*) AS score
FROM
    node_node AS n -- the nodes that are direct children of the corpus

-- X side of the pair
INNER JOIN node_node_ngram AS nngX
    ON nngX.node_id = n.id -- ngrams contained in the node
INNER JOIN node_node_ngram AS mainlistX
    ON mainlistX.ngram_id = nngX.ngram_id -- ngrams in both the main list and the node
INNER JOIN node_ngram AS ngX
    ON ngX.id = mainlistX.ngram_id -- the ngram itself
LEFT JOIN node_nodengramngram AS nggXX
    ON nggXX.node_id = 94
    AND nggXX.ngramx_id = ngX.id
LEFT JOIN node_nodengramngram AS nggXY
    ON nggXY.node_id = 94
    AND nggXX.ngramy_id = nggXY.ngramy_id
    AND nggXX.ngramx_id < nggXY.ngramx_id

-- Y side of the pair (same construction as the X side)
INNER JOIN node_node_ngram AS nngY
    ON nngY.node_id = n.id
INNER JOIN node_node_ngram AS mainlistY
    ON mainlistY.ngram_id = nngY.ngram_id
INNER JOIN node_ngram AS ngY
    ON ngY.id = mainlistY.ngram_id
LEFT JOIN node_nodengramngram AS nggYX
    ON nggYX.node_id = 94
    AND nggYX.ngramx_id = ngY.id
LEFT JOIN node_nodengramngram AS nggYY
    ON nggYY.node_id = 94
    AND nggYX.ngramy_id = nggYY.ngramy_id
    AND nggYX.ngramx_id < nggYY.ngramx_id

WHERE
    n.parent_id = 1298
    AND n.type_id = 5
    AND mainlistX.node_id = 1382
    AND mainlistY.node_id = 1382
    AND nngX.ngram_id < nngY.ngram_id -- so we only get distinct pairs of ngrams
    --AND nggYY.id is NULL
    --AND nggXY.id is NULL
GROUP BY
    ngX.id,
    ngX.terms,
    ngY.id,
    ngY.terms
ORDER BY score DESC
LIMIT 3
;
-- Count the distinct ngrams of the miam list.
-- The inner query de-duplicates ngram_id; the outer query counts the result.
SELECT COUNT(*)
FROM (
    SELECT DISTINCT miam.ngram_id
    FROM node_node_ngram AS miam
    WHERE miam.node_id = 1380 -- node.id of the miam list
) AS distinct_ngrams
;
-- Count the distinct ngrams of a corpus that have a stem.
-- A stem-list row (ng) is kept when its ngramx_id is attached to at least one
-- node (nn) whose parent is the corpus; the result is de-duplicated per ngram.
SELECT COUNT(*)
FROM (
    SELECT DISTINCT ng.ngramx_id
    FROM node_nodengramngram AS ng
    INNER JOIN node_node_ngram AS nn
        ON nn.ngram_id = ng.ngramx_id
    INNER JOIN node_node AS n
        ON n.id = nn.node_id
    WHERE ng.node_id = 94      -- node.id of the stem list
      AND n.parent_id = 1298   -- node.id of the corpus
) AS stemmed_ngrams
;
-- Count the distinct ngrams of the miam list, preparing the selection of a
-- single ngram per group of equivalent stems.
--   * nx (inclusive LEFT JOIN): the stem row of the ngram, if it has one;
--   * ny (exclusive LEFT JOIN, once "ny.id IS NULL" is enabled): another
--     ngram sharing the same stem (ngramy_id) with a larger ngramx_id.
--
-- FIX: the ny join used to re-test nx.node_id = 94 instead of restricting ny
-- itself to the stem list, so ny could match rows of any node.
--
-- NOTE(review): the "ny.id IS NULL" filter is still disabled, as in the
-- original; ny is neither filtered nor grouped on, so it does not affect the
-- count until that filter is enabled.
SELECT COUNT(*)
FROM (
    SELECT nn.ngram_id
    FROM node_node_ngram AS nn
    INNER JOIN node_node AS n
        ON nn.node_id = n.id
        AND n.parent_id = 1298 -- node.id of the corpus
    LEFT JOIN node_nodengramngram AS nx
        ON nx.node_id = 94 -- node.id of the stem list
        AND nx.ngramx_id = nn.ngram_id
    LEFT JOIN node_nodengramngram AS ny
        ON ny.node_id = 94 -- node.id of the stem list
        AND nx.ngramy_id = ny.ngramy_id
        AND nx.ngramx_id < ny.ngramx_id -- to remove duplicates
    WHERE nn.node_id = 1380 -- node.id of the miam list
    -- AND ny.id is NULL
    GROUP BY nn.ngram_id, nx.ngramx_id --, ny.ngramx_id
) AS global
;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment