Commit 39033a12 authored by Romain Loth

we cascade the LEFT JOIN-ed queries to ensure correct cardinality of records...

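We cascade the LEFT JOIN-ed queries (each join is wrapped in its own grouped subquery) to ensure the correct cardinality of records. Joining two one-to-many tables in a single flat query multiplies the rows per scholar (keywords × hashtags) before the GROUP BY, so the aggregated lists and counts come out duplicated. This is probably not necessary when joining only affiliations + keywords, because affiliations are 1-to-1, but it is definitely needed when joining affiliations + keywords + hashtags.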
parent a255158f
...@@ -268,33 +268,41 @@ else { ...@@ -268,33 +268,41 @@ else {
$filter = ""; $filter = "";
} }
$sql = <<< END_QUERY $sql = <<< END_QUERY
SELECT * FROM SELECT * FROM (
(SELECT SELECT
scholars.*, scholars_affiliations_and_keywords.*,
affiliations.*,
COUNT(keywords.kwid) AS keywords_nb,
GROUP_CONCAT(keywords.kwid) AS keywords_ids,
GROUP_CONCAT(kwstr) AS keywords_list,
GROUP_CONCAT(hashtags.htid) AS hashtags_ids,
GROUP_CONCAT(htstr) AS hashtags_list GROUP_CONCAT(htstr) AS hashtags_list
FROM scholars FROM (
LEFT JOIN sch_kw SELECT
ON sch_kw.uid = luid scholars_and_affiliations.*,
LEFT JOIN keywords GROUP_CONCAT(kwstr) AS keywords_list
ON sch_kw.kwid = keywords.kwid FROM (
SELECT
scholars.*,
affiliations.*
FROM scholars
LEFT JOIN affiliations
ON scholars.affiliation_id = affiliations.affid
WHERE (record_status = 'active'
OR (record_status = 'legacy' AND valid_date >= NOW()))
) AS scholars_and_affiliations
LEFT JOIN sch_kw
ON sch_kw.uid = scholars_and_affiliations.luid
LEFT JOIN keywords
ON sch_kw.kwid = keywords.kwid
GROUP BY luid
) AS scholars_affiliations_and_keywords
LEFT JOIN sch_ht LEFT JOIN sch_ht
ON sch_ht.uid = luid ON sch_ht.uid = luid
LEFT JOIN hashtags LEFT JOIN hashtags
ON sch_ht.htid = hashtags.htid ON sch_ht.htid = hashtags.htid
LEFT JOIN affiliations GROUP BY luid
ON affiliation_id = affid ) AS full_scholars_info
WHERE (record_status = 'active'
OR (record_status = 'legacy' AND valid_date >= NOW()))
GROUP BY luid) AS full_scholars_info
{$filter} {$filter}
END_QUERY; END_QUERY;
// debug // debug
// echo '<p style="color:white">query:'. $sql ."<p>"; // echo '<p style="color:white">query:'. $sql ."<p>";
......
...@@ -115,21 +115,25 @@ foreach ($scholar_id_array as $scholar_id){ ...@@ -115,21 +115,25 @@ foreach ($scholar_id_array as $scholar_id){
// £TODO do it at once with previous SELECT !! // £TODO do it at once with previous SELECT !!
$sql = <<< END_QUERY $sql = <<< END_QUERY
SELECT SELECT
scholars.*, scholars_and_affiliations.*,
affiliations.*,
COUNT(keywords.kwid) AS keywords_nb, COUNT(keywords.kwid) AS keywords_nb,
GROUP_CONCAT(keywords.kwid) AS keywords_ids, GROUP_CONCAT(keywords.kwid) AS keywords_ids,
GROUP_CONCAT(kwstr) AS keywords_list GROUP_CONCAT(kwstr) AS keywords_list
FROM scholars FROM (
JOIN sch_kw SELECT
ON luid = uid scholars.*,
JOIN keywords affiliations.*
FROM scholars
LEFT JOIN affiliations
ON scholars.affiliation_id = affiliations.affid
WHERE (record_status = 'active'
OR (record_status = 'legacy' AND valid_date >= NOW()))
) AS scholars_and_affiliations
LEFT JOIN sch_kw
ON sch_kw.uid = scholars_and_affiliations.luid
LEFT JOIN keywords
ON sch_kw.kwid = keywords.kwid ON sch_kw.kwid = keywords.kwid
LEFT JOIN affiliations
ON affiliation_id = affid
WHERE luid = "{$scholar_id}"
AND (record_status = 'active'
OR (record_status = 'legacy' AND valid_date >= NOW()))
GROUP BY luid GROUP BY luid
END_QUERY; END_QUERY;
......
<?php <?php
/* /*
* Génère le json des scholars à partir de la base mysql * Génère les agrégations pour les filtres à partir de la base mysql
* £TODO remplacer les appels à search_filter par des appels à api/aggs
*/ */
include("php_library/parametres.php"); include("php_library/parametres.php");
include("php_library/normalize.php"); include("php_library/normalize.php");
...@@ -52,39 +53,51 @@ function filter_word($value) { ...@@ -52,39 +53,51 @@ function filter_word($value) {
// $req = "SELECT ".$cat." AS clef, count(".$cat.") AS value FROM scholars WHERE ".$cat." ".$query." GROUP BY ".$cat." ORDER BY value DESC"; // $req = "SELECT ".$cat." AS clef, count(".$cat.") AS value FROM scholars WHERE ".$cat." ".$query." GROUP BY ".$cat." ORDER BY value DESC";
// TODO differentiate req's target cols earlier: above in "if ($category == X)" // TODO differentiate req's target cols earlier: above in "if ($category == X)"
$req = <<<END_QUERY
$req = <<<END_SQL
SELECT
{$cat} AS clef,
count({$cat}) AS value
FROM (
SELECT SELECT
{$cat} AS clef, scholars_affiliations_and_keywords.*,
count({$cat}) AS value GROUP_CONCAT(htstr) AS hashtags_list
FROM ( FROM (
SELECT SELECT
-- we create all needed cats for the outer select scholars_and_affiliations.*,
-- ============================================== GROUP_CONCAT(kwstr) AS keywords_list
scholars.luid, FROM (
scholars.country, SELECT
affiliations.org, scholars.luid,
affiliations.team_lab, scholars.country,
GROUP_CONCAT(kwstr) AS keywords_list, affiliations.org,
GROUP_CONCAT(htstr) AS hashtags_list affiliations.team_lab
FROM scholars FROM scholars
LEFT JOIN affiliations
ON scholars.affiliation_id = affiliations.affid
WHERE (record_status = 'active'
OR (record_status = 'legacy' AND valid_date >= NOW()))
) AS scholars_and_affiliations
LEFT JOIN sch_kw LEFT JOIN sch_kw
ON sch_kw.uid = luid ON sch_kw.uid = scholars_and_affiliations.luid
LEFT JOIN keywords LEFT JOIN keywords
ON sch_kw.kwid = keywords.kwid ON sch_kw.kwid = keywords.kwid
LEFT JOIN sch_ht
ON sch_ht.uid = luid
LEFT JOIN hashtags
ON sch_ht.htid = hashtags.htid
LEFT JOIN affiliations
ON scholars.affiliation_id = affiliations.affid
WHERE (record_status = 'active'
OR (record_status = 'legacy' AND valid_date >= NOW()))
GROUP BY luid GROUP BY luid
) AS full_scholars_info
WHERE {$cat} {$query} -- <== our filter ) AS scholars_affiliations_and_keywords
GROUP BY $cat LEFT JOIN sch_ht
ORDER BY value DESC ; ON sch_ht.uid = luid
END_QUERY; LEFT JOIN hashtags
ON sch_ht.htid = hashtags.htid
GROUP BY luid
) AS scholars_info
WHERE {$cat} {$query} -- <== our filter
GROUP BY $cat
ORDER BY value DESC ;
END_SQL;
// echo $req; // echo $req;
$results = array(); $results = array();
......
...@@ -295,21 +295,28 @@ class MyExtractor: ...@@ -295,21 +295,28 @@ class MyExtractor:
for scholar_id in scholar_array: for scholar_id in scholar_array:
sql3=''' sql3='''
SELECT
scholars_and_affiliations.*,
COUNT(keywords.kwid) AS keywords_nb,
GROUP_CONCAT(keywords.kwid) AS keywords_ids,
GROUP_CONCAT(kwstr) AS keywords_list
FROM (
SELECT SELECT
scholars.*, scholars.*,
affiliations.*, affiliations.*
COUNT(keywords.kwid) AS keywords_nb,
GROUP_CONCAT(keywords.kwid) AS keywords_ids,
GROUP_CONCAT(kwstr) AS keywords_list
FROM scholars FROM scholars
LEFT JOIN sch_kw
ON uid = luid
LEFT JOIN keywords
ON sch_kw.kwid = keywords.kwid
LEFT JOIN affiliations LEFT JOIN affiliations
ON affiliation_id = affid ON scholars.affiliation_id = affiliations.affid
WHERE luid = "%s" WHERE (record_status = 'active'
GROUP BY luid ; OR (record_status = 'legacy' AND valid_date >= NOW()))
) AS scholars_and_affiliations
LEFT JOIN sch_kw
ON sch_kw.uid = scholars_and_affiliations.luid
LEFT JOIN keywords
ON sch_kw.kwid = keywords.kwid
WHERE luid = %s
GROUP BY luid ;
''' % scholar_id ''' % scholar_id
# debug # debug
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment