Commit 39033a12 authored by Romain Loth

we cascade the LEFT JOIN-ed queries to ensure correct cardinality of records...

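We cascade the LEFT JOIN-ed queries to ensure correct cardinality of records: each one-to-many relation (keywords, then hashtags) is joined and grouped by luid in its own nested subquery, so the rows of one relation can no longer multiply the rows of another before aggregation. (This is probably not necessary when joining only affiliations + keywords, because affiliations are 1 to 1, but it is definitely needed when joining affiliations + keywords + hashtags.)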
parent a255158f
......@@ -268,33 +268,41 @@ else {
$filter = "";
}
$sql = <<< END_QUERY
SELECT * FROM
(SELECT
scholars.*,
affiliations.*,
COUNT(keywords.kwid) AS keywords_nb,
GROUP_CONCAT(keywords.kwid) AS keywords_ids,
GROUP_CONCAT(kwstr) AS keywords_list,
GROUP_CONCAT(hashtags.htid) AS hashtags_ids,
SELECT * FROM (
SELECT
scholars_affiliations_and_keywords.*,
GROUP_CONCAT(htstr) AS hashtags_list
FROM scholars
LEFT JOIN sch_kw
ON sch_kw.uid = luid
LEFT JOIN keywords
ON sch_kw.kwid = keywords.kwid
FROM (
SELECT
scholars_and_affiliations.*,
GROUP_CONCAT(kwstr) AS keywords_list
FROM (
SELECT
scholars.*,
affiliations.*
FROM scholars
LEFT JOIN affiliations
ON scholars.affiliation_id = affiliations.affid
WHERE (record_status = 'active'
OR (record_status = 'legacy' AND valid_date >= NOW()))
) AS scholars_and_affiliations
LEFT JOIN sch_kw
ON sch_kw.uid = scholars_and_affiliations.luid
LEFT JOIN keywords
ON sch_kw.kwid = keywords.kwid
GROUP BY luid
) AS scholars_affiliations_and_keywords
LEFT JOIN sch_ht
ON sch_ht.uid = luid
LEFT JOIN hashtags
ON sch_ht.htid = hashtags.htid
LEFT JOIN affiliations
ON affiliation_id = affid
WHERE (record_status = 'active'
OR (record_status = 'legacy' AND valid_date >= NOW()))
GROUP BY luid) AS full_scholars_info
GROUP BY luid
) AS full_scholars_info
{$filter}
END_QUERY;
// debug
// echo '<p style="color:white">query:'. $sql ."<p>";
......
......@@ -115,21 +115,25 @@ foreach ($scholar_id_array as $scholar_id){
// £TODO do it at once with previous SELECT !!
$sql = <<< END_QUERY
SELECT
scholars.*,
affiliations.*,
scholars_and_affiliations.*,
COUNT(keywords.kwid) AS keywords_nb,
GROUP_CONCAT(keywords.kwid) AS keywords_ids,
GROUP_CONCAT(kwstr) AS keywords_list
FROM scholars
JOIN sch_kw
ON luid = uid
JOIN keywords
FROM (
SELECT
scholars.*,
affiliations.*
FROM scholars
LEFT JOIN affiliations
ON scholars.affiliation_id = affiliations.affid
WHERE (record_status = 'active'
OR (record_status = 'legacy' AND valid_date >= NOW()))
) AS scholars_and_affiliations
LEFT JOIN sch_kw
ON sch_kw.uid = scholars_and_affiliations.luid
LEFT JOIN keywords
ON sch_kw.kwid = keywords.kwid
LEFT JOIN affiliations
ON affiliation_id = affid
WHERE luid = "{$scholar_id}"
AND (record_status = 'active'
OR (record_status = 'legacy' AND valid_date >= NOW()))
GROUP BY luid
END_QUERY;
......
<?php
/*
* Génère le json des scholars à partir de la base mysql
* Génère les agrégations pour les filtres à partir de la base mysql
* £TODO remplacer les appels à search_filter par des appels à api/aggs
*/
include("php_library/parametres.php");
include("php_library/normalize.php");
......@@ -52,39 +53,51 @@ function filter_word($value) {
// $req = "SELECT ".$cat." AS clef, count(".$cat.") AS value FROM scholars WHERE ".$cat." ".$query." GROUP BY ".$cat." ORDER BY value DESC";
// TODO differentiate req's target cols earlier: above in "if ($category == X)"
$req = <<<END_QUERY
$req = <<<END_SQL
SELECT
{$cat} AS clef,
count({$cat}) AS value
FROM (
SELECT
{$cat} AS clef,
count({$cat}) AS value
scholars_affiliations_and_keywords.*,
GROUP_CONCAT(htstr) AS hashtags_list
FROM (
SELECT
-- we create all needed cats for the outer select
-- ==============================================
scholars.luid,
scholars.country,
affiliations.org,
affiliations.team_lab,
GROUP_CONCAT(kwstr) AS keywords_list,
GROUP_CONCAT(htstr) AS hashtags_list
FROM scholars
scholars_and_affiliations.*,
GROUP_CONCAT(kwstr) AS keywords_list
FROM (
SELECT
scholars.luid,
scholars.country,
affiliations.org,
affiliations.team_lab
FROM scholars
LEFT JOIN affiliations
ON scholars.affiliation_id = affiliations.affid
WHERE (record_status = 'active'
OR (record_status = 'legacy' AND valid_date >= NOW()))
) AS scholars_and_affiliations
LEFT JOIN sch_kw
ON sch_kw.uid = luid
ON sch_kw.uid = scholars_and_affiliations.luid
LEFT JOIN keywords
ON sch_kw.kwid = keywords.kwid
LEFT JOIN sch_ht
ON sch_ht.uid = luid
LEFT JOIN hashtags
ON sch_ht.htid = hashtags.htid
LEFT JOIN affiliations
ON scholars.affiliation_id = affiliations.affid
WHERE (record_status = 'active'
OR (record_status = 'legacy' AND valid_date >= NOW()))
GROUP BY luid
) AS full_scholars_info
WHERE {$cat} {$query} -- <== our filter
GROUP BY $cat
ORDER BY value DESC ;
END_QUERY;
) AS scholars_affiliations_and_keywords
LEFT JOIN sch_ht
ON sch_ht.uid = luid
LEFT JOIN hashtags
ON sch_ht.htid = hashtags.htid
GROUP BY luid
) AS scholars_info
WHERE {$cat} {$query} -- <== our filter
GROUP BY $cat
ORDER BY value DESC ;
END_SQL;
// echo $req;
$results = array();
......
......@@ -295,21 +295,28 @@ class MyExtractor:
for scholar_id in scholar_array:
sql3='''
SELECT
scholars_and_affiliations.*,
COUNT(keywords.kwid) AS keywords_nb,
GROUP_CONCAT(keywords.kwid) AS keywords_ids,
GROUP_CONCAT(kwstr) AS keywords_list
FROM (
SELECT
scholars.*,
affiliations.*,
COUNT(keywords.kwid) AS keywords_nb,
GROUP_CONCAT(keywords.kwid) AS keywords_ids,
GROUP_CONCAT(kwstr) AS keywords_list
affiliations.*
FROM scholars
LEFT JOIN sch_kw
ON uid = luid
LEFT JOIN keywords
ON sch_kw.kwid = keywords.kwid
LEFT JOIN affiliations
ON affiliation_id = affid
WHERE luid = "%s"
GROUP BY luid ;
ON scholars.affiliation_id = affiliations.affid
WHERE (record_status = 'active'
OR (record_status = 'legacy' AND valid_date >= NOW()))
) AS scholars_and_affiliations
LEFT JOIN sch_kw
ON sch_kw.uid = scholars_and_affiliations.luid
LEFT JOIN keywords
ON sch_kw.kwid = keywords.kwid
WHERE luid = %s
GROUP BY luid ;
''' % scholar_id
# debug
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment