Commit ac885b90 authored by Romain Loth

dbdatapi: extract loop replaced by WHERE..IN (25 x faster)

parent 07eeea46
...@@ -585,58 +585,58 @@ class BipartiteExtractor: ...@@ -585,58 +585,58 @@ class BipartiteExtractor:
# mlog("DEBUG", "MySQL extract scholar_array:", scholar_array) # mlog("DEBUG", "MySQL extract scholar_array:", scholar_array)
# scholar_array = list(scholar_array.keys())[0:3] # scholar_array = list(scholar_array.keys())[0:3]
# TODO loop could be after SELECT
for scholar_id in scholar_array: sql3='''
sql3=''' SELECT
scholars_and_orgs.*,
COUNT(keywords.kwid) AS keywords_nb,
GROUP_CONCAT(keywords.kwid) AS keywords_ids,
GROUP_CONCAT(kwstr) AS keywords_list
FROM (
SELECT SELECT
scholars_and_orgs.*, scholars_and_insts.*,
COUNT(keywords.kwid) AS keywords_nb, -- small serializations here to avoid 2nd query
GROUP_CONCAT(keywords.kwid) AS keywords_ids, GROUP_CONCAT(
GROUP_CONCAT(kwstr) AS keywords_list JSON_ARRAY(labs.name, labs.acro, labs.locname)
) AS labs_list
FROM ( FROM (
SELECT SELECT
scholars_and_insts.*, scholars.*,
-- small serializations here to avoid 2nd query
GROUP_CONCAT( GROUP_CONCAT(
JSON_ARRAY(labs.name, labs.acro, labs.locname) JSON_ARRAY(insts.name, insts.acro, insts.locname)
) AS labs_list ) AS insts_list
FROM ( FROM
SELECT scholars
scholars.*, LEFT JOIN sch_org ON luid = sch_org.uid
GROUP_CONCAT( LEFT JOIN (
JSON_ARRAY(insts.name, insts.acro, insts.locname) SELECT * FROM orgs WHERE class = 'inst'
) AS insts_list ) AS insts ON sch_org.orgid = insts.orgid
FROM WHERE (record_status = 'active'
scholars OR (record_status = 'legacy' AND valid_date >= NOW()))
LEFT JOIN sch_org ON luid = sch_org.uid
LEFT JOIN (
SELECT * FROM orgs WHERE class = 'inst'
) AS insts ON sch_org.orgid = insts.orgid
WHERE (record_status = 'active'
OR (record_status = 'legacy' AND valid_date >= NOW()))
GROUP BY luid
) AS scholars_and_insts
LEFT JOIN sch_org ON luid = sch_org.uid
LEFT JOIN (
SELECT * FROM orgs WHERE class = 'lab'
) AS labs ON sch_org.orgid = labs.orgid
GROUP BY luid GROUP BY luid
) AS scholars_and_orgs ) AS scholars_and_insts
LEFT JOIN sch_org ON luid = sch_org.uid
LEFT JOIN sch_kw LEFT JOIN (
ON sch_kw.uid = scholars_and_orgs.luid SELECT * FROM orgs WHERE class = 'lab'
LEFT JOIN keywords ) AS labs ON sch_org.orgid = labs.orgid
ON sch_kw.kwid = keywords.kwid GROUP BY luid
WHERE luid = %s ) AS scholars_and_orgs
GROUP BY luid ;
''' % scholar_id LEFT JOIN sch_kw
ON sch_kw.uid = scholars_and_orgs.luid
# debug LEFT JOIN keywords
# mlog("DEBUG", "db.extract: sql3="+sql3) ON sch_kw.kwid = keywords.kwid
WHERE luid IN %s
try: GROUP BY luid ;
self.cursor.execute(sql3) ''' % ('('+','.join(map(str, list(scholar_array.keys())))+')')
res3=self.cursor.fetchone()
# debug
mlog("DEBUG", "db.extract: sql3="+sql3)
try:
self.cursor.execute(sql3)
for res3 in self.cursor:
info = {}; info = {};
# semantic short ID # semantic short ID
...@@ -696,15 +696,14 @@ class BipartiteExtractor: ...@@ -696,15 +696,14 @@ class BipartiteExtractor:
if info['keywords_nb']>0: if info['keywords_nb']>0:
self.scholars[ide] = info; self.scholars[ide] = info;
except Exception as error: except Exception as error:
mlog("ERROR", "===== extract ERROR ====") mlog("ERROR", "===== extract ERROR ====")
mlog("ERROR", "extract on scholar no %s" % str(scholar_id)) mlog("ERROR", "extract on scholar no %s" % str(scholar_id))
if sql3 != None: if sql3 != None:
mlog("ERROR", "extract attempted SQL query:\t"+sql3) mlog("ERROR", "extract attempted SQL query:\t"+sql3)
mlog("ERROR", repr(error) + "("+error.__doc__+")") mlog("ERROR", repr(error) + "("+error.__doc__+")")
mlog("ERROR", "stack (\n\t"+"\t".join(format_tb(error.__traceback__))+"\n)") mlog("ERROR", "stack (\n\t"+"\t".join(format_tb(error.__traceback__))+"\n)")
mlog("ERROR", "===== /extract ERROR ====") mlog("ERROR", "===== /extract ERROR ====")
# génère le gexf # génère le gexf
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment