Commit 160f8070 authored by Romain Loth

SQL terms statement: removing chunking strategy altogether (not necessary with...

SQL terms statement: removing chunking strategy altogether (not necessary with our db size, restore if scale up)
parent d44a53af
...@@ -122,19 +122,6 @@ class extract: ...@@ -122,19 +122,6 @@ class extract:
print(error) print(error)
def chunks(self, l, n):
    """
    Yield successive lists of at most n items taken from l.

    l is a dict.keys(): iterable but not subscriptable, so it is
    consumed by iteration instead of being sliced.

    Every yielded chunk holds exactly n items, except possibly the
    last one, which carries the remaining len(l) % n items.
    Nothing is yielded when l is empty.
    """
    chunk = []
    for count, key in enumerate(l, 1):
        chunk.append(key)
        if count % n == 0:
            yield chunk
            chunk = []
    # Flush the remainder: without this the trailing partial chunk
    # would be silently dropped and its keys never processed.
    if chunk:
        yield chunk
def extract(self,scholar_array): def extract(self,scholar_array):
""" """
Adding each connected scholar per unique_id Adding each connected scholar per unique_id
...@@ -236,29 +223,20 @@ class extract: ...@@ -236,29 +223,20 @@ class extract:
## print(sql) ## print(sql)
## print("nb terms:",len(termsMatrix)) ## print("nb terms:",len(termsMatrix))
sqlarray = [] query = "SELECT term,id,occurrences FROM terms WHERE id IN "
chunkedTerms = list(self.chunks(termsMatrix.keys(), 500)) conditions = ' (' + ','.join(sorted(list(termsMatrix))) + ')'
for chunk_i in chunkedTerms:
if len(chunk_i)>0: # debug
# TODO temporary table + JOIN would be faster than IN # print("SQL query ===============================")
query = "SELECT term,id,occurrences FROM terms WHERE id IN " # print(query+conditions)
conditions = ' (' + ','.join(sorted(chunk_i)) + ')' # print("/SQL query ==============================")
sqlarray.append(query+conditions) for res in self.cursor.execute(query+conditions):
idT = res['id']
info = {}
for sql in sqlarray: info['id'] = idT
# debug info['occurrences'] = res['occurrences']
# print("SQL query ===============================") info['term'] = res['term']
# print(sql) self.terms_array[idT] = info
# print("/SQL query ==============================")
for res in self.cursor.execute(sql):
print(res)
idT = res['id']
info = {}
info['id'] = idT
info['occurrences'] = res['occurrences']
info['term'] = res['term']
self.terms_array[idT] = info
count=1 count=1
for term in self.terms_array: for term in self.terms_array:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment