Commit 11a413f1 authored by PkSM3

[BUGFIX] math domain error

parent a4c676fe
...@@ -374,41 +374,42 @@ def compute_tfidf(corpus): ...@@ -374,41 +374,42 @@ def compute_tfidf(corpus):
''' % (Node.__table__.name, Node_Ngram.__table__.name, corpus.id, )) ''' % (Node.__table__.name, Node_Ngram.__table__.name, corpus.id, ))
cursor.execute('SELECT COUNT(*) FROM tmp__st') cursor.execute('SELECT COUNT(*) FROM tmp__st')
D = cursor.fetchone()[0] D = cursor.fetchone()[0]
lnD = log(D) if D>0:
cursor.execute('UPDATE tmp__idf SET idf = idf + %f' % (lnD, )) lnD = log(D)
# show off cursor.execute('UPDATE tmp__idf SET idf = idf + %f' % (lnD, ))
dbg.show('insert tfidf for %d documents' % D) # show off
cursor.execute(''' dbg.show('insert tfidf for %d documents' % D)
INSERT INTO cursor.execute('''
%s (nodex_id, nodey_id, ngram_id, score) INSERT INTO
SELECT %s (nodex_id, nodey_id, ngram_id, score)
%d AS nodex_id, SELECT
tf.node_id AS nodey_id, %d AS nodex_id,
tf.ngram_id AS ngram_id, tf.node_id AS nodey_id,
(tf.frequency * idf.idf) AS score tf.ngram_id AS ngram_id,
FROM (tf.frequency * idf.idf) AS score
tmp__idf AS idf FROM
INNER JOIN tmp__idf AS idf
tmp__tf AS tf ON tf.ngram_id = idf.ngram_id INNER JOIN
''' % (NodeNodeNgram.__table__.name, corpus.id, )) tmp__tf AS tf ON tf.ngram_id = idf.ngram_id
# # show off ''' % (NodeNodeNgram.__table__.name, corpus.id, ))
# cursor.execute(''' # # show off
# SELECT # cursor.execute('''
# node.name, # SELECT
# ngram.terms, # node.name,
# node_node_ngram.score AS tfidf # ngram.terms,
# FROM # node_node_ngram.score AS tfidf
# %s AS node_node_ngram # FROM
# INNER JOIN # %s AS node_node_ngram
# %s AS node ON node.id = node_node_ngram.nodey_id # INNER JOIN
# INNER JOIN # %s AS node ON node.id = node_node_ngram.nodey_id
# %s AS ngram ON ngram.id = node_node_ngram.ngram_id # INNER JOIN
# WHERE # %s AS ngram ON ngram.id = node_node_ngram.ngram_id
# node_node_ngram.nodex_id = %d # WHERE
# ORDER BY # node_node_ngram.nodex_id = %d
# score DESC # ORDER BY
# ''' % (NodeNodeNgram.__table__.name, Node.__table__.name, Ngram.__table__.name, corpus.id, )) # score DESC
# for row in cursor.fetchall(): # ''' % (NodeNodeNgram.__table__.name, Node.__table__.name, Ngram.__table__.name, corpus.id, ))
# print(row) # for row in cursor.fetchall():
# the end! # print(row)
db.commit() # the end!
db.commit()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment