[BUG FIX] TFIDF computation with sqlalchemy (needs optimization with session).

2ad6bbf2 · Administrator · 63e9bfb1 · 2ad6bbf2
Commit 2ad6bbf2 authored Jan 15, 2015 by Administrator
Hide whitespace changes
Inline Side-by-side

Showing with 23 additions and 23 deletions

functions.py analysis/functions.py +23 -23

No files found.
--- a/analysis/functions.py
+++ b/analysis/functions.py
@@ -244,31 +244,31 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150
    return data
-def tfidf(corpus, document, ngram):
+#def tfidf(corpus, document, ngram):
-    '''
+#    '''
-    Compute TF-IDF (Term Frequency - Inverse Document Frequency)
+#    Compute TF-IDF (Term Frequency - Inverse Document Frequency)
-    See: http://en.wikipedia.org/wiki/Tf%E2%80%93idf
+#    See: http://en.wikipedia.org/wiki/Tf%E2%80%93idf
-    '''
+#    '''
-    try:
+#    try:
-        occurences_of_ngram = Node_Ngram.objects.get(node=document, ngram=ngram).weight
+#        occurences_of_ngram = Node_Ngram.objects.get(node=document, ngram=ngram).weight
-        ngrams_by_document = sum([ x.weight for x in Node_Ngram.objects.filter(node=document)])
+#        ngrams_by_document = sum([ x.weight for x in Node_Ngram.objects.filter(node=document)])
-        term_frequency = occurences_of_ngram / ngrams_by_document
+#        term_frequency = occurences_of_ngram / ngrams_by_document
+#    
-        xx = Node.objects.filter(parent=corpus, type=NodeType.objects.get(name="Document")).count()
+#        xx = Node.objects.filter(parent=corpus, type=NodeType.objects.get(name="Document")).count()
-        yy = Node_Ngram.objects.filter(ngram=ngram).count() # filter: ON node.parent=corpus
+#        yy = Node_Ngram.objects.filter(ngram=ngram).count() # filter: ON node.parent=corpus
-        inverse_document_frequency= log(xx/yy)
+#        inverse_document_frequency= log(xx/yy)
+#        
-        # result = tf * idf
+#        # result = tf * idf
-        result = term_frequency * inverse_document_frequency
+#        result = term_frequency * inverse_document_frequency
-    except Exception as error:
+#    except Exception as error:
-        print(error, ngram)
+#        print(error, ngram)
-        result = 0
+#        result = 0
-    return result
+#    return result
+from analysis.tfidf import tfidf
 def do_tfidf(corpus, reset=True):
+    print("doing tfidf")
    with transaction.atomic():
        if reset==True:
            NodeNodeNgram.objects.filter(nodex=corpus).delete()