Commit f4fceb54 authored by PkSM3

[UPDATE] table by occs finished!

parent 30b37b48
......@@ -80,8 +80,8 @@ urlpatterns = patterns('',
url(r'^tests/project/(\d+)/ISTEXquery/go$', pubmedscrapper.testISTEX),
url(r'^tests/paginator/corpus/(\d+)/$', views.newpaginatorJSON),
url(r'^tests/move2trash/$' , views.move_to_trash_multiple ),
url(r'^project/(\d+)/corpus/(\d+)/ngrams/ngrams.json$', samtest.test_ngrams)
# url(r'^project/(\d+)/corpus/(\d+)/ngrams$', views.get_ngrams),
url(r'^project/(\d+)/corpus/(\d+)/ngrams/ngrams.json$', samtest.test_ngrams),
url(r'^project/(\d+)/corpus/(\d+)/ngrams$', samtest.get_ngrams)
)
......
This diff is collapsed.
......@@ -174,9 +174,11 @@ def test_ngrams(request , project_id, corpus_id ):
# ## Getting the unique number of OCCS /> ##
Sum = 0
NgramTFIDF = session.query(NodeNodeNgram).filter( NodeNodeNgram.nodex_id==corpus_id ).all()
for ngram in NgramTFIDF:
Ngrams_Scores[ngram.ngram_id]["scores"]["tfidf_sum"] += ngram.score
Sum += Ngrams_Scores[ngram.ngram_id]["scores"]["occ_uniq"]
# print( "docid:", ngram.nodey_id , ngram.ngram_id , ngram.score)
......@@ -195,25 +197,34 @@ def test_ngrams(request , project_id, corpus_id ):
ngrams_ids = Ngrams_Scores.keys()
import math
occs_threshold = math.sqrt(Sum / len(ngrams_ids))
print("excluding ngrams with OCCs <",occs_threshold)
Metrics = {
"ngrams":[],
"scores": {
"nb_docs":len(documents),
"nb_ngrams":len(ngrams_ids)
}
"scores": {}
}
query = session.query(Ngram).filter(Ngram.id.in_( ngrams_ids ))
ngrams_data = query.all()
for ngram in ngrams_data:
Ngrams_Scores[ngram.id]["name"] = ngram.terms
Ngrams_Scores[ngram.id]["id"] = ngram.id
Metrics["ngrams"].append( Ngrams_Scores[ngram.id] )
if Ngrams_Scores[ngram.id]["scores"]["occ_uniq"] > occs_threshold:
Ngrams_Scores[ngram.id]["name"] = ngram.terms
Ngrams_Scores[ngram.id]["id"] = ngram.id
Metrics["ngrams"].append( Ngrams_Scores[ngram.id] )
Metrics["scores"] = {
"nb_docs":len(documents),
"orig_nb_ngrams":len(ngrams_ids),
"nb_ngrams":len(Metrics["ngrams"]),
"occs_threshold":occs_threshold
}
return JsonHttpResponse(Metrics)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment