Commit ea7b4d95 authored by Administrator

Merge branch 'stable' into tina

parents b28dd82b 5c7d269d
...@@ -244,31 +244,31 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150 ...@@ -244,31 +244,31 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150
return data return data
def tfidf(corpus, document, ngram): #def tfidf(corpus, document, ngram):
''' # '''
Compute TF-IDF (Term Frequency - Inverse Document Frequency) # Compute TF-IDF (Term Frequency - Inverse Document Frequency)
See: http://en.wikipedia.org/wiki/Tf%E2%80%93idf # See: http://en.wikipedia.org/wiki/Tf%E2%80%93idf
''' # '''
try: # try:
occurences_of_ngram = Node_Ngram.objects.get(node=document, ngram=ngram).weight # occurences_of_ngram = Node_Ngram.objects.get(node=document, ngram=ngram).weight
ngrams_by_document = sum([ x.weight for x in Node_Ngram.objects.filter(node=document)]) # ngrams_by_document = sum([ x.weight for x in Node_Ngram.objects.filter(node=document)])
term_frequency = occurences_of_ngram / ngrams_by_document # term_frequency = occurences_of_ngram / ngrams_by_document
#
xx = Node.objects.filter(parent=corpus, type=NodeType.objects.get(name="Document")).count() # xx = Node.objects.filter(parent=corpus, type=NodeType.objects.get(name="Document")).count()
yy = Node_Ngram.objects.filter(ngram=ngram).count() # filter: ON node.parent=corpus # yy = Node_Ngram.objects.filter(ngram=ngram).count() # filter: ON node.parent=corpus
inverse_document_frequency= log(xx/yy) # inverse_document_frequency= log(xx/yy)
#
# result = tf * idf # # result = tf * idf
result = term_frequency * inverse_document_frequency # result = term_frequency * inverse_document_frequency
except Exception as error: # except Exception as error:
print(error, ngram) # print(error, ngram)
result = 0 # result = 0
return result # return result
from analysis.tfidf import tfidf
def do_tfidf(corpus, reset=True): def do_tfidf(corpus, reset=True):
print("doing tfidf")
with transaction.atomic(): with transaction.atomic():
if reset==True: if reset==True:
NodeNodeNgram.objects.filter(nodex=corpus).delete() NodeNodeNgram.objects.filter(nodex=corpus).delete()
......
...@@ -424,10 +424,11 @@ def subcorpus(request, project_id, corpus_id, start , end ): ...@@ -424,10 +424,11 @@ def subcorpus(request, project_id, corpus_id, start , end ):
project = Node.objects.get(id=project_id) project = Node.objects.get(id=project_id)
corpus = Node.objects.get(id=corpus_id) corpus = Node.objects.get(id=corpus_id)
type_document = NodeType.objects.get(name="Document")
# retrieving all the documents # retrieving all the documents
documents = corpus.children.all() # documents = corpus.children.all()
number = corpus.children.count() documents = corpus.__class__.objects.filter(parent_id=corpus_id , type = type_document )
number = len(documents)
filtered_docs = [] filtered_docs = []
# filtering documents by range-date # filtering documents by range-date
...@@ -492,10 +493,11 @@ def subcorpusJSON(request, project_id, corpus_id, start , end ): ...@@ -492,10 +493,11 @@ def subcorpusJSON(request, project_id, corpus_id, start , end ):
project = Node.objects.get(id=project_id) project = Node.objects.get(id=project_id)
corpus = Node.objects.get(id=corpus_id) corpus = Node.objects.get(id=corpus_id)
type_document = NodeType.objects.get(name="Document")
# retrieving all the documents # retrieving all the documents
documents = corpus.children.all() # documents = corpus.children.all()
number = corpus.children.count() documents = corpus.__class__.objects.filter(parent_id=corpus_id , type = type_document )
number = len(documents)
filtered_docs = [] filtered_docs = []
# filtering documents by range-date # filtering documents by range-date
......
...@@ -20,8 +20,10 @@ def notify_user(username, email, password): ...@@ -20,8 +20,10 @@ def notify_user(username, email, password):
''' % (username, password) ''' % (username, password)
#send_mail('[Gargantext] Votre compte', message, 'alexandre.delanoe@mines-paristech.fr', [email], fail_silently=False ) send_mail('[Gargantext] Création de votre compte', message, 'alexandre.delanoe@mines-paristech.fr', [email], fail_silently=False )
send_mail('[Gargantext] Votre compte', message, 'alexandre.delanoe@mines-paristech.fr', [email], ['alexandre@delanoe.org'] ) #send_mail('[Gargantext] Votre compte', message, 'alexandre.delanoe@mines-paristech.fr', [email], ['alexandre@delanoe.org'] )
# add option for mass sending email # add option for mass sending email
def create_user(username, email, password=None, active=False, notify=True): def create_user(username, email, password=None, active=False, notify=True):
...@@ -58,7 +60,7 @@ def mines_account_creation(fichier=None): ...@@ -58,7 +60,7 @@ def mines_account_creation(fichier=None):
accounts = open(fichier, "r") accounts = open(fichier, "r")
for line in accounts.readlines(): for line in accounts.readlines():
username, email, password = line.split(',') username, email, password = line.split(',')
#create_user(username, email, password=password, notify=True) create_user(username, email, password=password, notify=True)
delete_user(username) #delete_user(username)
fichier.close() fichier.close()
...@@ -172,7 +172,7 @@ function dateToInt(todayTime) { ...@@ -172,7 +172,7 @@ function dateToInt(todayTime) {
// I've clicked "Read Documents": // I've clicked "Read Documents":
function updateDocuments(pagenumber,pagenav) { function updateDocuments(pagenumber,pagenav) {
pagenav = (pagenav)?pagenav:true; pagenav = (pagenav)?pagenav:true;
pagenumber = (pagenumber)?pagenumber:1; pagenumber = (pagenumber)?pagenumber:1;
pr("in here pagenav:"+pagenav+" - pagenumber:"+pagenumber) pr("in here pagenav:"+pagenav+" - pagenumber:"+pagenumber)
...@@ -230,7 +230,7 @@ function updateDocuments(pagenumber,pagenav) { ...@@ -230,7 +230,7 @@ function updateDocuments(pagenumber,pagenav) {
var current_docs = {} var current_docs = {}
var BIS_dict = {} var BIS_dict = {}
var corpusid = window.location.href.split("corpus")[1].replace(/\//g, '')//replace all the slashes var corpusid = window.location.href.split("corpus")[1].replace(/\//g, '')//replace all the slashes
var theurl = "http://localhost:8000/api/nodes/"+corpusid+"/children/duplicates?keys=title&limit=9999" var theurl = "/api/nodes/"+corpusid+"/children/duplicates?keys=title&limit=9999"
$.ajax({ $.ajax({
url: theurl, url: theurl,
success: function(data) { success: function(data) {
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment