Commit 35f1f480 authored by delanoe

[FIX] simple score for cooc.

parent 6cfb53e5
......@@ -18,7 +18,7 @@ def do_cooc(corpus=None
, start=None, end=None
, limit=1000
, isMonopartite=True
, apax = 1):
, hapax = 3):
'''
Compute the cooccurrence matrix and save it, returning NodeNgramNgram.node_id
For the moment lists of parameters are not supported because lists need to
......@@ -68,12 +68,13 @@ def do_cooc(corpus=None
doc_id = cache.NodeType['Document'].id
NodeNgramX = aliased(NodeNgram)
cooc_score = func.count(NodeNgramX.node_id).label('cooc_score')
#cooc_score = func.sqrt(func.sum(NodeNgramX.weight * NodeNgramY.weight)).label('cooc_score')
#print([n for n in test_query])
if isMonopartite :
NodeNgramX = aliased(NodeNgram)
NodeNgramY = aliased(NodeNgram)
cooc_score = func.sum(NodeNgramX.weight + NodeNgramY.weight).label('cooc_score')
#cooc_score = func.sqrt(func.sum(NodeNgramX.weight * NodeNgramY.weight)).label('cooc_score')
cooc_query = (session.query(NodeNgramX.ngram_id, NodeNgramY.ngram_id, cooc_score)
.join(Node, Node.id == NodeNgramX.node_id)
......@@ -82,7 +83,6 @@ def do_cooc(corpus=None
)
else :
NodeNgramY = aliased(NodeNgram)
cooc_score = func.sqrt(func.sum(NodeHyperdataNgram.score) * func.sum(NodeNgramY.weight)).label('cooc_score')
cooc_query = (session.query(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id, cooc_score)
.join(Node, Node.id == NodeHyperdataNgram.node_id)
......@@ -151,7 +151,7 @@ def do_cooc(corpus=None
# Cooc is symmetric, take only the main cooccurrences and cut at the limit
cooc_query = cooc_query.filter(NodeNgramX.ngram_id < NodeNgramY.ngram_id)
cooc_query = cooc_query.having(cooc_score > apax)
cooc_query = cooc_query.having(cooc_score > hapax)
if isMonopartite:
cooc_query = cooc_query.group_by(NodeNgramX.ngram_id, NodeNgramY.ngram_id)
......
......@@ -117,7 +117,7 @@ def get_cooc(request=None, corpus=None
, field1='ngrams', field2='ngrams'
, cooc_id=None, type='node_link', size=1000
, start=None, end=None
, apax=1
, hapax=1
):
'''
get_cooc : to compute the graph.
......@@ -140,7 +140,7 @@ def get_cooc(request=None, corpus=None
#cooc_id = get_or_create_node(nodetype='Cooccurrence', corpus=corpus).id
cooc_id = do_cooc(corpus=corpus, field1="ngrams", field2="ngrams"
, miam_id=miam_id, group_id=group_id, stop_id=stop_id, limit=size
, isMonopartite=True, start=start , end=end , apax=apax)
, isMonopartite=True, start=start , end=end , hapax=hapax)
G, partition, ids, weight = do_distance(cooc_id, field1="ngrams", field2="ngrams", isMonopartite=True)
......
......@@ -21,21 +21,21 @@ class Graph(APIView):
format_ = request.GET.get('format', 'json')
type_ = request.GET.get('type', 'node_link')
apax = request.GET.get('apax', 1)
hapax = request.GET.get('hapax', 1)
corpus = session.query(Node).filter(Node.id==corpus_id).first()
accepted_field1 = ['ngrams', 'journal', 'source', 'authors']
accepted_field2 = ['ngrams',]
options = ['start', 'end', 'apax']
options = ['start', 'end', 'hapax']
if field1 in accepted_field1 :
if field2 in accepted_field2 :
if start is not None and end is not None :
data = get_cooc(corpus=corpus,field1=field1, field2=field2, start=start, end=end, apax=apax)
data = get_cooc(corpus=corpus,field1=field1, field2=field2, start=start, end=end, hapax=hapax)
else:
data = get_cooc(corpus=corpus,field1=field1, field2=field2, apax=apax)
data = get_cooc(corpus=corpus,field1=field1, field2=field2, hapax=hapax)
if format_ == 'json':
return JsonHttpResponse(data)
else:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment