Commit 19d5b9d9 authored by PkSM3

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

parents 8a0c074f 22bff135
...@@ -18,7 +18,7 @@ def do_cooc(corpus=None ...@@ -18,7 +18,7 @@ def do_cooc(corpus=None
, start=None, end=None , start=None, end=None
, limit=1000 , limit=1000
, isMonopartite=True , isMonopartite=True
, apax = 1): , hapax = 3):
''' '''
Compute the cooccurence matrix and save it, returning NodeNgramNgram.node_id Compute the cooccurence matrix and save it, returning NodeNgramNgram.node_id
For the moment list of paramters are not supported because, lists need to For the moment list of paramters are not supported because, lists need to
...@@ -67,13 +67,14 @@ def do_cooc(corpus=None ...@@ -67,13 +67,14 @@ def do_cooc(corpus=None
session.commit() session.commit()
doc_id = cache.NodeType['Document'].id doc_id = cache.NodeType['Document'].id
NodeNgramX = aliased(NodeNgram)
cooc_score = func.count(NodeNgramX.node_id).label('cooc_score')
#cooc_score = func.sqrt(func.sum(NodeNgramX.weight * NodeNgramY.weight)).label('cooc_score')
#print([n for n in test_query]) #print([n for n in test_query])
if isMonopartite : if isMonopartite :
NodeNgramX = aliased(NodeNgram)
NodeNgramY = aliased(NodeNgram) NodeNgramY = aliased(NodeNgram)
cooc_score = func.sum(NodeNgramX.weight + NodeNgramY.weight).label('cooc_score')
#cooc_score = func.sqrt(func.sum(NodeNgramX.weight * NodeNgramY.weight)).label('cooc_score')
cooc_query = (session.query(NodeNgramX.ngram_id, NodeNgramY.ngram_id, cooc_score) cooc_query = (session.query(NodeNgramX.ngram_id, NodeNgramY.ngram_id, cooc_score)
.join(Node, Node.id == NodeNgramX.node_id) .join(Node, Node.id == NodeNgramX.node_id)
...@@ -82,7 +83,6 @@ def do_cooc(corpus=None ...@@ -82,7 +83,6 @@ def do_cooc(corpus=None
) )
else : else :
NodeNgramY = aliased(NodeNgram) NodeNgramY = aliased(NodeNgram)
cooc_score = func.sqrt(func.sum(NodeHyperdataNgram.score) * func.sum(NodeNgramY.weight)).label('cooc_score')
cooc_query = (session.query(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id, cooc_score) cooc_query = (session.query(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id, cooc_score)
.join(Node, Node.id == NodeHyperdataNgram.node_id) .join(Node, Node.id == NodeHyperdataNgram.node_id)
...@@ -151,7 +151,7 @@ def do_cooc(corpus=None ...@@ -151,7 +151,7 @@ def do_cooc(corpus=None
# Cooc is symetric, take only the main cooccurrences and cut at the limit # Cooc is symetric, take only the main cooccurrences and cut at the limit
cooc_query = cooc_query.filter(NodeNgramX.ngram_id < NodeNgramY.ngram_id) cooc_query = cooc_query.filter(NodeNgramX.ngram_id < NodeNgramY.ngram_id)
cooc_query = cooc_query.having(cooc_score > apax) cooc_query = cooc_query.having(cooc_score > hapax)
if isMonopartite: if isMonopartite:
cooc_query = cooc_query.group_by(NodeNgramX.ngram_id, NodeNgramY.ngram_id) cooc_query = cooc_query.group_by(NodeNgramX.ngram_id, NodeNgramY.ngram_id)
......
...@@ -117,7 +117,7 @@ def get_cooc(request=None, corpus=None ...@@ -117,7 +117,7 @@ def get_cooc(request=None, corpus=None
, field1='ngrams', field2='ngrams' , field1='ngrams', field2='ngrams'
, cooc_id=None, type='node_link', size=1000 , cooc_id=None, type='node_link', size=1000
, start=None, end=None , start=None, end=None
, apax=1 , hapax=1
): ):
''' '''
get_ccoc : to compute the graph. get_ccoc : to compute the graph.
...@@ -140,7 +140,7 @@ def get_cooc(request=None, corpus=None ...@@ -140,7 +140,7 @@ def get_cooc(request=None, corpus=None
#cooc_id = get_or_create_node(nodetype='Cooccurrence', corpus=corpus).id #cooc_id = get_or_create_node(nodetype='Cooccurrence', corpus=corpus).id
cooc_id = do_cooc(corpus=corpus, field1="ngrams", field2="ngrams" cooc_id = do_cooc(corpus=corpus, field1="ngrams", field2="ngrams"
, miam_id=miam_id, group_id=group_id, stop_id=stop_id, limit=size , miam_id=miam_id, group_id=group_id, stop_id=stop_id, limit=size
, isMonopartite=True, start=start , end=end , apax=apax) , isMonopartite=True, start=start , end=end , hapax=hapax)
G, partition, ids, weight = do_distance(cooc_id, field1="ngrams", field2="ngrams", isMonopartite=True) G, partition, ids, weight = do_distance(cooc_id, field1="ngrams", field2="ngrams", isMonopartite=True)
......
...@@ -95,7 +95,7 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'): ...@@ -95,7 +95,7 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'):
) )
ngrams = [n for n in ngrams if n not in stops] ngrams = [n for n in ngrams if n not in stops]
print(ngrams) #print(ngrams)
#group = defaultdict(lambda : defaultdict()) #group = defaultdict(lambda : defaultdict())
ids_dict = dict() ids_dict = dict()
mainform_dict = dict() mainform_dict = dict()
......
...@@ -21,21 +21,21 @@ class Graph(APIView): ...@@ -21,21 +21,21 @@ class Graph(APIView):
format_ = request.GET.get('format', 'json') format_ = request.GET.get('format', 'json')
type_ = request.GET.get('type', 'node_link') type_ = request.GET.get('type', 'node_link')
apax = request.GET.get('apax', 1) hapax = request.GET.get('hapax', 1)
corpus = session.query(Node).filter(Node.id==corpus_id).first() corpus = session.query(Node).filter(Node.id==corpus_id).first()
accepted_field1 = ['ngrams', 'journal', 'source', 'authors'] accepted_field1 = ['ngrams', 'journal', 'source', 'authors']
accepted_field2 = ['ngrams',] accepted_field2 = ['ngrams',]
options = ['start', 'end', 'apax'] options = ['start', 'end', 'hapax']
if field1 in accepted_field1 : if field1 in accepted_field1 :
if field2 in accepted_field2 : if field2 in accepted_field2 :
if start is not None and end is not None : if start is not None and end is not None :
data = get_cooc(corpus=corpus,field1=field1, field2=field2, start=start, end=end, apax=apax) data = get_cooc(corpus=corpus,field1=field1, field2=field2, start=start, end=end, hapax=hapax)
else: else:
data = get_cooc(corpus=corpus,field1=field1, field2=field2, apax=apax) data = get_cooc(corpus=corpus,field1=field1, field2=field2, hapax=hapax)
if format_ == 'json': if format_ == 'json':
return JsonHttpResponse(data) return JsonHttpResponse(data)
else: else:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment