Commit ad9f05e7 authored by Romain Loth

merge unstable into romain (1/2)

parents ca20590a 19fe1edb
from admin.utils import PrintException from admin.utils import PrintException
from gargantext_web.db import * from gargantext_web.db import *
from gargantext_web.db import get_or_create_node
from collections import defaultdict from collections import defaultdict
from operator import itemgetter from operator import itemgetter
...@@ -8,18 +9,13 @@ from django.db import connection, transaction ...@@ -8,18 +9,13 @@ from django.db import connection, transaction
import math import math
from math import log,sqrt from math import log,sqrt
import numpy as np
import scipy import scipy
from gargantext_web.db import get_or_create_node
import pandas as pd import pandas as pd
from copy import copy from copy import copy
import numpy as np
import scipy
import networkx as nx import networkx as nx
from networkx.readwrite import json_graph from networkx.readwrite import json_graph
from rest_v1_0.api import JsonHttpResponse
from analysis.louvain import best_partition, generate_dendogram, partition_at_level from analysis.louvain import best_partition, generate_dendogram, partition_at_level
......
-- Ad-hoc lookup: returns the ngram_id of node_node_ngram rows whose ngram has
-- the terms 'moral support', restricted to nodes whose parent_id is 1452569
-- and whose node type is named 'MiamList'.
-- NOTE(review): the alias nng is joined but never referenced outside its own
-- ON clause, and both n and nodeList are node_node joined on nn.node_id --
-- confirm these joins are intended.
select nn.ngram_id from node_node_ngram as nn
JOIN node_node as n on nn.node_id = n.id
JOIN node_ngram as ng ON ng.id = nn.ngram_id
JOIN node_node_ngram as nng ON nng.ngram_id = nn.ngram_id
JOIN node_node as nodeList on nodeList.id = nn.node_id
JOIN node_nodetype as typ on typ.id = nodeList.type_id
WHERE n.parent_id = 1452569
AND ng.terms = 'moral support'
AND typ.name = 'MiamList'
-- Only the grouped column is selected, so each ngram_id is returned once.
GROUP BY nn.ngram_id
;
-- Ad-hoc lookup: returns every column of the joined node_nodengramngram rows
-- whose ngramy_id points at the ngram with terms 'moral support', restricted
-- to nodes whose parent_id is 1452569 and whose node type is named 'GroupList'.
-- NOTE(review): n and nodeList are both node_node joined on nnn.node_id --
-- confirm the second join is needed.
select * from node_nodengramngram as nnn
JOIN node_node as n on nnn.node_id = n.id
JOIN node_ngram as ng ON ng.id = nnn.ngramy_id
JOIN node_node as nodeList on nodeList.id = nnn.node_id
JOIN node_nodetype as typ on typ.id = nodeList.type_id
WHERE n.parent_id = 1452569
AND ng.terms = 'moral support'
AND typ.name = 'GroupList'
;
...@@ -70,7 +70,6 @@ def compute_mapList(corpus,limit=500,n=1): ...@@ -70,7 +70,6 @@ def compute_mapList(corpus,limit=500,n=1):
.filter(NodeNgramNgram.node_id == node_group.id) .filter(NodeNgramNgram.node_id == node_group.id)
.all() .all()
) )
#print([t for t in top_ngrams])
node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus) node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus)
session.query(NodeNgram).filter(NodeNgram.node_id==node_mapList.id).delete() session.query(NodeNgram).filter(NodeNgram.node_id==node_mapList.id).delete()
...@@ -79,7 +78,7 @@ def compute_mapList(corpus,limit=500,n=1): ...@@ -79,7 +78,7 @@ def compute_mapList(corpus,limit=500,n=1):
data = zip( data = zip(
[node_mapList.id for i in range(1,limit)] [node_mapList.id for i in range(1,limit)]
, [n[0] for n in list(top_multigrams) + list(top_monograms) , [n[0] for n in list(top_multigrams) + list(top_monograms)
if (n[0],) not in list(stop_ngrams) + list(grouped_ngrams) if (n[0],) not in list(stop_ngrams)
] ]
, [1 for i in range(1,limit)] , [1 for i in range(1,limit)]
) )
......
from .NgramsExtractor import NgramsExtractor from .NgramsExtractor import NgramsExtractor
from ..Taggers import TurboTagger #NltkTagger from ..Taggers import TurboTagger
# from ..Taggers import NltkTagger
class TurboNgramsExtractor(NgramsExtractor): class TurboNgramsExtractor(NgramsExtractor):
......
...@@ -35,11 +35,11 @@ class Graph(APIView): ...@@ -35,11 +35,11 @@ class Graph(APIView):
if field1 in accepted_field1 : if field1 in accepted_field1 :
if field2 in accepted_field2 : if field2 in accepted_field2 :
if start is not None and end is not None : if start is not None and end is not None :
data = get_cooc(corpus=corpus,field1=field1, field2=field2 data = get_cooc(corpus=corpus, field1=field1, field2=field2
, start=start, end=end , start=start, end=end
, hapax=hapax, distance=distance) , hapax=hapax, distance=distance)
else: else:
data = get_cooc(corpus=corpus,field1=field1, field2=field2 data = get_cooc(corpus=corpus, field1=field1, field2=field2
, hapax=hapax, distance = distance, bridgeness=bridgeness) , hapax=hapax, distance = distance, bridgeness=bridgeness)
if format_ == 'json': if format_ == 'json':
return JsonHttpResponse(data) return JsonHttpResponse(data)
......
...@@ -91,17 +91,30 @@ class List(APIView): ...@@ -91,17 +91,30 @@ class List(APIView):
"id": node.id, "id": node.id,
"name": node.terms, "name": node.terms,
"scores": { "scores": {
"tfidf": 0 "tfidf": 0,
"occs":0
} }
} }
# occ_list = get_or_create_node(nodetype='Occurrences', corpus_id=parent_id).id # occ_list = get_or_create_node(nodetype='Occurrences', corpus_id=parent_id).id
# print( occ_list ) # print( occ_list )
tfidf_list = get_or_create_node(nodetype='Tfidf (global)', corpus_id=parent_id).id try:
ngram_tfidf = session.query(NodeNodeNgram.ngram_id,NodeNodeNgram.score).filter( NodeNodeNgram.nodex_id==tfidf_list , NodeNodeNgram.ngram_id.in_( list(ngram_ids.keys()) )).all() tfidf_list = get_or_create_node(nodetype='Tfidf (global)', corpus_id=parent_id).id
for n in ngram_tfidf: ngram_tfidf = session.query(NodeNodeNgram.ngram_id,NodeNodeNgram.score).filter( NodeNodeNgram.nodex_id==tfidf_list , NodeNodeNgram.ngram_id.in_( list(ngram_ids.keys()) )).all()
if n.ngram_id in ngram_ids: for n in ngram_tfidf:
ngram_ids[n.ngram_id]["scores"]["tfidf"] += n.score if n.ngram_id in ngram_ids:
ngram_ids[n.ngram_id]["scores"]["tfidf"] += n.score
except:
pass
try:
occ_list = get_or_create_node(nodetype='Occurrences', corpus_id=parent_id).id
ngram_occs = session.query(NodeNodeNgram.ngram_id,NodeNodeNgram.score).filter( NodeNodeNgram.nodex_id==occ_list , NodeNodeNgram.ngram_id.in_( list(ngram_ids.keys()) )).all()
for n in ngram_occs:
if n.ngram_id in ngram_ids:
ngram_ids[n.ngram_id]["scores"]["occs"] += round(n.score)
except:
pass
end_ = time.time() end_ = time.time()
......
...@@ -97,14 +97,16 @@ var LineChart = dc.lineChart("#monthly-move-chart"); ...@@ -97,14 +97,16 @@ var LineChart = dc.lineChart("#monthly-move-chart");
var volumeChart = dc.barChart("#monthly-volume-chart"); var volumeChart = dc.barChart("#monthly-volume-chart");
// Just for Garg // Get all projects and corpuses of the user
function GetUserPortfolio() { function GetUserPortfolio() {
//http://localhost:8000/api/corpusintersection/1a50317a50145 //http://localhost:8000/api/corpusintersection/1a50317a50145
var project_id = getIDFromURL("project") var project_id = getIDFromURL("project")
var corpus_id = getIDFromURL("corpus") var corpus_id = getIDFromURL("corpus")
if( Object.keys( corpusesList ).length > 0 ) if( Object.keys( corpusesList ).length > 0 ) {
return true; $('#corpuses').modal('show');
return true;
}
var query_url = window.location.origin+'/api/userportfolio/project/'+project_id+'/corpuses' var query_url = window.location.origin+'/api/userportfolio/project/'+project_id+'/corpuses'
$.ajax({ $.ajax({
...@@ -158,8 +160,6 @@ function GetUserPortfolio() { ...@@ -158,8 +160,6 @@ function GetUserPortfolio() {
}); });
$('#corpuses').modal('show'); $('#corpuses').modal('show');
}, },
error: function(){ error: function(){
pr('Page Not found: TestFunction()'); pr('Page Not found: TestFunction()');
...@@ -167,6 +167,7 @@ function GetUserPortfolio() { ...@@ -167,6 +167,7 @@ function GetUserPortfolio() {
}); });
} }
//Getting a corpusB-list and intersecting it with current corpusA-miamlist.
function printCorpuses() { function printCorpuses() {
console.log( "!!!!!!!! in printCorpuses() !!!!!!!! " ) console.log( "!!!!!!!! in printCorpuses() !!!!!!!! " )
pr(corpusesList) pr(corpusesList)
...@@ -585,6 +586,7 @@ function SaveGlobalChanges_Form( nodes2del) { ...@@ -585,6 +586,7 @@ function SaveGlobalChanges_Form( nodes2del) {
$("#pre_savechanges").modal("show") $("#pre_savechanges").modal("show")
} }
// Save changes to all corpusA-lists
function SaveLocalChanges() { function SaveLocalChanges() {
console.clear() console.clear()
...@@ -713,7 +715,7 @@ $("#Save_All").click(function(){ ...@@ -713,7 +715,7 @@ $("#Save_All").click(function(){
// } // }
}); });
// For lists, all http-requests
function CRUD( parent_id , action , nodes , args , http_method , callback) { function CRUD( parent_id , action , nodes , args , http_method , callback) {
var the_url = window.location.origin+"/api/node/"+parent_id+"/ngrams"+action+"/"+nodes.join("+"); var the_url = window.location.origin+"/api/node/"+parent_id+"/ngrams"+action+"/"+nodes.join("+");
the_url = the_url.replace(/\/$/, ""); //remove trailing slash the_url = the_url.replace(/\/$/, ""); //remove trailing slash
...@@ -1060,6 +1062,7 @@ function getIDFromURL( item ) { ...@@ -1060,6 +1062,7 @@ function getIDFromURL( item ) {
return pageurl[cid+1]; return pageurl[cid+1];
} }
// For lists, only GET requests
function GET_( url , callback ) { function GET_( url , callback ) {
$.ajax({ $.ajax({
...@@ -1107,18 +1110,23 @@ var url = [ ...@@ -1107,18 +1110,23 @@ var url = [
GET_( url[0] , function(result) { GET_( url[0] , function(result) {
// = = = = MIAM = = = = // // = = = = MIAM = = = = //
if(result!=false) { if(result!=false) {
NGrams["main"] = { NGrams["main"] = {
"ngrams": [], "ngrams": [],
"scores": { "scores": {
"initial":"tfidf", "initial":"occs",
"nb_docs":result.length, "nb_docs":result.length,
"orig_nb_ngrams":1, "orig_nb_ngrams":1,
"nb_ngrams":result.length, "nb_ngrams":result.length,
} }
} }
for(var i in result) var occs_sum = 0
for(var i in result) {
NGrams["main"].ngrams.push(result[i]) NGrams["main"].ngrams.push(result[i])
occs_sum += result[i].scores.occs
}
if(occs_sum==0)
NGrams["main"]["scores"]["initial"] = "tfidf";
} }
// = = = = /MIAM = = = = // // = = = = /MIAM = = = = //
......
...@@ -54,7 +54,7 @@ th a { ...@@ -54,7 +54,7 @@ th a {
<div class="row"> <div class="row">
<div id="monthly-move-chart"> <div id="monthly-move-chart">
<center> <center>
Blue bars: all, Green line for zooming : select a time range to zoom in Select a time range in the chart with blue bars to zoom in
<p align="center"> <p align="center">
<a class="btn btn-xs btn-default" role="button" href="/chart/corpus/{{ corpus.id }}/data.csv">Save</a> <a class="btn btn-xs btn-default" role="button" href="/chart/corpus/{{ corpus.id }}/data.csv">Save</a>
<a class="btn btn-xs btn-default" href="javascript:volumeChart.filterAll();dc.redrawAll();">Reset</a></p> <a class="btn btn-xs btn-default" href="javascript:volumeChart.filterAll();dc.redrawAll();">Reset</a></p>
......
...@@ -119,8 +119,7 @@ input[type=radio]:checked + label { ...@@ -119,8 +119,7 @@ input[type=radio]:checked + label {
<div class="row"> <div class="row">
<div id="monthly-move-chart"> <div id="monthly-move-chart">
<center> <center>
<strong>Title</strong> (Blue bars: all, Green line: zoom) Select a time range in the chart with blue bars to zoom in
Select a time range to zoom in
<p align="center"> <p align="center">
<a class="btn btn-xs btn-default" role="button" href="/chart/corpus/{{ corpus.id }}/data.csv">Save</a> <a class="btn btn-xs btn-default" role="button" href="/chart/corpus/{{ corpus.id }}/data.csv">Save</a>
<a class="btn btn-xs btn-default" href="javascript:volumeChart.filterAll();dc.redrawAll();">Reset</a></p> <a class="btn btn-xs btn-default" href="javascript:volumeChart.filterAll();dc.redrawAll();">Reset</a></p>
......
...@@ -49,7 +49,7 @@ ...@@ -49,7 +49,7 @@
<a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.id }}/">Add documents</a></p> <a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.id }}/">Add documents</a></p>
{% endif %} {% endif %}
{% if nb_groups != None and nb_groups > 0 %} {% if nb_groups != None and nb_groups > 0 %}
<a style="visibility: hidden;" id="share_button" class="btn btn-primary btn-lg" role="button" >Share!!!</a></p> <a id="share_button" class="btn btn-primary btn-lg" role="button" >Share!!!</a></p>
{% endif %} {% endif %}
</div> </div>
</div> </div>
......
...@@ -135,7 +135,7 @@ input[type=radio]:checked + label { ...@@ -135,7 +135,7 @@ input[type=radio]:checked + label {
<div class="row"> <div class="row">
<div id="monthly-move-chart"> <div id="monthly-move-chart">
<center> <center>
Blue bars: all, Green line for zooming : select a time range to zoom in Select a time range in the chart with blue bars to zoom in
<p align="center"> <p align="center">
<a class="btn btn-xs btn-default" role="button" href="/chart/corpus/{{ corpus.id }}/data.csv">Save</a> <a class="btn btn-xs btn-default" role="button" href="/chart/corpus/{{ corpus.id }}/data.csv">Save</a>
<a class="btn btn-xs btn-default" href="javascript:volumeChart.filterAll();dc.redrawAll();">Reset</a></p> <a class="btn btn-xs btn-default" href="javascript:volumeChart.filterAll();dc.redrawAll();">Reset</a></p>
......
...@@ -252,7 +252,7 @@ def graph_share(request, generic=100, specific=100): ...@@ -252,7 +252,7 @@ def graph_share(request, generic=100, specific=100):
def node_link_share(request): def node_link_share(request):
data = {"hola":"mundo"} data = { "request" : "error" }
if request.method== 'GET' and "token" in request.GET: if request.method== 'GET' and "token" in request.GET:
import json import json
le_token = json.loads(request.GET["token"])[0] le_token = json.loads(request.GET["token"])[0]
......
from admin.env import *
from gargantext_web.db import session, cache, get_or_create_node
from gargantext_web.db import Node, NodeHyperdata, Hyperdata, Ngram, NodeNgram, NodeNgramNgram, NodeHyperdataNgram
from sqlalchemy import func, alias, asc, desc
import sqlalchemy as sa
from sqlalchemy.orm import aliased
from ngram.group import compute_groups, getStemmer
# Ad-hoc exploration script: loads one corpus node, prepares/executes a few
# ngram queries against it (all ngrams, stop list, miam list) and finally
# prints whether the corpus stemmer gives 'honeybee' and 'honeybees' the
# same stem.
# corpus = Corpus(272)
# Hard-coded id of the corpus node under inspection.
corpus_id = 540420
corpus = session.query(Node).filter(Node.id==corpus_id).first()
#group = get_or_create_node(corpus=corpus, nodetype="Group")
# Ids of the corpus' StopList and MiamList nodes, as returned by
# get_or_create_node.
stop_id = get_or_create_node(nodetype='StopList',corpus=corpus).id
miam_id = get_or_create_node(nodetype='MiamList',corpus=corpus).id
# NOTE(review): despite the name ("somme" is French for "sum"), this is a SQL
# COUNT over NodeNgram.weight, not a SUM -- confirm which one is wanted.
somme = sa.func.count(NodeNgram.weight)
# Ngrams attached to the corpus' Document nodes with their count, highest
# count first, capped at 100000 rows. The query is only built here: nothing
# in this script executes it (no .all()) or reads `ngrams` afterwards.
ngrams = (session.query(Ngram.id, Ngram.terms, somme )
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.join(Node, Node.id == NodeNgram.node_id)
.filter(Node.parent_id==corpus_id, Node.type_id==cache.NodeType['Document'].id)
.group_by(Ngram.id)
.order_by(desc(somme))
.limit(100000)
)
# (id, terms) rows for every ngram attached to the StopList node.
stops = (session.query(Ngram.id, Ngram.terms)
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.filter(NodeNgram.node_id == stop_id)
.all()
)
# (id, terms, count) rows for every ngram attached to the MiamList node,
# highest count first.
miams = (session.query(Ngram.id, Ngram.terms, somme)
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.filter(NodeNgram.node_id == miam_id)
.group_by(Ngram.id, Ngram.terms)
.order_by(desc(somme))
.all()
)
# getStemmer(corpus) returns a callable that is applied to single words below.
stemmer = getStemmer(corpus)
ws = ['honeybee', 'honeybees']
# Prints True when both words get the same stem.
print(stemmer(ws[0]) == stemmer(ws[1]))
#
# Disabled debug loop: would print every miam row, with banner lines around
# the row whose terms are 'bees'.
#for n in miams:
# if n[1] == 'bees':
# print("!" * 30)
# print(n)
# print("-" * 30)
# else:
# print(n)
#
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment