Commit 47f6061a authored by PkSM3

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

parents b7d18edb 8099d381
......@@ -47,13 +47,12 @@ def PrintException():
class WorkflowTracking:
def __init__( self ):
self.hola = "mundo"
def processing_(self , corpus , step):
def processing_(self , corpus_id , step):
try:
the_query = """ UPDATE node_node SET hyperdata=\'{ \"%s\" : \"%s\"}\' WHERE id=%d """ % ( "Processing", step , corpus.id )
the_query = """ UPDATE node_node SET hyperdata=\'{ \"%s\" : \"%s\"}\' WHERE id=%d """ % ( "Processing", step , corpus_id )
cursor = connection.cursor()
try:
cursor.execute(the_query)
......@@ -61,4 +60,4 @@ class WorkflowTracking:
finally:
connection.close()
except :
PrintException()
\ No newline at end of file
PrintException()
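The hunk above makes processing_ take a corpus_id instead of a corpus object, but the UPDATE is still assembled with % string interpolation. A minimal sketch of the same statement with driver-side parameter quoting (a hypothetical variant, not part of this commit):

import json
from django.db import connection

def update_processing_state(corpus_id, step):
    # hypothetical helper: same UPDATE as processing_ above, but the
    # driver quotes the JSON payload and the id instead of % formatting
    cursor = connection.cursor()
    try:
        cursor.execute(
            "UPDATE node_node SET hyperdata = %s WHERE id = %s",
            (json.dumps({"Processing": step}), int(corpus_id)),
        )
    finally:
        connection.close()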
......@@ -5,7 +5,7 @@ from sqlalchemy.sql import func
from gargantext_web.db import Node, Ngram, NodeNgram, NodeNgramNgram, \
NodeNodeNgram, NodeHyperdataNgram, NodeHyperdata, Hyperdata
from gargantext_web.db import session, cache, get_or_create_node, bulk_insert
from gargantext_web.db import get_session, cache, get_or_create_node, bulk_insert
from analysis.lists import WeightedMatrix, UnweightedList, Translations
import inspect
import datetime
......@@ -18,7 +18,8 @@ def do_cooc(corpus=None
, start=None, end=None
, limit=1000
, isMonopartite=True
, hapax = 3):
, hapax = 3
, mysession=None):
'''
Compute the cooccurrence matrix and save it, returning NodeNgramNgram.node_id
For the moment, lists of parameters are not supported because lists need to
......@@ -40,30 +41,35 @@ def do_cooc(corpus=None
# Security test
field1,field2 = str(field1), str(field2)
if mysession is None:
from gargantext_web.db import session
mysession = session
# Get node
node_cooc = get_or_create_node(nodetype='Cooccurrence', corpus=corpus
, name_str="Cooccurrences corpus " \
+ str(corpus.id) + "list_id: " + str(miam_id)
#, hyperdata={'field1': field1, 'field2':field2}
)
, mysession=mysession)
# BEGIN
# Saving the parameters of the analysis in the Node JSONB hyperdata field
args, _, _, parameters = inspect.getargvalues(inspect.currentframe())
hyperdata = dict()
for parameter in parameters.keys():
if parameter != 'corpus' and parameter != 'node_cooc':
hyperdata[parameter] = parameters[parameter]
node_cooc.hyperdata = hyperdata
session.add(node_cooc)
session.commit()
# hyperdata = dict()
#
# for parameter in parameters.keys():
# if parameter != 'corpus' and parameter != 'node_cooc':
# hyperdata[parameter] = parameters[parameter]
#
# node_cooc.hyperdata = hyperdata
#
mysession.add(node_cooc)
mysession.commit()
# END
session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==node_cooc.id).delete()
session.commit()
mysession.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==node_cooc.id).delete()
mysession.commit()
doc_id = cache.NodeType['Document'].id
......@@ -75,7 +81,7 @@ def do_cooc(corpus=None
if isMonopartite :
NodeNgramY = aliased(NodeNgram)
cooc_query = (session.query(NodeNgramX.ngram_id, NodeNgramY.ngram_id, cooc_score)
cooc_query = (mysession.query(NodeNgramX.ngram_id, NodeNgramY.ngram_id, cooc_score)
.join(Node, Node.id == NodeNgramX.node_id)
.join(NodeNgramY, NodeNgramY.node_id == Node.id)
.filter(Node.parent_id==corpus.id, Node.type_id==doc_id)
......@@ -83,7 +89,7 @@ def do_cooc(corpus=None
else :
NodeNgramY = aliased(NodeNgram)
cooc_query = (session.query(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id, cooc_score)
cooc_query = (mysession.query(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id, cooc_score)
.join(Node, Node.id == NodeHyperdataNgram.node_id)
.join(NodeNgramY, NodeNgramY.node_id == Node.id)
.join(Hyperdata, Hyperdata.id == NodeHyperdataNgram.hyperdata_id)
......@@ -167,7 +173,7 @@ def do_cooc(corpus=None
# Select according some scores
if cvalue_id is not None :
#miam = get_or_create_node(nodetype='Cvalue', corpus=corpus)
cvalue_list = UnweightedList(session.query(NodeNodeNgram.ngram_id)
cvalue_list = UnweightedList(mysession.query(NodeNodeNgram.ngram_id)
.filter(NodeNodeNgram.nodex_id == cvalue_id).all()
)
......
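The fallback above (if mysession is None, use the module-level session) is the convention this commit applies throughout: each function gains an optional mysession argument and falls back to the legacy global session when none is given, so existing call sites keep working. A minimal sketch of the pattern:

from gargantext_web.db import Node

def do_something(corpus, mysession=None):
    # fall back to the module-level session for legacy callers
    if mysession is None:
        from gargantext_web.db import session
        mysession = session
    return mysession.query(Node).filter(Node.parent_id == corpus.id).count()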
......@@ -9,7 +9,6 @@ import numpy as np
import collections
session = get_session()
def result2dict(query):
results = dict()
......@@ -27,6 +26,7 @@ def diachronic_specificity(corpus_id, terms, order=True):
Values are measures indicating diachronic specificity.
For now, the measure is rather simple: the distance of each period's frequency from the mean frequency over the whole corpus.
'''
# implicit global session
ngram_frequency_query = (session
.query(Node.hyperdata['publication_year'], func.count('*'))
.join(NodeNgram, Node.id == NodeNgram.node_id)
......@@ -64,7 +64,6 @@ def diachronic_specificity(corpus_id, terms, order=True):
else:
return relative_terms_count
# For tests
# diachronic_specificity(102750, "bayer", order=True)
# diachronic_specificity(26128, "bee", order=True)
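As the docstring says, the score is simply each period's relative frequency minus the mean over all periods. A minimal standalone sketch, assuming a {year: count} mapping for a single term:

def diachronic_scores(counts_by_year):
    # counts_by_year: {year: ngram_count}; hypothetical standalone helper
    total = sum(counts_by_year.values())
    relative = {year: count / total for year, count in counts_by_year.items()}
    mean = sum(relative.values()) / len(relative)
    # specificity of a year = deviation of its frequency from the mean
    return {year: freq - mean for year, freq in relative.items()}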
from admin.utils import PrintException
from gargantext_web.db import *
from gargantext_web.db import get_or_create_node
from gargantext_web.db import get_or_create_node, session,get_session
from collections import defaultdict
from operator import itemgetter
......@@ -31,6 +31,8 @@ def do_distance(cooc_id, field1=None, field2=None, isMonopartite=True, distance=
do_distance :: Int -> (Graph, Partition, {ids}, {weight})
'''
# implicit global session
authorized = ['conditional', 'distributional', 'cosine']
if distance not in authorized:
distance = 'conditional'
......@@ -203,7 +205,6 @@ def do_distance(cooc_id, field1=None, field2=None, isMonopartite=True, distance=
def getWeight(item):
return item[1]
#
......
......@@ -9,7 +9,7 @@ from math import log
import scipy
from gargantext_web.db import get_or_create_node
from gargantext_web.db import session,get_session, get_or_create_node,session
from analysis.cooccurrences import do_cooc
from analysis.distance import do_distance
......@@ -39,12 +39,14 @@ def get_cooc(request=None, corpus=None
'''
get_cooc : compute the graph.
'''
# implicit global session
data = {}
#if session.query(Node).filter(Node.type_id==type_cooc_id, Node.parent_id==corpus_id).first() is None:
print("Cooccurrences do not exist yet, creating it.")
miam_id = get_or_create_node(nodetype='MapList', corpus=corpus).id
stop_id = get_or_create_node(nodetype='StopList', corpus=corpus).id
group_id = get_or_create_node(nodetype='Group', corpus=corpus).id
miam_id = get_or_create_node(nodetype='MapList', corpus=corpus, mysession=session).id
stop_id = get_or_create_node(nodetype='StopList', corpus=corpus, mysession=session).id
group_id = get_or_create_node(nodetype='Group', corpus=corpus, mysession=session).id
SamuelFlag = False
# if field1 == field2 == 'ngrams' :
......
from collections import defaultdict
from math import sqrt
from gargantext_web.db import session, NodeNgram, NodeNgramNgram, bulk_insert
from gargantext_web.db import session,get_session, NodeNgram, NodeNgramNgram, bulk_insert
class BaseClass:
......@@ -67,6 +67,7 @@ class Translations(BaseClass):
self.items = defaultdict(int)
self.groups = defaultdict(set)
elif isinstance(other, int):
# implicit global session
query = (session
.query(NodeNgramNgram.ngramy_id, NodeNgramNgram.ngramx_id)
.filter(NodeNgramNgram.node_id == other)
......@@ -118,6 +119,7 @@ class Translations(BaseClass):
def save(self, node_id):
# delete previous data
# implicit global session
session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == node_id).delete()
session.commit()
# insert new data
......@@ -134,6 +136,7 @@ class WeightedMatrix(BaseClass):
if other is None:
self.items = defaultdict(lambda: defaultdict(float))
elif isinstance(other, int):
# implicit global session
query = (session
.query(NodeNgramNgram.ngramx_id, NodeNgramNgram.ngramy_id, NodeNgramNgram.score)
.filter(NodeNgramNgram.node_id == other)
......@@ -159,6 +162,7 @@ class WeightedMatrix(BaseClass):
def save(self, node_id):
# delete previous data
# implicit global session
session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == node_id).delete()
session.commit()
# insert new data
......@@ -243,6 +247,7 @@ class UnweightedList(BaseClass):
if other is None:
self.items = set()
elif isinstance(other, int):
# implicit global session
query = (session
.query(NodeNgram.ngram_id)
.filter(NodeNgram.node_id == other)
......@@ -323,6 +328,7 @@ class UnweightedList(BaseClass):
def save(self, node_id):
# delete previous data
# implicit global session
session.query(NodeNgram).filter(NodeNgram.node_id == node_id).delete()
session.commit()
# insert new data
......@@ -339,6 +345,7 @@ class WeightedList(BaseClass):
if other is None:
self.items = defaultdict(float)
elif isinstance(other, int):
# implicit global session
query = (session
.query(NodeNgram.ngram_id, NodeNgram.weight)
.filter(NodeNgram.node_id == other)
......@@ -435,6 +442,7 @@ class WeightedList(BaseClass):
def save(self, node_id):
# delete previous data
# implicit global session
session.query(NodeNgram).filter(NodeNgram.node_id == node_id).delete()
session.commit()
# insert new data
......
from collections import defaultdict
from gargantext_web.db import get_or_create_node, session,get_session, Node, NodeHyperdata, Hyperdata, Ngram
import pandas as pd
import numpy as np
import scipy.spatial.distance as distance
from sqlalchemy.sql import func
from sqlalchemy import desc, asc, or_, and_, Date, cast, select
from sqlalchemy import literal_column
from sqlalchemy.orm import aliased
from analysis.distance import do_distance
from analysis.cooccurrences import do_cooc
# TFIDF ngrams / period
def periods(corpus, start=None, end=None):
'''
data
periods :: Corpus -> [Periods]
# compute TFIDF matrix
# a = np.asarray([1,2,3])
# b = np.asarray([1,2,4])
# distance.cosine(a,b)
# search for min and split
'''
# implicit global session
Doc = aliased(Node)
Corpus = aliased(Node)
query = (session
.query(NodeHyperdata.value_datetime)
.join(Doc, Doc.id == NodeHyperdata.node_id)
.join(Corpus, Corpus.id == Doc.parent_id)
.join(Hyperdata, Hyperdata.id == NodeHyperdata.hyperdata_id)
.filter(Doc.type_id == cache.NodeType['Document'].id)
.filter(Corpus.id == corpus.id)
.filter(Hyperdata.name == 'publication_date')
)
first = query.order_by(asc(NodeHyperdata.value_datetime)).first()[0]
last = query.order_by(desc(NodeHyperdata.value_datetime)).first()[0]
duration = last - first
if duration.days > 365 * 3 :
print("OK")
miam_id = get_or_create_node(nodetype='MiamList', corpus=corpus, mysession=session).id
result_list = list()
for t in times:
for ngram in miam_list:
result_list.add(temporal_tfidf(ngram, time))
def tfidf_temporal(corpus, start=None, end=None):
pass
def jacquard(period1, period2):
'''
type Start :: Date
type End :: Date
type Period :: (Start, End)
type Periods :: [Period]
'''
period1 = ['start1', 'end1']
period2 = ['start2', 'end2']
periods = [period1, period2]
nodes = [cooc(corpus=corpus_id, start=period[0], end=period[1]) for period in periods]
partitions = [get_cooc(cooc_id=node_id, type='bestpartition') for node_id in nodes]
for x in nodeCom.items():
comNode[x[1]] = comNode.get(x[1], set()).union({x[0]})
def get_partition(corpus, start=None, end=None, distance=distance):
# implicit global session
miam_id = get_or_create_node(corpus=corpus, nodetype='MapList', mysession=session).id
print("get Partition %s - %s" % (start, end))
cooc_id = do_cooc(corpus=corpus
, start=start
, end=end
, miam_id = miam_id
)
G, partition, ids, weight = do_distance(cooc_id
, field1="ngrams"
, field2="ngrams"
, isMonopartite=True
, distance=distance)
return(partition, weight)
def phylo_clusters(corpus, years):
'''
corpus :: Node Corpus
years :: [Year]
'''
# implicit global session
clusters = dict()
nodes_weight = dict()
periods_start_end = [
('2000-01-01', '2010-12-31')
, ('2011-01-01', '2012-12-31')
, ('2013-01-01', '2015-12-31')
]
periods = list()
for period in periods_start_end:
periods.append(' '.join(p for p in period))
print(periods)
periods_index = [ z for z in zip (periods[:-1], periods[1:])]
print(periods_index)
for period in periods_start_end:
#start,end = period
index = ' '.join([str(p) for p in list(period)])
clusters[index], nodes_weight[index] = get_partition( corpus
, start = str(period[0])
, end = str(period[1])
, distance='distributional')
nodes = set()
for period in nodes_weight.keys():
for node in nodes_weight[period].keys():
nodes.add(node)
id_terms = session.query(Ngram.id, Ngram.terms).filter(Ngram.id.in_(nodes)).all()
id_terms_dict = dict()
for id_term in id_terms:
id_terms_dict[id_term[0]] = id_term[1]
year_com_node = defaultdict(lambda: defaultdict(set))
for period in clusters.keys():
for node, com in clusters[period].items():
year_com_node[period][com].add(node)
proximity_dict = defaultdict(
lambda: defaultdict(
lambda: defaultdict(
lambda: defaultdict( float
)
)
)
)
def t1_t2(proximity_dict, t1_t2):
t1,t2 = t1_t2
for com1 in year_com_node[t1].keys():
for com2 in year_com_node[t2].keys():
set_1 = year_com_node[t1][com1]
set_2 = year_com_node[t2][com2]
intersection = set_1.intersection(set_2)
union = set_1.union(set_2)
proximity_dict[t1][t2][com1][com2] = len(intersection) / len(union)
for period in periods_index:
t1_t2(proximity_dict, period)
data = list()
data_dict = dict()
for y1 in proximity_dict.keys():
for y2 in proximity_dict[y1].keys():
for c1 in proximity_dict[y1][y2].keys():
for c2 in proximity_dict[y1][y2][c1].keys():
score = proximity_dict[y1][y2][c1][c2]
if score > 0.05:
#print(y1,y2,c1,c2,score)
list_node1 = list()
for node in year_com_node[y1][c1]:
list_node1.append((node, nodes_weight[y1][node]))
list_node1 = sorted(list_node1, key=lambda x: x[1], reverse=True)
list_node2 = list()
for node in year_com_node[y2][c2]:
list_node2.append((node, nodes_weight[y2][node]))
list_node2 = sorted(list_node2, key=lambda x: x[1], reverse=True)
flow = list()
from_data = [id_terms_dict[x[0]] for x in list_node1[:2]]
from_data.append(str(y1))
flow.append(','.join(from_data))
to_data = [id_terms_dict[x[0]] for x in list_node2[:2]]
to_data.append(str(y2))
flow.append(','.join(to_data))
flow.append(round(score*100))
data.append(flow)
return(data)
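The proximity computed in t1_t2 above is the Jaccard index of two communities' term sets: the size of their intersection divided by the size of their union. The same computation in isolation:

def jaccard_index(set_1, set_2):
    # proximity between two term communities, as in t1_t2 above
    union = set_1 | set_2
    if not union:
        return 0.0
    return len(set_1 & set_2) / len(union)

# e.g. jaccard_index({1, 2, 3}, {2, 3, 4}) == 0.5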
......@@ -13,7 +13,7 @@ from rest_framework.exceptions import APIException
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from node.models import Node
from gargantext_web.db import session, cache, Node, NodeNgram, Ngram
from gargantext_web.db import session,get_session, cache, Node, NodeNgram, Ngram
from ngram.lists import listIds, listNgramIds
from gargantext_web.db import get_or_create_node
......@@ -68,6 +68,7 @@ class NgramEdit(APIView):
"""
Edit an existing NGram in a given list
"""
# implicit global session
list_id = int(list_id)
list_node = session.query(Node).filter(Node.id==list_id).first()
# TODO add 1 for MapList social score ?
......@@ -97,6 +98,7 @@ class NgramEdit(APIView):
"""
Delete a ngram from a list
"""
# implicit global session
print("to del",ngram_ids)
for ngram_id in ngram_ids.split('+'):
print('ngram_id', ngram_id)
......@@ -134,13 +136,14 @@ class NgramCreate(APIView):
"""
renderer_classes = (JSONRenderer,)
authentication_classes = (SessionAuthentication, BasicAuthentication)
def post(self, request, list_id):
"""
create NGram in a given list
example: request.data = {'text': 'phylogeny'}
"""
# implicit global session
list_id = int(list_id)
# format the ngram's text
ngram_text = request.data.get('text', None)
......@@ -175,7 +178,6 @@ class NgramCreate(APIView):
'list_id': list_id,
})
class Document(APIView):
"""
Read-only Document view, similar to /api/nodes/
......@@ -184,6 +186,7 @@ class Document(APIView):
def get(self, request, doc_id):
"""Document by ID"""
# implicit global session
node = session.query(Node).filter(Node.id == doc_id).first()
if node is None:
raise APIException('This node does not exist', 404)
......@@ -206,4 +209,3 @@ class Document(APIView):
}
return Response(data)
......@@ -31,6 +31,8 @@ processes = 10
# the socket (use the full path to be safe)
socket = /tmp/gargantext.sock
threads = 4
# with appropriate permissions - *may* be needed
chmod-socket = 666
......
......@@ -24,10 +24,10 @@ def get_team():
'picture' : 'david.jpg',
'role':'principal investigator'},
{ 'first_name' : 'Elias', 'last_name' : 'Showk',
'mail' : '',
'website' : 'https://github.com/elishowk',
'picture' : '', 'role' : 'developer'},
# { 'first_name' : 'Elias', 'last_name' : 'Showk',
# 'mail' : '',
# 'website' : 'https://github.com/elishowk',
# 'picture' : '', 'role' : 'developer'},
{ 'first_name' : 'Mathieu', 'last_name' : 'Rodic',
'mail' : '',
......@@ -41,7 +41,6 @@ def get_team():
'picture' : 'samuel.jpg',
'role' : 'developer'},
{ 'first_name' : 'Maziyar', 'last_name' : 'Panahi',
'mail' : '',
'website' : '',
......@@ -51,6 +50,7 @@ def get_team():
{ 'first_name' : 'Romain', 'last_name' : 'Loth',
'mail' : '',
'website' : '',
'picture' : 'romain.jpg',
'role' : 'developer'},
{ 'first_name' : 'Alexandre', 'last_name' : 'Delanoë',
......
......@@ -13,6 +13,7 @@ import collections
from gargantext_web.views import move_to_trash
from gargantext_web.db import *
from gargantext_web.views import session
from gargantext_web.validation import validate, ValidationException
from node import models
......@@ -101,6 +102,7 @@ class NodeNgramsQueries(APIView):
def post(self, request, project_id):
# example only
input = request.data or {
'x': {
'with_empty': True,
......@@ -256,3 +258,4 @@ class NodeNgramsQueries(APIView):
}, 201)
elif input['format'] == 'csv':
return CsvHttpResponse(sorted(result.items()), ('date', 'value'), 201)
......@@ -3,7 +3,7 @@
from celery import shared_task
from node import models
from django.db import transaction
from admin.utils import DebugTime
from admin.utils import DebugTime, PrintException
import cProfile
#@app.task(bind=True)
......@@ -11,18 +11,12 @@ import cProfile
def debug_task(request):
print('Request: {0!r}'.format(request))
from gargantext_web.db import session, cache, Node
from gargantext_web.db import get_session, cache, Node
from ngram.workflow import ngram_workflow
@shared_task
def apply_sum(x, y):
print(x+y)
print(session.query(Node.name).first())
from parsing.corpustools import parse_resources, extract_ngrams #add_resource,
from ngram.lists import ngrams2miam
#from ngram.lists import ngrams2miam
from admin.utils import WorkflowTracking
......@@ -33,24 +27,35 @@ def apply_workflow(corpus_id):
dbg.show('ALL WORKFLOW')
update_state = WorkflowTracking()
corpus = session.query(Node).filter(Node.id==corpus_id).first()
update_state.processing_(corpus, "Parsing")
#cProfile.runctx('parse_resources(corpus)', global,locals)
parse_resources(corpus)
update_state.processing_(corpus, "Terms extraction")
extract_ngrams(corpus, ['title', 'abstract'], nlp=True)
# update_state.processing_(corpus, "")
ngram_workflow(corpus)
#ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id)
print("End of the Workflow for corpus %d" % (corpus_id))
update_state.processing_(corpus, "0")
try :
mysession = get_session()
corpus = mysession.query(Node).filter(Node.id==corpus_id).first()
update_state.processing_(int(corpus_id), "Parsing")
#cProfile.runctx('parse_resources(corpus)', global,locals)
parse_resources(corpus, mysession=mysession)
update_state.processing_(int(corpus_id), "Terms extraction")
extract_ngrams(corpus, ['title', 'abstract'], nlp=True, mysession=mysession)
# update_state.processing_(corpus, "")
ngram_workflow(corpus, mysession=mysession)
#ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id)
print("End of the Workflow for corpus %d" % (corpus_id))
update_state.processing_(int(corpus_id), "0")
#mysession.close()
#get_session.remove()
mysession.remove()
except Exception as error:
print(error)
PrintException()
#mysession.close()
#get_session.remove()
mysession.remove()
@shared_task
def empty_trash(corpus_id):
......
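apply_workflow now draws a fresh scoped session per task and calls remove() on both the success and the failure path. A minimal sketch of that lifecycle, assuming get_session() returns a scoped_session as defined in gargantext_web/db.py below:

from celery import shared_task
from gargantext_web.db import get_session, Node

@shared_task
def some_task(corpus_id):
    # hypothetical task; shows the session lifecycle used by apply_workflow
    mysession = get_session()
    try:
        corpus = mysession.query(Node).filter(Node.id == corpus_id).first()
        print(corpus.name)
    finally:
        # scoped_session.remove() closes and discards the thread-local session
        mysession.remove()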
......@@ -141,7 +141,6 @@ def get_ngrams(request , project_id , corpus_id ):
return HttpResponse(html)
def test_test(request , corpus_id , doc_id):
"""Get All for a doc id"""
corpus_id = int(corpus_id)
......
......@@ -2,18 +2,20 @@ from django.conf import settings
from node import models
__all__ = ['literalquery', 'session', 'cache', 'Session', 'bulk_insert', 'engine', 'get_cursor', 'User']
__all__ = ['literalquery', 'cache', 'Session', 'bulk_insert', 'engine', 'get_cursor', 'User']
# initialize sqlalchemy
from sqlalchemy.orm import Session, mapper
from sqlalchemy.orm import Session, mapper, scoped_session, sessionmaker
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import create_engine, MetaData, Table, Column, ForeignKey
from sqlalchemy.types import Integer, String, DateTime
from sqlalchemy.dialects.postgresql import JSON
# SQLAlchemy session management
def get_engine():
from sqlalchemy import create_engine
......@@ -129,12 +131,27 @@ def literalquery(statement, dialect=None):
return LiteralCompiler(dialect, statement)
#
def get_sessionmaker():
from sqlalchemy.orm import sessionmaker
return sessionmaker(bind=engine)
Session = get_sessionmaker()
session = Session()
def get_session():
session_factory = get_sessionmaker()
return scoped_session(session_factory)
# import get_session; more convenient for the remove() calls
#session_factory = get_sessionmaker()
#get_session = scoped_session(session_factory)
# the global session ------------
# for modules that import
# session directly
session = get_session()
#session = get_session()()
# -------------------------------
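get_session now returns a scoped_session: a registry that hands each thread its own Session and lets callers discard it with remove(). A minimal standalone sketch of the pattern:

from sqlalchemy import create_engine
from sqlalchemy.orm import scoped_session, sessionmaker

engine = create_engine('sqlite://')  # stand-in for the project engine
Session = scoped_session(sessionmaker(bind=engine))

session_a = Session()  # thread-local Session, created on first use
session_b = Session()  # same thread, so the same underlying Session
assert session_a is session_b
Session.remove()       # close and discard this thread's Session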
# SQLAlchemy model objects caching
......@@ -158,18 +175,22 @@ class ModelCache(dict):
for column in self._columns
if column.type.python_type == str or key.__class__ == column.type.python_type
]
#session = get_session()
element = session.query(self._model).filter(or_(*conditions)).first()
if element is None:
raise KeyError
self[key] = element
return element
#session.remove()
def preload(self):
self.clear()
#session = get_session()
for element in session.query(self._model).all():
for column_name in self._columns_names:
key = getattr(element, column_name)
self[key] = element
#session.remove()
class Cache():
......@@ -231,12 +252,17 @@ class bulk_insert:
readline = read
def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hyperdata=None):
def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hyperdata=None, mysession=None):
'''
Should be a method of the object. __get_or_create__ ?
name_str :: String
hyperdata :: Dict
'''
if mysession is None:
from gargantext_web.db import session
mysession = session
if nodetype is None:
print("Need to give a type node")
else:
......@@ -245,13 +271,13 @@ def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hy
except KeyError:
ntype = cache.NodeType[nodetype] = NodeType()
ntype.name = nodetype
session.add(ntype)
session.commit()
mysession.add(ntype)
mysession.commit()
if corpus_id is not None and corpus is None:
corpus = session.query(Node).filter(Node.id==corpus_id).first()
corpus = mysession.query(Node).filter(Node.id==corpus_id).first()
node = (session.query(Node).filter(Node.type_id == ntype.id
node = (mysession.query(Node).filter(Node.type_id == ntype.id
, Node.parent_id == corpus.id
, Node.user_id == corpus.user_id
)
......@@ -272,7 +298,9 @@ def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hy
node.name=name_str
else:
node.name=ntype.name
session.add(node)
session.commit()
mysession.add(node)
mysession.commit()
#print(parent_id, n.parent_id, n.id, n.name)
return(node)
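For reference, a typical call site after this change (compare apply_workflow above) passes its own scoped session explicitly; a caller that omits mysession silently falls back to the shared global session:

from gargantext_web.db import get_session, get_or_create_node, Node

mysession = get_session()
corpus = mysession.query(Node).filter(Node.id == corpus_id).first()  # corpus_id assumed given
tfidf_node = get_or_create_node(nodetype='Tfidf', corpus=corpus,
                                mysession=mysession)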
......@@ -7,7 +7,6 @@ from django.contrib.staticfiles.storage import staticfiles_storage
from django.views.generic.base import RedirectView
from gargantext_web import views, views_optimized
import gargantext_web.corpus_views as corpus_views
from annotations import urls as annotations_urls
from annotations.views import main as annotations_main_view
......
......@@ -39,14 +39,14 @@ from django.contrib.auth import authenticate, login, logout
from scrappers.scrap_pubmed.admin import Logger
from gargantext_web.db import *
from sqlalchemy import or_, func
from gargantext_web import about
from gargantext_web.celery import empty_trash
from gargantext_web.db import cache, NodeNgram, NodeNgramNgram
from gargantext_web.db import *
from gargantext_web.db import session, cache, NodeNgram, NodeNgramNgram
def login_user(request):
logout(request)
......@@ -69,7 +69,6 @@ def logout_user(request):
return HttpResponseRedirect('/')
# Redirect to a success page.
def logo(request):
template = get_template('logo.svg')
group = "mines"
......@@ -114,7 +113,6 @@ def css(request):
}))
return HttpResponse(css_data, mimetype="text/css")
def query_to_dicts(query_string, *query_args):
"""Run a simple query and produce a generator
that returns the results as a bunch of dictionaries
......@@ -230,12 +228,12 @@ def projects(request):
date = datetime.datetime.now()
# print(Logger.write("STATIC_ROOT"))
# implicit global session
projects = session.query(Node).filter(Node.user_id == user_id, Node.type_id == project_type_id).order_by(Node.date).all()
number = len(projects)
# common_users = session.query(User_User.user_parent).filter( User_User.user_id==user_id ).all()
# [ Getting shared projects ] #
common_users = []
......@@ -278,7 +276,7 @@ def projects(request):
return HttpResponseRedirect('/projects/')
else:
form = ProjectForm()
return render(request, 'projects.html', {
'debug': settings.DEBUG,
'date': date,
......@@ -288,7 +286,7 @@ def projects(request):
'common_projects':common_projects,
'common_users':common_users,
})
def update_nodes(request, project_id, corpus_id, view=None):
'''
......@@ -297,6 +295,7 @@ def update_nodes(request, project_id, corpus_id, view=None):
- permanent deletion of Trash
'''
if not request.user.is_authenticated():
return redirect('/login/?next=%s' % request.path)
......@@ -359,6 +358,7 @@ def update_nodes(request, project_id, corpus_id, view=None):
#
def corpus(request, project_id, corpus_id):
if not request.user.is_authenticated():
return redirect('/login/?next=%s' % request.path)
......@@ -376,6 +376,8 @@ def corpus(request, project_id, corpus_id):
corpus = cache.Node[int(corpus_id)]
type_doc_id = cache.NodeType['Document'].id
# implicit global session
number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0]
......@@ -405,8 +407,7 @@ def corpus(request, project_id, corpus_id):
def newpaginatorJSON(request , corpus_id):
results = ["hola" , "mundo"]
# t = get_template('tests/newpag/thetable.html')
# project = session.query(Node).filter(Node.id==project_id).first()
......@@ -461,7 +462,6 @@ def newpaginatorJSON(request , corpus_id):
}
return JsonHttpResponse(finaldict)
def move_to_trash(node_id):
try:
node = session.query(Node).filter(Node.id == node_id).first()
......@@ -481,9 +481,12 @@ def move_to_trash(node_id):
#return(previous_type_id)
except Exception as error:
print("can not move to trash Node" + str(node_id) + ":" + str(error))
def move_to_trash_multiple(request):
user = request.user
if not user.is_authenticated():
return redirect('/login/?next=%s' % request.path)
......@@ -506,8 +509,10 @@ def move_to_trash_multiple(request):
def delete_node(request, node_id):
# do we have a valid user?
user = request.user
node = session.query(Node).filter(Node.id == node_id).first()
if not user.is_authenticated():
......@@ -523,7 +528,7 @@ def delete_node(request, node_id):
return HttpResponseRedirect('/project/' + str(node_parent_id))
else:
return HttpResponseRedirect('/projects/')
def delete_corpus(request, project_id, node_id):
# ORM Django
......@@ -545,6 +550,7 @@ def delete_corpus(request, project_id, node_id):
def chart(request, project_id, corpus_id):
''' Charts to compare, filter, count'''
t = get_template('chart.html')
user = request.user
date = datetime.datetime.now()
......@@ -562,6 +568,7 @@ def chart(request, project_id, corpus_id):
return HttpResponse(html)
def sankey(request, corpus_id):
t = get_template('sankey.html')
user = request.user
date = datetime.datetime.now()
......@@ -578,7 +585,6 @@ def sankey(request, corpus_id):
return HttpResponse(html)
def matrix(request, project_id, corpus_id):
t = get_template('matrix.html')
user = request.user
......@@ -598,6 +604,7 @@ def matrix(request, project_id, corpus_id):
return HttpResponse(html)
def graph(request, project_id, corpus_id, generic=100, specific=100):
t = get_template('explorer.html')
user = request.user
date = datetime.datetime.now()
......@@ -660,6 +667,7 @@ def corpus_csv(request, project_id, corpus_id):
'''
Create the HttpResponse object with the appropriate CSV header.
'''
response = HttpResponse(content_type='text/csv')
response['Content-Disposition'] = 'attachment; filename="corpus.csv"'
......@@ -735,30 +743,48 @@ def node_link(request, corpus_id):
'''
Create the HttpResponse object with the node_link dataset.
'''
data = []
corpus = session.query(Node).filter(Node.id==corpus_id).first()
data = get_cooc(request=request, corpus=corpus, type="node_link")
return JsonHttpResponse(data)
from analysis.periods import phylo_clusters
def sankey_csv(request, corpus_id):
data = []
corpus = session.query(Node).filter(Node.id==corpus_id).first()
#
# header = ["source", "target", "value"]
# data.append(header)
#
# flows = phylo_clusters(corpus, range(2005,2013))
# for flow in flows:
# data.append(flow)
# print(data)
#
data = [
["source", "target", "value"]
, ["Comment_1", "Theme_1", 1]
, ["Comment_2", "Theme_2", 2]
, ["Comment_3", "Theme_2", 2]
, ["Comment_7", "Theme_1", 2]
, ["Comment_8", "Theme_3", 2]
, ["Theme_1", "Reco_par_1", 2]
, ["Theme_2", "Reco_par_2", 2]
, ["Theme_2", "Reco_par_5", 2]
, ["Theme_3", "Reco_par_5", 1]
['source', 'target', 'value']
, ['inégalités,rapports sociaux,P1', 'critique,travail social,P2', 8]
, ['inégalités,rapports sociaux,P1', 'inégalités,éducation,P2', 21]
, ['éducation,institutions,P1', 'critique,travail social,P2', 7]
, ['éducation,institutions,P1', 'inégalités,éducation,P2', 10]
#, ['éducation,institutions,P1', 'personnes âgées,pouvoirs publics,P2', 8]
, ['éducation,institutions,P1', 'politiques publiques,personnes âgées dépendantes,P2', 8]
#, ['éducation,institutions,P1', 'intervention sociale,travailleur social,P2', 8]
#, ['intervention sociale,travailleur social,2011-01-01 2013-12-31', 'intervention sociale,travailleur social,P3', 0]
, ['critique,enseignement supérieur,P1', 'critique,travail social,P2', 6]
#, ['critique,enseignement supérieur,P1', 'personnes âgées,pouvoirs publics,P2', 7]
, ['justice,exclusion,violence,P1', 'inégalités,éducation,P2', 12]
, ['critique,travail social,P2', 'justice,travail social,P3', 14]
, ['inégalités,éducation,P2', 'justice,travail social,P3', 20]
, ['inégalités,éducation,P2', 'justice sociale,éducation,P3', 8]
, ['inégalités,éducation,P2', 'action publique,institutions,P3', 9]
, ['inégalités,éducation,P2', 'inégalités,inégalités sociales,P3', 18]
, ['politiques publiques,personnes âgées dépendantes,P2', 'justice sociale,éducation,P3', 20]
]
return(CsvHttpResponse(data))
def adjacency(request, corpus_id):
......@@ -818,7 +844,6 @@ def ngrams(request):
}))
return HttpResponse(html)
def nodeinfo(request , node_id):
'''Structure of the popUp for topPapers div '''
t = get_template('node-info.html')
......
......@@ -16,6 +16,8 @@ from threading import Thread
from node.admin import CustomForm
from gargantext_web.db import *
from gargantext_web.db import get_or_create_node
from gargantext_web.views import session
from gargantext_web.settings import DEBUG, MEDIA_ROOT
from rest_v1_0.api import JsonHttpResponse
from django.db import connection
......@@ -31,7 +33,6 @@ from gargantext_web.celery import apply_workflow
from admin.utils import ensure_dir
def project(request, project_id):
# do we have a valid project id?
try:
project_id = int(project_id)
......@@ -39,6 +40,7 @@ def project(request, project_id):
raise Http404()
# do we have a valid project?
project = (session
.query(Node)
.filter(Node.id == project_id)
......@@ -205,8 +207,9 @@ def tfidf(request, corpus_id, ngram_ids):
# filter input
ngram_ids = ngram_ids.split('a')
ngram_ids = [int(i) for i in ngram_ids]
corpus = session.query(Node).filter(Node.id==corpus_id).first()
tfidf_id = get_or_create_node(corpus=corpus, nodetype='Tfidf').id
print(tfidf_id)
# request data
......@@ -251,49 +254,108 @@ def tfidf(request, corpus_id, ngram_ids):
return JsonHttpResponse(nodes_list)
def getCorpusIntersection(request , corpuses_ids):
FinalDict = False
if request.method == 'POST' and "nodeids" in request.POST and len(request.POST["nodeids"])>0:
if request.method == 'POST' and "nodeids" in request.POST and len(request.POST["nodeids"])>0 :
import ast
import networkx as nx
node_ids = [int(i) for i in (ast.literal_eval( request.POST["nodeids"] )) ]
# Here are the visible nodes of the initial semantic map.
corpuses_ids = corpuses_ids.split('a')
corpuses_ids = [int(i) for i in corpuses_ids] # corpus[1] will be the corpus to compare
cooc_type_id = cache.NodeType['Cooccurrence'].id
cooc_ids = session.query(Node.id).filter(Node.user_id == request.user.id , Node.parent_id==corpuses_ids[1] , Node.type_id == cooc_type_id ).first()
if len(cooc_ids)==0:
return JsonHttpResponse(FinalDict)
# If corpus[1] has a cooccurrence id then let's continue
corpuses_ids = [int(i) for i in corpuses_ids]
print(corpuses_ids)
# corpus[1] will be the corpus to compare
Coocs = {}
import networkx as nx
G = nx.Graph() # I use an undirected graph, because direction doesn't matter here; coocs should be a triangular matrix, so...
ngrams_data1 = session.query(NodeNgramNgram).filter( NodeNgramNgram.node_id==cooc_ids[0], NodeNgramNgram.ngramx_id.in_( node_ids )).all()
for ngram in ngrams_data1: # are there visible nodes in the X-axis of corpus to compare ?
G.add_edge( ngram.ngramx_id , ngram.ngramy_id , weight=ngram.score)
ngrams_data2 = session.query(NodeNgramNgram).filter( NodeNgramNgram.node_id==cooc_ids[0], NodeNgramNgram.ngramy_id.in_( node_ids )).all()
for ngram in ngrams_data2: # are there visible nodes in the Y-axis of corpus to compare ?
if not G.has_edge(ngram.ngramx_id,ngram.ngramy_id):
def get_score(corpus_id):
cooc_type_id = cache.NodeType['Cooccurrence'].id
cooc_ids = (session.query(Node.id)
.filter(Node.user_id == request.user.id
, Node.parent_id==corpus_id
, Node.type_id == cooc_type_id )
.first()
)
if len(cooc_ids)==0:
return JsonHttpResponse(FinalDict)
# If corpus[1] has a cooccurrence id then let's continue
Coocs = {}
G = nx.Graph()
# undirected graph only
# because direction doesn't matter here
# coocs form a triangular matrix
ngrams_data = ( session.query(NodeNgramNgram)
.filter( NodeNgramNgram.node_id==cooc_ids[0]
, or_(
NodeNgramNgram.ngramx_id.in_( node_ids )
, NodeNgramNgram.ngramy_id.in_( node_ids )
)
)
.group_by(NodeNgramNgram)
.all()
)
for ngram in ngrams_data :
# are there visible nodes in the X-axis of corpus to compare ?
G.add_edge( ngram.ngramx_id , ngram.ngramy_id , weight=ngram.score)
for e in G.edges_iter():
n1 = e[0]
n2 = e[1]
# print( G[n1][n2]["weight"] , "\t", n1,",",n2 )
if n1 not in Coocs:
Coocs[n1]=0
if n2 not in Coocs:
Coocs[n2]=0
Coocs[n1]+=G[n1][n2]["weight"]
Coocs[n2]+=G[n1][n2]["weight"]
print(corpus_id, ngram)
for e in G.edges_iter() :
n1 = e[0]
n2 = e[1]
# print( G[n1][n2]["weight"] , "\t", n1,",",n2 )
if n1 not in Coocs :
Coocs[n1] = 0
if n2 not in Coocs :
Coocs[n2] = 0
Coocs[n1] += G[n1][n2]["weight"]
Coocs[n2] += G[n1][n2]["weight"]
return(Coocs,G)
Coocs_0,G_0 = get_score( corpuses_ids[0] )
Coocs_1,G_1 = get_score( corpuses_ids[1] )
FinalDict = {}
for node in node_ids:
if node in Coocs:
FinalDict[node] = Coocs[node]/G.degree(node)
# Getting AVG-COOC of each ngram that exists in the cooc-matrix of the compared-corpus.
measure = 'cooc'
if measure == 'jacquard':
for node in node_ids :
if node in G_1.nodes() and node in G_0.nodes():
neighbors_0 = set(G_0.neighbors(node))
neighbors_1 = set(G_1.neighbors(node))
jacquard = len(neighbors_0.intersection(neighbors_1)) / len(neighbors_0.union(neighbors_1))
FinalDict[node] = jacquard * 3
elif node in G_0.nodes() and node not in G_1.nodes() :
FinalDict[node] = 2
elif node not in G_0.nodes() and node in G_1.nodes() :
FinalDict[node] = 1
else:
FinalDict[node] = 0
elif measure == 'cooc':
for node in node_ids :
if node in G_1.nodes() and node in G_0.nodes():
score_0 = Coocs_0[node] / G_0.degree(node)
score_1 = Coocs_1[node] / G_1.degree(node)
FinalDict[node] = 5 * score_0 / score_1
elif node in G_0.nodes() and node not in G_1.nodes() :
FinalDict[node] = 0.5
elif node not in G_0.nodes() and node in G_1.nodes() :
FinalDict[node] = 0.2
else:
FinalDict[node] = 0
print(FinalDict)
#print(node,score)
# Getting AVG-COOC of each ngram that exists in the cooc-matrix of the compared-corpus.
return JsonHttpResponse(FinalDict)
......
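In get_score above, Coocs[n] ends up holding the sum of the weights of all edges incident to n, so Coocs[n] / G.degree(n) is the average cooccurrence strength around a term; the 'cooc' branch then compares that average across the two corpora. The same quantity via networkx's weighted degree:

def avg_cooc(G, node):
    # sum of incident edge weights divided by the plain degree;
    # equivalent to Coocs[node] / G.degree(node) in get_score above
    return G.degree(node, weight='weight') / G.degree(node)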
......@@ -6,7 +6,8 @@ from admin.utils import PrintException,DebugTime
from gargantext_web.db import NodeNgram,NodeNodeNgram
from gargantext_web.db import *
from gargantext_web.db import get_or_create_node
from gargantext_web.db import get_or_create_node, session,get_session
from parsing.corpustools import *
......@@ -40,6 +41,8 @@ def getNgrams(corpus=None, limit=1000):
'''
getNgrams :: Corpus -> [(Int, String, String, Float)]
'''
# implicit global session
terms = dict()
tfidf_node = get_or_create_node(nodetype='Tfidf (global)'
, corpus=corpus)
......@@ -63,7 +66,7 @@ def getNgrams(corpus=None, limit=1000):
PrintException()
return(terms)
def compute_cvalue(corpus=None, limit=1000):
def compute_cvalue(corpus=None, limit=1000, mysession=None):
'''
computeCvalue :: Corpus
frequency :: String -> Int -> Int
......@@ -122,12 +125,11 @@ def compute_cvalue(corpus=None, limit=1000):
result = cvalueAll()
#print([n for n in result])
session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==cvalue_node.id).delete()
session.commit()
mysession.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==cvalue_node.id).delete()
mysession.commit()
#bulk_insert(NodeNodeNgram, ['nodex_id', 'nodey_id', 'ngram_id', 'score'], [n for n in islice(result,0,100)])
bulk_insert(NodeNodeNgram, ['nodex_id', 'nodey_id', 'ngram_id', 'score'], [n for n in result])
# test
#corpus=session.query(Node).filter(Node.id==244250).first()
#computeCvalue(corpus)
......@@ -5,7 +5,7 @@ from admin.utils import PrintException,DebugTime
from gargantext_web.db import NodeNgram,NodeNodeNgram
from gargantext_web.db import *
from gargantext_web.db import get_or_create_node
from gargantext_web.db import get_or_create_node, get_session
from analysis.lists import Translations, UnweightedList
from parsing.corpustools import *
......@@ -23,7 +23,6 @@ from math import log
from functools import reduce
def getStemmer(corpus):
'''
getStemmer :: Corpus -> Stemmer
......@@ -48,10 +47,11 @@ def getStemmer(corpus):
return(stemIt)
def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'):
def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem', mysession=None):
'''
group ngrams according to a function (stemming or lemmatizing)
'''
dbg = DebugTime('Corpus #%d - group' % corpus.id)
dbg.show('Group')
......@@ -62,17 +62,19 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'):
stemIt = getStemmer(corpus)
group_to_insert = set()
node_group = get_or_create_node(nodetype='Group', corpus=corpus)
node_group = get_or_create_node(nodetype='Group', corpus=corpus, mysession=mysession)
miam_to_insert = set()
miam_node = get_or_create_node(nodetype='MiamList', corpus=corpus)
miam_node = get_or_create_node(nodetype='MiamList', corpus=corpus, mysession=mysession)
stop_node = get_or_create_node(nodetype='StopList', corpus=corpus)
stop_node = get_or_create_node(nodetype='StopList', corpus=corpus, mysession=mysession)
#stop_list = UnweightedList(stop_node.id)
Stop = aliased(NodeNgram)
frequency = sa.func.count(NodeNgram.weight)
ngrams = (session.query(Ngram.id, Ngram.terms, frequency )
ngrams = (mysession.query(Ngram.id, Ngram.terms, frequency )
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.join(Node, Node.id == NodeNgram.node_id)
#.outerjoin(Stop, Stop.ngram_id == Ngram.id)
......@@ -84,7 +86,7 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'):
.limit(limit_sup)
)
stops = (session.query(Ngram.id, Ngram.terms, frequency)
stops = (mysession.query(Ngram.id, Ngram.terms, frequency)
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.join(Node, Node.id == NodeNgram.node_id)
.join(Stop, Stop.ngram_id == Ngram.id)
......@@ -125,13 +127,14 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'):
miam_to_insert.add((miam_node.id, group[key]['mainForm'], 1))
# # Deleting previous groups
session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == node_group.id).delete()
mysession.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == node_group.id).delete()
# # Deleting previous ngrams miam list
session.query(NodeNgram).filter(NodeNgram.node_id == miam_node.id).delete()
session.commit()
mysession.query(NodeNgram).filter(NodeNgram.node_id == miam_node.id).delete()
mysession.commit()
bulk_insert(NodeNgramNgram
, ('node_id', 'ngramx_id', 'ngramy_id', 'score')
, [data for data in group_to_insert])
bulk_insert(NodeNgram, ('node_id', 'ngram_id', 'weight'), [data for data in list(miam_to_insert)])
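compute_groups clusters ngrams whose stem keys coincide and keeps one mainForm per group. A minimal sketch of the grouping idea; picking the shortest variant as representative is an assumption, since the commit's mainForm selection is not shown here:

from collections import defaultdict

def group_by_stem(terms, stemIt):
    # stemIt: term -> stem key (compare getStemmer above); sketch only
    groups = defaultdict(list)
    for term in terms:
        groups[stemIt(term)].append(term)
    # one representative per group; the shortest variant is a stand-in
    # for the 'mainForm' chosen by compute_groups
    return {key: min(variants, key=len) for key, variants in groups.items()}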
......@@ -12,7 +12,7 @@ TODO : REFACTOR 2) improvements in ngram creation (?bulk like node_ngram links)
"""
from gargantext_web.db import Ngram, NodeNgram, NodeNodeNgram, NodeNgramNgram
from gargantext_web.db import cache, session, get_or_create_node, bulk_insert
from gargantext_web.db import cache, session,get_session, get_or_create_node, bulk_insert
# import sqlalchemy as sa
from sqlalchemy.sql import func, exists
......@@ -105,6 +105,7 @@ def exportNgramLists(node,filename,delimiter="\t"):
2 <=> mapList
"""
# fetch the Ngram objects (with their terms) in one go
# implicit global session
if len(ngram_ids):
ng_objs = session.query(Ngram).filter(Ngram.id.in_(ngram_ids)).all()
else:
......@@ -131,7 +132,6 @@ def exportNgramLists(node,filename,delimiter="\t"):
# csv_rows = [[ligne1_a, ligne1_b..],[ligne2_a, ligne2_b..],..]
return csv_rows
# apply our ng_to_csv function to each list
# ------------------------------------------------------
stop_csv_rows = ngrams_to_csv_rows(stop_ngram_ids,
......@@ -187,7 +187,7 @@ def importNgramLists(node,filename,delimiter="\t", del_lists=[]):
  (and ideally add its logic to analysis.lists.Translations)
'''
# implicit global session
# the node arg has to be a corpus here
if not hasattr(node, "type_id") or node.type_id != 4:
raise TypeError("IMPORT: node argument must be a Corpus Node")
......@@ -378,10 +378,6 @@ def importNgramLists(node,filename,delimiter="\t", del_lists=[]):
print("INFO: added %i elements in the lists indices" % added_nd_ng)
print("INFO: added %i new ngrams in the lexicon" % added_ng)
# to be timed:
......
from admin.utils import PrintException
from gargantext_web.db import NodeNgram
from gargantext_web.db import NodeNgram, session,get_session
from gargantext_web.db import *
from parsing.corpustools import *
......@@ -20,6 +20,9 @@ def listIds(typeList=None, user_id=None, corpus_id=None):
typeList :: String, Type of the Node that should be created
[Node] :: List of Int, returned or created by the function
'''
# implicit global session
if typeList is None:
typeList = 'MiamList'
......@@ -57,6 +60,7 @@ def listIds(typeList=None, user_id=None, corpus_id=None):
else:
raise Exception("Usage (Warning): Need corpus_id and user_id")
# Some functions to manage ngrams according to the lists
def listNgramIds(list_id=None, typeList=None,
......@@ -74,6 +78,9 @@ def listNgramIds(list_id=None, typeList=None,
doc_id : to get specific ngrams related to a document with Node.id=doc_id
user_id : needed to create list if it does not exist
'''
# implicit global session
if typeList is None:
typeList = ['MiamList', 'StopList']
elif isinstance(typeList, string):
......@@ -123,6 +130,8 @@ def ngramList(do, list_id, ngram_ids=None) :
ngram_id = [Int] : list of Ngrams id (Ngrams.id)
list_id = Int : list id (Node.id)
'''
# implicit global session
results = []
if do == 'create':
......@@ -174,6 +183,7 @@ def ngrams2miam(user_id=None, corpus_id=None):
'''
Create a Miam List only
'''
# implicit global session
miam_id = listIds(typeList='MiamList', user_id=user_id, corpus_id=corpus_id)[0][0]
print(miam_id)
......@@ -205,6 +215,8 @@ def ngrams2miamBis(corpus):
miam_id = get_or_create_node(corpus=corpus, nodetype='MiamList')
stop_id = get_or_create_node(corpus=corpus,nodetype='StopList')
# implicit global session
query = (session.query(
literal_column(str(miam_id)).label("node_id"),
......@@ -247,6 +259,7 @@ def doList(
lem = equivalent Words which are lemmatized (but the main form)
cvalue = equivalent N-Words according to C-Value (but the main form)
'''
# implicit global session
if type_list not in ['MiamList', 'MainList']:
raise Exception("Type List (%s) not supported, try: \'MiamList\' or \'MainList\'" % type_list)
......@@ -356,4 +369,3 @@ def doList(
return(list_dict[type_list]['id'])
......@@ -5,7 +5,7 @@ from admin.env import *
from admin.utils import PrintException,DebugTime
from gargantext_web.db import NodeNgram,NodeNodeNgram,NodeNgramNgram
from gargantext_web.db import get_or_create_node, session, bulk_insert
from gargantext_web.db import get_or_create_node, get_session, bulk_insert
from sqlalchemy.sql import func
from sqlalchemy import desc, asc, or_, and_, Date, cast, select
......@@ -15,22 +15,23 @@ from sqlalchemy.orm import aliased
from ngram.tools import insert_ngrams
import csv
def compute_mapList(corpus,limit=500,n=1):
def compute_mapList(corpus,limit=500,n=1, mysession=None):
'''
According to specificity scores and the stop list,
'''
monograms_part = 0.005
monograms_limit = round(limit * monograms_part)
multigrams_limit = limit - monograms_limit
dbg = DebugTime('Corpus #%d - computing Miam' % corpus.id)
node_miam = get_or_create_node(nodetype='MiamList', corpus=corpus)
node_stop = get_or_create_node(nodetype='StopList', corpus=corpus)
node_group = get_or_create_node(nodetype='Group', corpus=corpus)
node_miam = get_or_create_node(nodetype='MiamList', corpus=corpus, mysession=mysession)
node_stop = get_or_create_node(nodetype='StopList', corpus=corpus, mysession=mysession)
node_group = get_or_create_node(nodetype='Group', corpus=corpus, mysession=mysession)
node_spec = get_or_create_node(nodetype='Specificity', corpus=corpus)
node_spec = get_or_create_node(nodetype='Specificity', corpus=corpus, mysession=mysession)
Miam=aliased(NodeNgram)
Stop=aliased(NodeNgram)
......@@ -38,7 +39,7 @@ def compute_mapList(corpus,limit=500,n=1):
Spec=aliased(NodeNodeNgram)
query = (session.query(Spec.ngram_id, Spec.score)
query = (mysession.query(Spec.ngram_id, Spec.score)
.join(Miam, Spec.ngram_id == Miam.ngram_id)
.join(Ngram, Ngram.id == Spec.ngram_id)
#.outerjoin(Group, Group.ngramy_id == Spec.ngram_id)
......@@ -61,19 +62,19 @@ def compute_mapList(corpus,limit=500,n=1):
.limit(multigrams_limit)
)
stop_ngrams = (session.query(NodeNgram.ngram_id)
stop_ngrams = (mysession.query(NodeNgram.ngram_id)
.filter(NodeNgram.node_id == node_stop.id)
.all()
)
grouped_ngrams = (session.query(NodeNgramNgram.ngramy_id)
grouped_ngrams = (mysession.query(NodeNgramNgram.ngramy_id)
.filter(NodeNgramNgram.node_id == node_group.id)
.all()
)
node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus)
session.query(NodeNgram).filter(NodeNgram.node_id==node_mapList.id).delete()
session.commit()
node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus, mysession=mysession)
mysession.query(NodeNgram).filter(NodeNgram.node_id==node_mapList.id).delete()
mysession.commit()
data = zip(
[node_mapList.id for i in range(1,limit)]
......@@ -87,12 +88,13 @@ def compute_mapList(corpus,limit=500,n=1):
dbg.show('MapList computed')
def insert_miam(corpus, ngrams=None, path_file_csv=None):
def insert_miam(corpus, ngrams=None, path_file_csv=None, mysession=None):
dbg = DebugTime('Corpus #%d - computing Miam' % corpus.id)
node_miam = get_or_create_node(nodetype='MiamList', corpus=corpus)
session.query(NodeNgram).filter(NodeNgram.node_id==node_miam.id).delete()
session.commit()
node_miam = get_or_create_node(nodetype='MiamList', corpus=corpus, mysession=mysession)
mysession.query(NodeNgram).filter(NodeNgram.node_id==node_miam.id).delete()
mysession.commit()
stop_words = set()
miam_words = set()
......@@ -122,7 +124,4 @@ def insert_miam(corpus, ngrams=None, path_file_csv=None):
file_csv.close()
dbg.show('Miam computed')
#corpus = session.query(Node).filter(Node.id==540420).first()
#compute_mapList(corpus)
#insert_miam(corpus=corpus, path_file_csv="Thesaurus_tag.csv")
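A quick worked example of the monogram/multigram split in compute_mapList: with the defaults, only about half a percent of the map list is reserved for single-word terms (note that Python 3's round() applies banker's rounding):

limit = 500
monograms_part = 0.005
monograms_limit = round(limit * monograms_part)   # round(2.5) == 2 in Python 3
multigrams_limit = limit - monograms_limit        # 498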
from gargantext_web.db import session, cache, get_cursor
from gargantext_web.db import get_session, cache, get_cursor
from gargantext_web.db import Node, NodeNgram, NodeNodeNgram
from gargantext_web.db import get_or_create_node
from admin.utils import DebugTime
def compute_occs(corpus):
def compute_occs(corpus, mysession=None):
'''
compute_occs :: Corpus -> IO ()
'''
dbg = DebugTime('Corpus #%d - OCCURRENCES' % corpus.id)
dbg.show('Calculate occurrences')
occs_node = get_or_create_node(nodetype='Occurrences', corpus=corpus)
occs_node = get_or_create_node(nodetype='Occurrences', corpus=corpus, mysession=mysession)
#print(occs_node.id)
(session.query(NodeNodeNgram)
(mysession.query(NodeNodeNgram)
.filter(NodeNodeNgram.nodex_id==occs_node.id).delete()
)
session.commit()
mysession.commit()
db, cursor = get_cursor()
cursor.execute('''
......@@ -46,5 +51,7 @@ def compute_occs(corpus):
)
)
db.commit()
#data = session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==occs_node.id).all()
#print([n for n in data])
......@@ -10,17 +10,17 @@ import numpy as np
import pandas as pd
from analysis.cooccurrences import do_cooc
from gargantext_web.db import session, cache, get_or_create_node, bulk_insert
from gargantext_web.db import get_session, cache, get_or_create_node, bulk_insert
from gargantext_web.db import NodeNgramNgram, NodeNodeNgram
from sqlalchemy import desc, asc, or_, and_, Date, cast, select
def specificity(cooc_id=None, corpus=None, limit=100):
def specificity(cooc_id=None, corpus=None, limit=100, mysession=None):
'''
Compute the specificity (a simple calculation).
'''
cooccurrences = (session.query(NodeNgramNgram)
cooccurrences = (mysession.query(NodeNgramNgram)
.filter(NodeNgramNgram.node_id==cooc_id)
.order_by(NodeNgramNgram.score)
.limit(limit)
......@@ -41,36 +41,37 @@ def specificity(cooc_id=None, corpus=None, limit=100):
m = ( xs - ys) / (2 * (x.shape[0] - 1))
m = m.sort(inplace=False)
node = get_or_create_node(nodetype='Specificity',corpus=corpus)
node = get_or_create_node(nodetype='Specificity',corpus=corpus, mysession=mysession)
data = zip( [node.id for i in range(1,m.shape[0])]
, [corpus.id for i in range(1,m.shape[0])]
, m.index.tolist()
, m.values.tolist()
)
session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==node.id).delete()
session.commit()
mysession.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==node.id).delete()
mysession.commit()
bulk_insert(NodeNodeNgram, ['nodex_id', 'nodey_id', 'ngram_id', 'score'], [d for d in data])
return(node.id)
def compute_specificity(corpus,limit=100):
def compute_specificity(corpus,limit=100, mysession=None):
'''
Compute specificities as NodeNodeNgram.
The whole workflow is as follows:
1) Compute the cooc matrix
2) Compute the specificity score, saving it in database, return its Node
'''
dbg = DebugTime('Corpus #%d - specificity' % corpus.id)
list_cvalue = get_or_create_node(nodetype='Cvalue', corpus=corpus, mysession=mysession)
cooc_id = do_cooc(corpus=corpus, cvalue_id=list_cvalue.id,limit=limit, mysession=mysession)
list_cvalue = get_or_create_node(nodetype='Cvalue', corpus=corpus)
cooc_id = do_cooc(corpus=corpus, cvalue_id=list_cvalue.id,limit=limit)
specificity(cooc_id=cooc_id,corpus=corpus,limit=limit)
specificity(cooc_id=cooc_id,corpus=corpus,limit=limit,mysession=mysession)
dbg.show('specificity')
#corpus=session.query(Node).filter(Node.id==244250).first()
#compute_specificity(corpus)
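The hunk above cuts off the definitions of xs and ys. Assuming they are the row and column sums of the square cooccurrence matrix x, a guess consistent with the line m = (xs - ys) / (2 * (x.shape[0] - 1)), the score would look like:

import pandas as pd

def specificity_scores(x):
    # x: square DataFrame of cooccurrence counts (terms x terms)
    xs = x.sum(axis=1)   # assumed: row sums
    ys = x.sum(axis=0)   # assumed: column sums
    return ((xs - ys) / (2 * (x.shape[0] - 1))).sort_values()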
......@@ -4,12 +4,14 @@ from parsing.corpustools import *
from gargantext_web.db import NodeNgram
from sqlalchemy import desc, asc, or_, and_, Date, cast, select
from gargantext_web.db import get_cursor, bulk_insert
from gargantext_web.db import get_cursor, bulk_insert, session,get_session
def get_ngramogram(corpus, limit=None):
"""
Ngram is a composition of ograms (ogram = 1gram)
"""
# implicit global session
try:
query = (session
.query(Ngram.id, Ngram.terms)
......@@ -303,6 +305,7 @@ def stem_corpus(corpus_id=None):
Returns Int as id of the Stem Node
stem_corpus :: Int
'''
# implicit global session
corpus = session.query(Node).filter(Node.id == corpus_id).first()
#print('Number of new ngrams to stem:',
......@@ -329,4 +332,3 @@ def stem_corpus(corpus_id=None):
print('Usage: stem_corpus(corpus_id=corpus.id)')
......@@ -2,7 +2,7 @@ import re
from admin.utils import PrintException
from gargantext_web.db import Node, Ngram, NodeNgram,NodeNodeNgram
from gargantext_web.db import cache, session, get_or_create_node, bulk_insert
from gargantext_web.db import cache, session,get_session, get_or_create_node, bulk_insert
import sqlalchemy as sa
from sqlalchemy.sql import func
......@@ -14,6 +14,8 @@ from ngram.tools import insert_ngrams
from analysis.lists import WeightedList, UnweightedList
def importStopList(node,filename,language='fr'):
# implicit global session
with open(filename, "r") as f:
stop_list = f.read().splitlines()
......@@ -72,17 +74,18 @@ def isStopWord(ngram, stop_words=None):
if test_match(word, regex) is True :
return(True)
def compute_stop(corpus,limit=2000,debug=False):
def compute_stop(corpus,limit=2000,debug=False, mysession=None):
'''
do some statistics on all stop lists of the same type in the database
'''
stop_node = get_or_create_node(nodetype='StopList', corpus=corpus)
stop_node_id = get_or_create_node(nodetype='StopList', corpus=corpus, mysession=mysession).id
# TODO do a function to get all stop words with social scores
root = session.query(Node).filter(Node.type_id == cache.NodeType['Root'].id).first()
root_stop_id = get_or_create_node(nodetype='StopList', corpus=root).id
root = mysession.query(Node).filter(Node.type_id == cache.NodeType['Root'].id).first()
root_stop_id = get_or_create_node(nodetype='StopList', corpus=root, mysession=mysession).id
stop_words = (session.query(Ngram.terms)
stop_words = (mysession.query(Ngram.terms)
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.filter(NodeNgram.node_id == root_stop_id)
.all()
......@@ -91,7 +94,7 @@ def compute_stop(corpus,limit=2000,debug=False):
#print([n for n in stop_words])
frequency = sa.func.count( NodeNgram.weight )
ngrams = ( session.query( Ngram.id, Ngram.terms, frequency )
ngrams = ( mysession.query( Ngram.id, Ngram.terms, frequency )
.join( NodeNgram, NodeNgram.ngram_id == Ngram.id )
.join( Node, Node.id == NodeNgram.node_id )
.filter( Node.parent_id == corpus.id,
......@@ -108,5 +111,5 @@ def compute_stop(corpus,limit=2000,debug=False):
#print([n for n in ngrams_to_stop])
stop = WeightedList({ n[0] : -1 for n in ngrams_to_stop})
stop.save(stop_node.id)
stop.save(stop_node_id)
#from admin.env import *
from math import log
from gargantext_web.db import *
from gargantext_web.db import get_or_create_node
from gargantext_web.db import get_session, get_or_create_node
from admin.utils import DebugTime
def compute_tfidf(corpus):
def compute_tfidf(corpus, mysession=None):
# compute terms frequency sum
dbg = DebugTime('Corpus #%d - TFIDF' % corpus.id)
dbg.show('calculate terms frequencies sums')
tfidf_node = get_or_create_node(nodetype='Tfidf', corpus=corpus)
tfidf_node = get_or_create_node(nodetype='Tfidf', corpus=corpus, mysession=mysession)
db, cursor = get_cursor()
cursor.execute('''
......@@ -119,18 +120,20 @@ def compute_tfidf(corpus):
# the end!
db.commit()
def compute_tfidf_global(corpus):
def compute_tfidf_global(corpus, mysession=None):
'''
Maybe improve this with:
#http://stackoverflow.com/questions/8674718/best-way-to-select-random-rows-postgresql
'''
dbg = DebugTime('Corpus #%d - tfidf global' % corpus.id)
dbg.show('calculate terms frequencies sums')
tfidf_node = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus)
tfidf_node = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus, mysession=mysession)
# update would be better
session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==tfidf_node.id).delete()
session.commit()
mysession.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==tfidf_node.id).delete()
mysession.commit()
# compute terms frequency sum
db, cursor = get_cursor()
......@@ -257,6 +260,3 @@ def compute_tfidf_global(corpus):
db.commit()
dbg.show('insert tfidf')
#corpus=session.query(Node).filter(Node.id==244250).first()
#compute_tfidf_global(corpus)
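For reference, the score these functions persist into NodeNodeNgram is the classic tf-idf; the commit's SQL may differ in normalization details. A minimal sketch with raw counts:

from math import log

def tfidf(term_count_in_doc, doc_length, n_docs, n_docs_with_term):
    # term frequency weighted by inverse document frequency
    tf = term_count_in_doc / doc_length
    idf = log(n_docs / n_docs_with_term)
    return tf * idf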
from gargantext_web.db import session
from gargantext_web.db import Ngram, NodeNgram, NodeNgramNgram
from gargantext_web.db import get_cursor, bulk_insert, get_or_create_node
from gargantext_web.db import get_cursor, bulk_insert, get_or_create_node, session,get_session
def insert_ngrams_to_list(list_of_ngrams, corpus, list_type='MapList', erase=True):
'''
Works only for Stop and Map
'''
list_node = get_or_create_node(corpus=corpus, nodetype=list_type)
group_node = get_or_create_node(corpus=corpus, nodetype='GroupList')
# implicit global session
list_node = get_or_create_node(corpus=corpus, nodetype=list_type, mysession=session)
group_node = get_or_create_node(corpus=corpus, nodetype='GroupList', mysession=session)
group_list = (session.query(NodeNgramNgram.ngramy_id)
.filter(NodeNgramNgram.node_id==group_node.id)
.all()
......
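insert_ngrams_to_list is the one helper here that keeps the implicit global session, passing mysession=session explicitly. A hypothetical variant showing what the same refactor would look like applied to it (not in the commit; only the opening statements are mirrored):
def insert_ngrams_to_list_scoped(list_of_ngrams, corpus, list_type='MapList',
                                 erase=True, mysession=None):
    # hypothetical: same helper with an explicit, caller-owned session
    if mysession is None:
        mysession = get_session()
    list_node = get_or_create_node(corpus=corpus, nodetype=list_type,
                                   mysession=mysession)
    group_node = get_or_create_node(corpus=corpus, nodetype='GroupList',
                                    mysession=mysession)
    group_list = (mysession.query(NodeNgramNgram.ngramy_id)
                  .filter(NodeNgramNgram.node_id == group_node.id)
                  .all())
    return list_node, group_node, group_list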
......@@ -8,21 +8,21 @@ from gargantext_web.db import get_or_create_node
from ngram.mapList import compute_mapList
from ngram.occurrences import compute_occs
from gargantext_web.db import session , Node , NodeNgram
from gargantext_web.db import Node , NodeNgram
from admin.utils import WorkflowTracking
def ngram_workflow(corpus, n=5000):
def ngram_workflow(corpus, n=5000, mysession=None):
'''
Complete workflow to filter the ngrams.
'''
update_state = WorkflowTracking()
update_state.processing_(corpus, "Stop words")
compute_stop(corpus)
update_state.processing_(corpus.id, "Stop words")
compute_stop(corpus, mysession=mysession)
update_state.processing_(corpus, "TF-IDF global score")
compute_tfidf_global(corpus)
update_state.processing_(corpus.id, "TF-IDF global score")
compute_tfidf_global(corpus, mysession=mysession)
part = round(n * 0.9)
......@@ -31,28 +31,28 @@ def ngram_workflow(corpus, n=5000):
# part = round(part * 0.8)
#print('spec part:', part)
update_state.processing_(corpus, "Specificity score")
compute_specificity(corpus,limit=part)
update_state.processing_(corpus.id, "Specificity score")
compute_specificity(corpus,limit=part, mysession=mysession)
part = round(part * 0.8)
limit_inf = round(part * 1)
limit_sup = round(part * 5)
#print(limit_inf,limit_sup)
update_state.processing_(corpus, "Synonyms")
update_state.processing_(corpus.id, "Synonyms")
try:
compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup)
compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup, mysession=mysession)
except Exception as error:
print("Workflow Ngram Group error", error)
pass
update_state.processing_(corpus, "Map list terms")
compute_mapList(corpus,limit=1000) # size
update_state.processing_(corpus.id, "Map list terms")
compute_mapList(corpus,limit=1000, mysession=mysession) # size
update_state.processing_(corpus, "TF-IDF local score")
compute_tfidf(corpus)
update_state.processing_(corpus.id, "TF-IDF local score")
compute_tfidf(corpus, mysession=mysession)
update_state.processing_(corpus, "Occurrences")
compute_occs(corpus)
update_state.processing_(corpus.id, "Occurrences")
compute_occs(corpus, mysession=mysession)
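A sketch of how a background task could run this whole workflow with one session per task; run_ngram_workflow and the rollback policy are illustrative, only ngram_workflow and get_session come from this commit:
from gargantext_web.db import get_session, Node
def run_ngram_workflow(corpus_id, n=5000):
    # one session per task: load the corpus, run the workflow, always clean up
    mysession = get_session()
    try:
        corpus = mysession.query(Node).filter(Node.id == corpus_id).first()
        ngram_workflow(corpus, n=n, mysession=mysession)
    except Exception:
        mysession.rollback()
        raise
    finally:
        mysession.close()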
......@@ -11,8 +11,11 @@ import datetime
import copy
from gargantext_web.views import move_to_trash
from gargantext_web.db import session, cache, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram\
from gargantext_web.db import cache, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram\
, NodeType, Node_Hyperdata
from gargantext_web.views import session
from gargantext_web.validation import validate, ValidationException
from node import models
......@@ -100,8 +103,10 @@ def Root(request, format=None):
class NodesChildrenNgrams(APIView):
def get(self, request, node_id):
# query ngrams
ParentNode = aliased(Node)
ngrams_query = (session
.query(Ngram.terms, func.sum(Node_Ngram.weight).label('count'))
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
......@@ -138,10 +143,12 @@ class NodesChildrenNgrams(APIView):
for ngram in ngrams_query[offset : offset+limit]
],
})
class NodesChildrenNgramsIds(APIView):
def get(self, request, node_id):
# query ngrams
ParentNode = aliased(Node)
ngrams_query = (session
......@@ -180,13 +187,14 @@ class NodesChildrenNgramsIds(APIView):
for node, count in ngrams_query[offset : offset+limit]
],
})
from gargantext_web.db import get_or_create_node
class Ngrams(APIView):
def get(self, request, node_id):
# query ngrams
ParentNode = aliased(Node)
corpus = session.query(Node).filter(Node.id==node_id).first()
......@@ -303,11 +311,11 @@ class Ngrams(APIView):
],
})
class NodesChildrenDuplicates(APIView):
def _fetch_duplicates(self, request, node_id, extra_columns=None, min_count=1):
# input validation
if extra_columns is None:
extra_columns = []
......@@ -346,6 +354,7 @@ class NodesChildrenDuplicates(APIView):
duplicates_query = duplicates_query.having(func.count() > min_count)
# and now, return it
return duplicates_query
def get(self, request, node_id):
# data to be returned
......@@ -395,6 +404,7 @@ class NodesChildrenDuplicates(APIView):
# retrieve metadata from a given list of parent node
def get_metadata(corpus_id_list):
# query hyperdata keys
ParentNode = aliased(Node)
......@@ -695,6 +705,7 @@ class NodesList(APIView):
authentication_classes = (SessionAuthentication, BasicAuthentication)
def get(self, request):
print("user id : " + str(request.user))
query = (session
.query(Node.id, Node.name, NodeType.name.label('type'))
......@@ -710,9 +721,11 @@ class NodesList(APIView):
node._asdict()
for node in query.all()
]})
class Nodes(APIView):
def get(self, request, node_id):
node = session.query(Node).filter(Node.id == node_id).first()
if node is None:
raise APIException('This node does not exist', 404)
......@@ -725,6 +738,7 @@ class Nodes(APIView):
#'hyperdata': dict(node.hyperdata),
'hyperdata': node.hyperdata,
})
# deleting node by id
# currently, very dangerous.
......@@ -732,6 +746,7 @@ class Nodes(APIView):
# for better consistency...
def delete(self, request, node_id):
user = request.user
node = session.query(Node).filter(Node.id == node_id).first()
......@@ -744,6 +759,7 @@ class Nodes(APIView):
except Exception as error:
msgres = "error deleting : " + str(node_id) + " " + str(error)
class CorpusController:
@classmethod
......@@ -764,7 +780,6 @@ class CorpusController:
# raise Http403("Unauthorized access.")
return corpus
@classmethod
def ngrams(cls, request, node_id):
......@@ -773,6 +788,7 @@ class CorpusController:
# build query
ParentNode = aliased(Node)
query = (session
.query(Ngram.terms, func.count('*'))
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
......@@ -799,3 +815,4 @@ class CorpusController:
)
else:
raise ValidationError('Unrecognized "format=%s", should be "csv" or "json"' % (format, ))
from rest_v1_0.api import APIView, APIException, JsonHttpResponse, CsvHttpResponse
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from gargantext_web.db import session, Node
from gargantext_web.db import session, get_session, Node
from analysis.functions import get_cooc
class Graph(APIView):
authentication_classes = (SessionAuthentication, BasicAuthentication)
def get(self, request, corpus_id):
'''
Graph.get :: Get graph data as REST API.
......@@ -13,6 +14,8 @@ class Graph(APIView):
graph?field1=ngrams&field2=ngrams&
graph?field1=ngrams&field2=ngrams&start=''&end=''
'''
# implicit global session
field1 = request.GET.get('field1', 'ngrams')
field2 = request.GET.get('field2', 'ngrams')
......
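The "# implicit global session" marker above flags that Graph.get still reads through the module-level session even though get_session is now imported. A hedged per-request variant (the get_cooc keyword signature mirrors its use later in this diff; the class name and try/finally wiring are assumptions):
from rest_v1_0.api import APIView, JsonHttpResponse
from gargantext_web.db import get_session, Node
from analysis.functions import get_cooc
class GraphPerRequest(APIView):
    def get(self, request, corpus_id):
        # per-request session instead of the shared module-level one
        mysession = get_session()
        try:
            corpus = mysession.query(Node).filter(Node.id == corpus_id).first()
            data = get_cooc(request=request, corpus=corpus, type='node_link')
            return JsonHttpResponse(data)
        finally:
            mysession.close()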
......@@ -16,8 +16,7 @@ from gargantext_web.db import cache
from gargantext_web.validation import validate, ValidationException
from gargantext_web.db import session, Node, NodeNgram, NodeNgramNgram\
, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram, get_or_create_node
from gargantext_web.db import session, get_session, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram, get_or_create_node
def DebugHttpResponse(data):
......@@ -82,6 +81,8 @@ class List(APIView):
def get_metadata ( self , ngram_ids , parent_id ):
# implicit global session
start_ = time.time()
nodes_ngrams = session.query(Ngram.id , Ngram.terms).filter( Ngram.id.in_( list(ngram_ids.keys()))).all()
......@@ -121,10 +122,10 @@ class List(APIView):
return { "data":ngram_ids , "secs":(end_ - start_) }
def get(self, request, corpus_id , list_name ):
if not request.user.is_authenticated():
return JsonHttpResponse( {"request" : "forbidden"} )
# implicit global session
corpus = session.query(Node).filter( Node.id==corpus_id ).first()
# if corpus==None:
# return JsonHttpResponse( {"request" : "forbidden"} )
......@@ -162,6 +163,7 @@ class Ngrams(APIView):
def get(self, request, node_id):
if not request.user.is_authenticated():
return JsonHttpResponse( {"request" : "forbidden"} )
# implicit global session
corpus = session.query(Node).filter( Node.id==node_id).first()
# if corpus==None:
# return JsonHttpResponse( {"request" : "forbidden"} )
......@@ -340,6 +342,9 @@ class Group(APIView):
'''
def get_group_id(self , node_id , user_id):
node_id = int(node_id)
# implicit global session
corpus = session.query(Node).filter( Node.id==node_id).first()
if corpus==None: return None
group = get_or_create_node(corpus=corpus, nodetype='Group')
......@@ -370,6 +375,9 @@ class Group(APIView):
import networkx as nx
G = nx.Graph()
DG = nx.DiGraph()
# implicit global session
ngrams_ngrams = (session
.query(NodeNgramNgram)
.filter(NodeNgramNgram.node_id==group_id)
......@@ -409,13 +417,15 @@ class Group(APIView):
# groups["nodes"] = get_occtfidf( ngrams , request.user.id , corpus_id , "Group")
return JsonHttpResponse( { "data" : groups } )
def post(self, request, node_id):
return JsonHttpResponse( ["hola" , "mundo"] )
def delete(self, request, corpus_id):
# input validation
# implicit global session
input = validate(request.DATA, {'data' : {'source': int, 'target': list}})
group_id = get_group_id(corpus_id , request.user.id)
......@@ -434,6 +444,7 @@ class Group(APIView):
raise APIException('Missing parameter: "{\'data\' : {\'source\': Int, \'target\': [Int]}}"', 400)
def put(self , request , corpus_id ):
# implicit global session
group_rawreq = dict(request.data)
......@@ -448,6 +459,8 @@ class Group(APIView):
gdict.append(subform)
GDict.append( gdict )
existing_group_id = self.get_group_id(corpus_id , request.user.id)
# implicit global session
grouped_ngrams = (session
.query(NodeNgramNgram)
.filter(NodeNgramNgram.node_id==existing_group_id)
......@@ -512,6 +525,7 @@ class Group(APIView):
nodengramngram = NodeNgramNgram(node_id=existing_group_id, ngramx_id=n1 , ngramy_id=n2, score=1.0)
session.add(nodengramngram)
session.commit()
# [ - - - / doing links of new clique and adding to DB - - - ] #
......@@ -571,6 +585,7 @@ class Keep(APIView):
authentication_classes = (SessionAuthentication, BasicAuthentication)
def get (self, request, corpus_id):
# implicit global session
# list_id = session.query(Node).filter(Node.id==list_id).first()
corpus = session.query(Node).filter( Node.id==corpus_id ).first()
node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus )
......@@ -579,11 +594,12 @@ class Keep(APIView):
for node in nodes_in_map:
results[node.ngram_id] = True
return JsonHttpResponse(results)
def put (self, request, corpus_id):
"""
Add ngrams to map list
"""
# implicit global session
group_rawreq = dict(request.data)
ngram_2add = [int(i) for i in list(group_rawreq.keys())]
corpus = session.query(Node).filter( Node.id==corpus_id ).first()
......@@ -598,6 +614,8 @@ class Keep(APIView):
"""
Delete ngrams from the map list
"""
# implicit global session
group_rawreq = dict(request.data)
# print("group_rawreq:")
# print(group_rawreq)
......@@ -616,4 +634,3 @@ class Keep(APIView):
return JsonHttpResponse(True, 201)
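Nearly every view in this file carries the same "# implicit global session" note. One hedged way to retire them all would be a dispatch-level mixin, sketched here with an illustrative name (not part of the commit); a view would inherit (PerRequestSessionMixin, APIView) and read self.mysession:
from gargantext_web.db import get_session
class PerRequestSessionMixin:
    # open one session per request and guarantee it is closed,
    # whatever the handler does
    def dispatch(self, request, *args, **kwargs):
        self.mysession = get_session()
        try:
            return super().dispatch(request, *args, **kwargs)
        finally:
            self.mysession.close()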
......@@ -29,6 +29,7 @@ import threading
from node.admin import CustomForm
from gargantext_web.db import *
from gargantext_web.db import get_sessionmaker, session, get_session
from gargantext_web.settings import DEBUG, MEDIA_ROOT
from rest_v1_0.api import JsonHttpResponse
......@@ -45,7 +46,8 @@ def getGlobalStats(request ):
alist = ["bar","foo"]
if request.method == "POST":
N = 1000
#N = 1000
N = 300
query = request.POST["query"]
print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" query =", query )
print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" N =", N )
......@@ -83,14 +85,12 @@ def getGlobalStatsISTEXT(request ):
def doTheQuery(request , project_id):
alist = ["hola","mundo"]
# implicit global session
# do we have a valid project id?
try:
project_id = int(project_id)
except ValueError:
raise Http404()
# do we have a valid project?
project = (session
.query(Node)
......@@ -134,7 +134,7 @@ def doTheQuery(request , project_id):
)
session.add(corpus)
session.commit()
corpus_id = corpus.id
# """
# urlreqs: List of urls to query.
# - Then, to each url in urlreqs you do:
......@@ -170,9 +170,9 @@ def doTheQuery(request , project_id):
try:
if not DEBUG:
apply_workflow.apply_async((corpus.id,),)
apply_workflow.apply_async((corpus_id,),)
else:
thread = threading.Thread(target=apply_workflow, args=(corpus.id, ), daemon=True)
thread = threading.Thread(target=apply_workflow, args=(corpus_id, ), daemon=True)
thread.start()
except Exception as error:
print('WORKFLOW ERROR')
......@@ -188,7 +188,7 @@ def testISTEX(request , project_id):
print("testISTEX:")
print(request.method)
alist = ["bar","foo"]
# implicit global session
# do we have a valid project id?
try:
project_id = int(project_id)
......@@ -247,7 +247,7 @@ def testISTEX(request , project_id):
)
session.add(corpus)
session.commit()
corpus_id = corpus.id
ensure_dir(request.user)
tasks = MedlineFetcher()
......@@ -276,9 +276,9 @@ def testISTEX(request , project_id):
###########################
try:
if not DEBUG:
apply_workflow.apply_async((corpus.id,),)
apply_workflow.apply_async((corpus_id,),)
else:
thread = threading.Thread(target=apply_workflow, args=(corpus.id, ), daemon=True)
thread = threading.Thread(target=apply_workflow, args=(corpus_id, ), daemon=True)
thread.start()
except Exception as error:
print('WORKFLOW ERROR')
......@@ -289,4 +289,3 @@ def testISTEX(request , project_id):
data = [query_string,query,N]
return JsonHttpResponse(data)
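Both doTheQuery and testISTEX now snapshot corpus_id = corpus.id after the commit and hand the worker a plain integer. The point, annotated (the same lines as above, with comments added):
session.add(corpus)
session.commit()
corpus_id = corpus.id            # read the id while the instance is attached
# a thread or Celery worker should get the plain int, never the ORM object:
# sessions are not thread-safe and the instance may expire after commit
apply_workflow.apply_async((corpus_id,))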
......@@ -69,4 +69,4 @@ th a:hover {
.dynatable-disabled-page:hover {
background: none;
color: #999;
}
\ No newline at end of file
}
......@@ -25,13 +25,34 @@
-o-transition: height 0.1s;
transition: height 0.1s;
}
th { color: #fff; }
th {
color: #fff;
font-weight: normal;
font-size: 0.9em;
vertical-align: top ;
}
/* specific selector to override equally specific bootstrap.css */
.table > thead > tr > th { vertical-align: top ; }
th a {
color: #fff;
font-weight: normal;
font-style: italic;
font-size: 0.9em;
}
th p.note {
color: #ccc;
font-size: 0.6em;
margin: 1em 0 0 0 ;
}
th p.note > input {
float: left;
margin: 0 .2em 0 0 ;
}
th p.note > label {
float: left;
}
tr:hover {
......@@ -171,7 +192,7 @@ input[type=radio]:checked + label {
<div class="panel-heading">
<h4 class="panel-title">
<a data-toggle="collapse" data-parent="#accordion" href="#collapseOne">
<p id="corpusdisplayer" onclick='Final_UpdateTable("click")' class="btn btn-primary btn-lg" style="width:200px; margin:0 auto; display:block;">Open Folder</h2></p>
<p id="corpusdisplayer" onclick='Final_UpdateTable("click")' class="btn btn-primary btn-lg" style="width:200px; margin:0 auto; display:block;">Close Term List</p>
</a>
</h4>
</div>
......@@ -194,7 +215,7 @@ input[type=radio]:checked + label {
</p> -->
<p align="right">
<!-- <button id="Clean_All" class="btn btn-warning">Clean</button> -->
<button id="Save_All" class="btn btn-primary">Save</button>
<button id="Save_All" class="btn btn-primary">Save changes permanently</button>
</p>
</div>
......@@ -247,66 +268,6 @@ input[type=radio]:checked + label {
</div>
<div id="pre_savechanges" class="modal fade">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<h3 class="modal-title">Do you want to apply these to the whole Project as well?</h3>
</div>
<div class="modal-body">
<div id="stoplist_content">
</div>
<!--
<ul class="nav nav-tabs">
<li class="active"><a id="stoplist" href="#stoplist_content">Stop List</a></li>
<li><a id="maplist" href="#maplist_content">Map List</a></li>
<li><a id="grouplist" href="#grouplist_content">Group List</a></li>
</ul>
<div class="tab-content">
<div id="stoplist_content" class="tab-pane fade in active">
<ul>
<li>jiji01</li>
<li>jiji02</li>
<li>jiji03</li>
</ul>
</div>
<div id="maplist_content" class="tab-pane fade">
qowieuoqiwueowq
</div>
<div id="grouplist_content" class="tab-pane fade">
asdhasjkdhasjdh
</div>
</div>
-->
<div class="modal-footer">
<button onclick="SaveGlobalChanges(false)" id="nope" type="button" class="btn btn-default" data-dismiss="modal">No</button>
<button onclick="SaveGlobalChanges(true)" id="yep" type="button" class="btn btn-primary">Yes</button>
</div>
</div>
</div>
</div>
</div>
<div id="filter_search" style="visibility:hidden">
<select id="example-single-optgroups" onchange="SearchFilters(this);">
......
......@@ -299,7 +299,7 @@
var origQuery = $("#id_name").val()
console.log("printing the results:")
console.log(origQuery)
testISTEX(origQuery.replace(" ","+"),1000)
testISTEX(origQuery.replace(/ /g,"+"),300)
}
}
else {
......@@ -349,7 +349,7 @@
console.log("enabling "+"#"+value.id)
$("#"+value.id).attr('onclick','getGlobalResults(this);');
// $("#submit_thing").prop('disabled' , false)
$("#submit_thing").html("Process a 1000 sample!")
$("#submit_thing").html("Process a 300 sample!")
thequeries = data
var N=0,k=0;
......@@ -388,7 +388,7 @@
console.log("enabling "+"#"+value.id)
$("#"+value.id).attr('onclick','getGlobalResults(this);');
// $("#submit_thing").prop('disabled' , false)
$("#submit_thing").html("Process a 1000 sample!")
$("#submit_thing").html("Process a 300 sample!")
thequeries = data
var N=data.length,k=0;
......
from django.shortcuts import redirect
from django.shortcuts import render
from django.db import transaction
from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseForbidden
# from django.shortcuts import render
# from django.db import transaction
#
from django.http import Http404, HttpResponse #, HttpResponseRedirect, HttpResponseForbidden
from django.template.loader import get_template
from django.template import Context
......@@ -13,49 +13,41 @@ from django.db import connection
# Node, NodeType, Node_Resource, Project, Corpus, \
# Ngram, Node_Ngram, NodeNgramNgram, NodeNodeNgram
from node.admin import CorpusForm, ProjectForm, ResourceForm, CustomForm
from django.contrib.auth.models import User
# from node.admin import CorpusForm, ProjectForm, ResourceForm, CustomForm
#
# from django.contrib.auth.models import User
#
import datetime
from itertools import *
from dateutil.parser import parse
from django.db import connection
from django import forms
from collections import defaultdict
from parsing.FileParsers import *
import os
# from itertools import *
# from dateutil.parser import parse
#
# from django.db import connection
# from django import forms
#
#
# from collections import defaultdict
#
# from parsing.FileParsers import *
# import os
import json
import math
# import math
# SOME FUNCTIONS
from gargantext_web import settings
#
# from django.http import *
# from django.shortcuts import render_to_response,redirect
# from django.template import RequestContext
from django.http import *
from django.shortcuts import render_to_response,redirect
from django.template import RequestContext
from django.contrib.auth.decorators import login_required
from django.contrib.auth import authenticate, login, logout
from scrappers.scrap_pubmed.admin import Logger
from gargantext_web.db import *
from sqlalchemy import or_, func
# from gargantext_web.db import *
from gargantext_web import about
from gargantext_web.db import session, get_session, cache, Node, NodeNgram
from sqlalchemy import func
from rest_v1_0.api import JsonHttpResponse
from ngram.lists import listIds, listNgramIds, ngramList , doList
def get_ngrams(request , project_id , corpus_id ):
if not request.user.is_authenticated():
return redirect('/login/?next=%s' % request.path)
......@@ -73,6 +65,8 @@ def get_ngrams(request , project_id , corpus_id ):
project = cache.Node[int(project_id)]
corpus = cache.Node[int(corpus_id)]
type_doc_id = cache.NodeType['Document'].id
# implicit global session
number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0]
myamlist_type_id = cache.NodeType['MiamList'].id
miamlist = session.query(Node).filter(Node.parent_id==corpus_id , Node.type_id == myamlist_type_id ).first()
......@@ -129,6 +123,8 @@ def get_journals(request , project_id , corpus_id ):
project = cache.Node[int(project_id)]
corpus = cache.Node[int(corpus_id)]
type_doc_id = cache.NodeType['Document'].id
# implicit global session
number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0]
the_query = """ SELECT hyperdata FROM node_node WHERE id=%d """ % ( int(corpus_id) )
......@@ -158,7 +154,10 @@ def get_journals_json(request , project_id, corpus_id ):
user_id = request.user.id
document_type_id = cache.NodeType['Document'].id
# implicit global session
documents = session.query(Node).filter( Node.parent_id==corpus_id , Node.type_id == document_type_id ).all()
for doc in documents:
if "journal" in doc.hyperdata:
journal = doc.hyperdata["journal"]
......@@ -167,25 +166,20 @@ def get_journals_json(request , project_id, corpus_id ):
JournalsDict[journal] += 1
return JsonHttpResponse(JournalsDict)
from gargantext_web.db import session, cache, Node, NodeNgram
from sqlalchemy import or_, func
from sqlalchemy.orm import aliased
def get_corpuses( request , node_ids ):
ngrams = [int(i) for i in node_ids.split("+") ]
# implicit global session
results = session.query(Node.id,Node.hyperdata).filter(Node.id.in_(ngrams) ).all()
for r in results:
print(r)
return JsonHttpResponse( [ "tudo" , "bem" ] )
def get_cores( request ):
import multiprocessing
cpus = multiprocessing.cpu_count()
return JsonHttpResponse( {"data":cpus} )
def get_corpus_state( request , corpus_id ):
if not request.user.is_authenticated():
return JsonHttpResponse( {"request" : "forbidden"} )
......@@ -200,8 +194,12 @@ def get_corpus_state( request , corpus_id ):
# processing = corpus.hyperdata['Processing']
return JsonHttpResponse( processing )
def get_groups( request ):
"""
User groups for the current user (request.user.id)
route: /get_groups
"""
if not request.user.is_authenticated():
return JsonHttpResponse( {"request" : "forbidden"} )
......@@ -222,11 +220,9 @@ def get_groups( request ):
return JsonHttpResponse( common_users )
def graph_share(request, generic=100, specific=100):
if request.method== 'GET' and "token" in request.GET:
import json
# import json
le_token = json.loads(request.GET["token"])[0]
import base64
le_query = base64.b64decode(le_token).decode("utf-8")
......@@ -237,11 +233,15 @@ def graph_share(request, generic=100, specific=100):
# resource_id = cache.ResourceType["Pubmed (xml format)"].id
# corpus = session.query(Node).filter( Node.type_id==resource_id , Node.user_id==user_id , Node.id==corpus_id , Node.type_id == cache.NodeType['Corpus'].id ).first()
# if corpus==None: return JsonHttpResponse( {"request" : "forbidden"} )
# implicit global session
miamlist = session.query(Node).filter( Node.user_id==user_id , Node.parent_id==corpus_id , Node.type_id == cache.NodeType['MiamList'].id ).first()
if miamlist==None: return JsonHttpResponse( {"request" : "forbidden"} )
graphurl = "node_link_share.json?token="+request.GET["token"]
date = datetime.datetime.now()
t = get_template('explorer_share.html')
html = t.render(Context({\
'debug': settings.DEBUG,
'date' : date,\
......@@ -252,11 +252,10 @@ def graph_share(request, generic=100, specific=100):
return JsonHttpResponse(request.GET["token"])
def node_link_share(request):
data = { "request" : "error" }
if request.method== 'GET' and "token" in request.GET:
import json
# import json
le_token = json.loads(request.GET["token"])[0]
import base64
le_query = base64.b64decode(le_token).decode("utf-8")
......@@ -268,6 +267,8 @@ def node_link_share(request):
from analysis.functions import get_cooc
data = []
# implicit global session
corpus = session.query(Node).filter( Node.user_id==user_id , Node.id==corpus_id).first()
data = get_cooc(request=request, corpus=corpus, type="node_link")
......@@ -301,7 +302,10 @@ def share_resource(request , resource_id , group_id) :
# [ getting all childs ids of this project ]
ids2changeowner = [ project2share.id ]
# implicit global session
corpuses = session.query(Node.id).filter(Node.user_id == request.user.id, Node.parent_id==resource_id , Node.type_id == cache.NodeType["Corpus"].id ).all()
for corpus in corpuses:
ids2changeowner.append(corpus.id)
lists = session.query(Node.id,Node.name).filter(Node.user_id == request.user.id, Node.parent_id==corpus.id ).all()
......@@ -323,4 +327,4 @@ def share_resource(request , resource_id , group_id) :
connection.close()
# [ / changing owner ]
return JsonHttpResponse( results )
\ No newline at end of file
return JsonHttpResponse( results )
from admin.env import *
from gargantext_web.db import session, cache, get_or_create_node
from gargantext_web.db import Node, NodeHyperdata, Hyperdata, Ngram, NodeNgram, NodeNgramNgram, NodeHyperdataNgram
from sqlalchemy import func, alias, asc, desc
import sqlalchemy as sa
from sqlalchemy.orm import aliased
from ngram.group import compute_groups, getStemmer
# corpus = Corpus(272)
corpus_id = 540420
corpus = session.query(Node).filter(Node.id==corpus_id).first()
#group = get_or_create_node(corpus=corpus, nodetype="Group")
stop_id = get_or_create_node(nodetype='StopList',corpus=corpus).id
miam_id = get_or_create_node(nodetype='MiamList',corpus=corpus).id
somme = sa.func.count(NodeNgram.weight)
ngrams = (session.query(Ngram.id, Ngram.terms, somme )
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.join(Node, Node.id == NodeNgram.node_id)
.filter(Node.parent_id==corpus_id, Node.type_id==cache.NodeType['Document'].id)
.group_by(Ngram.id)
.order_by(desc(somme))
.limit(100000)
)
stops = (session.query(Ngram.id, Ngram.terms)
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.filter(NodeNgram.node_id == stop_id)
.all()
)
miams = (session.query(Ngram.id, Ngram.terms, somme)
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.filter(NodeNgram.node_id == miam_id)
.group_by(Ngram.id, Ngram.terms)
.order_by(desc(somme))
.all()
)
stemmer = getStemmer(corpus)
ws = ['honeybee', 'honeybees']
print(stemmer(ws[0]) == stemmer(ws[1]))
#
#for n in miams:
# if n[1] == 'bees':
# print("!" * 30)
# print(n)
# print("-" * 30)
# else:
# print(n)
#
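The honeybee check above generalizes into a quick probe of which miam terms would collapse under the stemmer; a sketch, assuming stemmer maps a string to its stem as used above (multi-word terms are naively fed in whole):
from collections import defaultdict
families = defaultdict(list)
for ngram_id, terms, count in miams:
    families[stemmer(terms)].append(terms)
# print only the stems that actually merge several surface forms
for stem, forms in families.items():
    if len(forms) > 1:
        print(stem, forms)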