Commit 47f6061a authored by PkSM3

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

parents b7d18edb 8099d381
@@ -47,13 +47,12 @@ def PrintException():
 class WorkflowTracking:
     def __init__( self ):
         self.hola = "mundo"
-    def processing_(self , corpus , step):
+    def processing_(self , corpus_id , step):
         try:
-            the_query = """ UPDATE node_node SET hyperdata=\'{ \"%s\" : \"%s\"}\' WHERE id=%d """ % ( "Processing", step , corpus.id )
+            the_query = """ UPDATE node_node SET hyperdata=\'{ \"%s\" : \"%s\"}\' WHERE id=%d """ % ( "Processing", step , corpus_id )
             cursor = connection.cursor()
             try:
                 cursor.execute(the_query)
...
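The status update above builds its SQL with string interpolation. For reference, the same update with driver-side parameter binding — a sketch, not part of the commit, assuming the same Django `connection` and the `node_node.hyperdata` JSONB column:

    import json
    from django.db import connection

    def processing_(corpus_id, step):
        # same UPDATE as above, but the driver quotes the values
        with connection.cursor() as cursor:
            cursor.execute(
                "UPDATE node_node SET hyperdata = %s WHERE id = %s",
                [json.dumps({"Processing": step}), int(corpus_id)],
            )
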
@@ -5,7 +5,7 @@ from sqlalchemy.sql import func
 from gargantext_web.db import Node, Ngram, NodeNgram, NodeNgramNgram, \
         NodeNodeNgram, NodeHyperdataNgram, NodeHyperdata, Hyperdata
-from gargantext_web.db import session, cache, get_or_create_node, bulk_insert
+from gargantext_web.db import get_session, cache, get_or_create_node, bulk_insert
 from analysis.lists import WeightedMatrix, UnweightedList, Translations
 import inspect
 import datetime
@@ -18,7 +18,8 @@ def do_cooc(corpus=None
            , start=None, end=None
            , limit=1000
            , isMonopartite=True
-           , hapax = 3):
+           , hapax = 3
+           , mysession=None):
     '''
     Compute the cooccurence matrix and save it, returning NodeNgramNgram.node_id
     For the moment list of paramters are not supported because, lists need to
@@ -40,30 +41,35 @@ def do_cooc(corpus=None
     # Security test
     field1,field2 = str(field1), str(field2)
+    if mysession is None:
+        from gargantext_web.db import session
+        mysession = session
     # Get node
     node_cooc = get_or_create_node(nodetype='Cooccurrence', corpus=corpus
                                    , name_str="Cooccurrences corpus " \
                                        + str(corpus.id) + "list_id: " + str(miam_id)
                                    #, hyperdata={'field1': field1, 'field2':field2}
-                                   )
+                                   , mysession=mysession)
     # BEGIN
     # Saving the parameters of the analysis in the Node JSONB hyperdata field
     args, _, _, parameters = inspect.getargvalues(inspect.currentframe())
-    hyperdata = dict()
-    for parameter in parameters.keys():
-        if parameter != 'corpus' and parameter != 'node_cooc':
-            hyperdata[parameter] = parameters[parameter]
-    node_cooc.hyperdata = hyperdata
-    session.add(node_cooc)
-    session.commit()
+    # hyperdata = dict()
+    #
+    # for parameter in parameters.keys():
+    #     if parameter != 'corpus' and parameter != 'node_cooc':
+    #         hyperdata[parameter] = parameters[parameter]
+    #
+    # node_cooc.hyperdata = hyperdata
+    #
+    mysession.add(node_cooc)
+    mysession.commit()
     # END
-    session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==node_cooc.id).delete()
-    session.commit()
+    mysession.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==node_cooc.id).delete()
+    mysession.commit()
     doc_id = cache.NodeType['Document'].id
@@ -75,7 +81,7 @@ def do_cooc(corpus=None
     if isMonopartite :
         NodeNgramY = aliased(NodeNgram)
-        cooc_query = (session.query(NodeNgramX.ngram_id, NodeNgramY.ngram_id, cooc_score)
+        cooc_query = (mysession.query(NodeNgramX.ngram_id, NodeNgramY.ngram_id, cooc_score)
                       .join(Node, Node.id == NodeNgramX.node_id)
                       .join(NodeNgramY, NodeNgramY.node_id == Node.id)
                       .filter(Node.parent_id==corpus.id, Node.type_id==doc_id)
@@ -83,7 +89,7 @@ def do_cooc(corpus=None
     else :
         NodeNgramY = aliased(NodeNgram)
-        cooc_query = (session.query(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id, cooc_score)
+        cooc_query = (mysession.query(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id, cooc_score)
                       .join(Node, Node.id == NodeHyperdataNgram.node_id)
                       .join(NodeNgramY, NodeNgramY.node_id == Node.id)
                       .join(Hyperdata, Hyperdata.id == NodeHyperdataNgram.hyperdata_id)
@@ -167,7 +173,7 @@ def do_cooc(corpus=None
     # Select according some scores
     if cvalue_id is not None :
         #miam = get_or_create_node(nodetype='Cvalue', corpus=corpus)
-        cvalue_list = UnweightedList(session.query(NodeNodeNgram.ngram_id)
+        cvalue_list = UnweightedList(mysession.query(NodeNodeNgram.ngram_id)
                                      .filter(NodeNodeNgram.nodex_id == cvalue_id).all()
                                      )
...
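With the new `mysession` keyword, callers can pass their own scoped session; `do_cooc` only falls back to the module-level global when none is given. A usage sketch, not from the commit, assuming `corpus` and `miam_id` are obtained as elsewhere in this diff:

    from gargantext_web.db import get_session
    from analysis.cooccurrences import do_cooc

    mysession = get_session()
    try:
        cooc_id = do_cooc(corpus=corpus, miam_id=miam_id, mysession=mysession)
    finally:
        mysession.remove()  # release the thread-local session
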
@@ -9,7 +9,6 @@ import numpy as np
 import collections
-session = get_session()
 def result2dict(query):
     results = dict()
@@ -27,6 +26,7 @@ def diachronic_specificity(corpus_id, terms, order=True):
     Values are measure to indicate diachronic specificity.
     Nowadays, the measure is rather simple: distance of frequency of period from mean of frequency of all corpus.
     '''
+    # implicit global session
     ngram_frequency_query = (session
                              .query(Node.hyperdata['publication_year'], func.count('*'))
                              .join(NodeNgram, Node.id == NodeNgram.node_id)
@@ -64,7 +64,6 @@ def diachronic_specificity(corpus_id, terms, order=True):
     else:
         return relative_terms_count
 # For tests
 # diachronic_specificity(102750, "bayer", order=True)
 # diachronic_specificity(26128, "bee", order=True)
...
 from admin.utils import PrintException
 from gargantext_web.db import *
-from gargantext_web.db import get_or_create_node
+from gargantext_web.db import get_or_create_node, session,get_session
 from collections import defaultdict
 from operator import itemgetter
@@ -31,6 +31,8 @@ def do_distance(cooc_id, field1=None, field2=None, isMonopartite=True, distance=
     do_distance :: Int -> (Graph, Partition, {ids}, {weight})
     '''
+    # implicit global session
     authorized = ['conditional', 'distributional', 'cosine']
     if distance not in authorized:
         distance = 'conditional'
@@ -203,7 +205,6 @@ def do_distance(cooc_id, field1=None, field2=None, isMonopartite=True, distance=
     def getWeight(item):
         return item[1]
     #
...
@@ -9,7 +9,7 @@ from math import log
 import scipy
-from gargantext_web.db import get_or_create_node
+from gargantext_web.db import session,get_session, get_or_create_node,session
 from analysis.cooccurrences import do_cooc
 from analysis.distance import do_distance
@@ -39,12 +39,14 @@ def get_cooc(request=None, corpus=None
     '''
     get_ccoc : to compute the graph.
     '''
+    # implicit global session
     data = {}
     #if session.query(Node).filter(Node.type_id==type_cooc_id, Node.parent_id==corpus_id).first() is None:
     print("Cooccurrences do not exist yet, creating it.")
-    miam_id = get_or_create_node(nodetype='MapList', corpus=corpus).id
-    stop_id = get_or_create_node(nodetype='StopList', corpus=corpus).id
-    group_id = get_or_create_node(nodetype='Group', corpus=corpus).id
+    miam_id = get_or_create_node(nodetype='MapList', corpus=corpus, mysession=session).id
+    stop_id = get_or_create_node(nodetype='StopList', corpus=corpus, mysession=session).id
+    group_id = get_or_create_node(nodetype='Group', corpus=corpus, mysession=session).id
     SamuelFlag = False
     # if field1 == field2 == 'ngrams' :
...
 from collections import defaultdict
 from math import sqrt
-from gargantext_web.db import session, NodeNgram, NodeNgramNgram, bulk_insert
+from gargantext_web.db import session,get_session, NodeNgram, NodeNgramNgram, bulk_insert
 class BaseClass:
@@ -67,6 +67,7 @@ class Translations(BaseClass):
             self.items = defaultdict(int)
             self.groups = defaultdict(set)
         elif isinstance(other, int):
+            # implicit global session
             query = (session
                      .query(NodeNgramNgram.ngramy_id, NodeNgramNgram.ngramx_id)
                      .filter(NodeNgramNgram.node_id == other)
@@ -118,6 +119,7 @@ class Translations(BaseClass):
     def save(self, node_id):
         # delete previous data
+        # implicit global session
         session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == node_id).delete()
         session.commit()
         # insert new data
@@ -134,6 +136,7 @@ class WeightedMatrix(BaseClass):
         if other is None:
             self.items = defaultdict(lambda: defaultdict(float))
         elif isinstance(other, int):
+            # implicit global session
             query = (session
                      .query(NodeNgramNgram.ngramx_id, NodeNgramNgram.ngramy_id, NodeNgramNgram.score)
                      .filter(NodeNgramNgram.node_id == other)
@@ -159,6 +162,7 @@ class WeightedMatrix(BaseClass):
     def save(self, node_id):
         # delete previous data
+        # implicit global session
         session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == node_id).delete()
         session.commit()
         # insert new data
@@ -243,6 +247,7 @@ class UnweightedList(BaseClass):
         if other is None:
             self.items = set()
         elif isinstance(other, int):
+            # implicit global session
             query = (session
                      .query(NodeNgram.ngram_id)
                      .filter(NodeNgram.node_id == other)
@@ -323,6 +328,7 @@ class UnweightedList(BaseClass):
     def save(self, node_id):
         # delete previous data
+        # implicit global session
         session.query(NodeNgram).filter(NodeNgram.node_id == node_id).delete()
         session.commit()
         # insert new data
@@ -339,6 +345,7 @@ class WeightedList(BaseClass):
         if other is None:
             self.items = defaultdict(float)
         elif isinstance(other, int):
+            # implicit global session
             query = (session
                      .query(NodeNgram.ngram_id, NodeNgram.weight)
                      .filter(NodeNgram.node_id == other)
@@ -435,6 +442,7 @@ class WeightedList(BaseClass):
     def save(self, node_id):
         # delete previous data
+        # implicit global session
         session.query(NodeNgram).filter(NodeNgram.node_id == node_id).delete()
         session.commit()
         # insert new data
...
from collections import defaultdict
from gargantext_web.db import get_or_create_node, session,get_session, Node, NodeHyperdata, Hyperdata, Ngram
import pandas as pd
import numpy as np
import scipy.spatial.distance as distance
from sqlalchemy.sql import func
from sqlalchemy import desc, asc, or_, and_, Date, cast, select
from sqlalchemy import literal_column
from sqlalchemy.orm import aliased

from analysis.distance import do_distance
from analysis.cooccurrences import do_cooc

# TFIDF ngrams / period
def periods(corpus, start=None, end=None):
    '''
    data
    periods :: Corpus -> [Periods]
    # compute TFIDF matrix
    # a = np.asarray([1,2,3])
    # b = np.asarray([1,2,4])
    # distance.cosine(a,b)
    # search for min and split
    '''
    # implicit global session
    Doc = aliased(Node)
    Corpus = aliased(Node)

    query = (session
             .query(NodeHyperdata.value_datetime)
             .join(Doc, Doc.id == NodeHyperdata.node_id)
             .join(Corpus, Corpus.id == Doc.parent_id)
             .join(Hyperdata, Hyperdata.id == NodeHyperdata.hyperdata_id)
             .filter(Doc.type_id == cache.NodeType['Document'].id)
             .filter(Corpus.id == corpus.id)
             .filter(Hyperdata.name == 'publication_date')
             )

    first = query.order_by(asc(NodeHyperdata.value_datetime)).first()[0]
    last = query.order_by(desc(NodeHyperdata.value_datetime)).first()[0]

    duration = last - first
    if duration.days > 365 * 3 :
        print("OK")

    miam_id = get_or_create_node(nodetype='MiamList', corpus=corpus, mysession=session).id

    result_list = list()
    for t in times:
        for ngram in miam_list:
            result_list.add(temporal_tfidf(ngram, time))

def tfidf_temporal(corpus, start=None, end=None):
    pass

def jacquard(period1, period2):
    '''
    type Start :: Date
    type End :: Date
    type Period :: (Start, End)
    type Periods :: [Period]
    '''
    period1 = ['start1', 'end1']
    period2 = ['start2', 'end2']
    periods = [period1, period2]

    nodes = [cooc(corpus=corpus_id, start=period[0], end=period[1]) for period in periods]
    partitions = [get_cooc(cooc_id=node_id, type='bestpartition') for node_id in nodes]

    for x in nodeCom.items():
        comNode[x[1]] = comNode.get(x[1], set()).union({x[0]})

def get_partition(corpus, start=None, end=None, distance=distance):
    # implicit global session
    miam_id = get_or_create_node(corpus=corpus, nodetype='MapList', mysession=session).id
    print("get Partition %s - %s" % (start, end))
    cooc_id = do_cooc(corpus=corpus
                      , start=start
                      , end=end
                      , miam_id=miam_id
                      )

    G, partition, ids, weight = do_distance(cooc_id
                                            , field1="ngrams"
                                            , field2="ngrams"
                                            , isMonopartite=True
                                            , distance=distance)
    return(partition, weight)

def phylo_clusters(corpus, years):
    '''
    corpus :: Node Corpus
    years :: [Year]
    '''
    # implicit global session
    clusters = dict()
    nodes_weight = dict()

    periods_start_end = [
        ('2000-01-01', '2010-12-31')
        , ('2011-01-01', '2012-12-31')
        , ('2013-01-01', '2015-12-31')
    ]

    periods = list()
    for period in periods_start_end:
        periods.append(' '.join(p for p in period))
    print(periods)

    periods_index = [ z for z in zip (periods[:-1], periods[1:])]
    print(periods_index)

    for period in periods_start_end:
        #start,end = period
        index = ' '.join([str(p) for p in list(period)])
        clusters[index], nodes_weight[index] = get_partition( corpus
                                                            , start = str(period[0])
                                                            , end = str(period[1])
                                                            , distance='distributional')

    nodes = set()
    for period in nodes_weight.keys():
        for node in nodes_weight[period].keys():
            nodes.add(node)

    id_terms = session.query(Ngram.id, Ngram.terms).filter(Ngram.id.in_(nodes)).all()
    id_terms_dict = dict()
    for id_term in id_terms:
        id_terms_dict[id_term[0]] = id_term[1]

    year_com_node = defaultdict(lambda: defaultdict(set))
    for period in clusters.keys():
        for node, com in clusters[period].items():
            year_com_node[period][com].add(node)

    proximity_dict = defaultdict(
        lambda: defaultdict(
            lambda: defaultdict(
                lambda: defaultdict( float
                )
            )
        )
    )

    def t1_t2(proximity_dict, t1_t2):
        t1,t2 = t1_t2
        for com1 in year_com_node[t1].keys():
            for com2 in year_com_node[t2].keys():
                set_1 = year_com_node[t1][com1]
                set_2 = year_com_node[t2][com2]
                intersection = set_1.intersection(set_2)
                union = set_1.union(set_2)
                proximity_dict[t1][t2][com1][com2] = len(intersection) / len(union)

    for period in periods_index:
        t1_t2(proximity_dict, period)

    data = list()
    data_dict = dict()
    for y1 in proximity_dict.keys():
        for y2 in proximity_dict[y1].keys():
            for c1 in proximity_dict[y1][y2].keys():
                for c2 in proximity_dict[y1][y2][c1].keys():
                    score = proximity_dict[y1][y2][c1][c2]
                    if score > 0.05:
                        #print(y1,y2,c1,c2,score)
                        list_node1 = list()
                        for node in year_com_node[y1][c1]:
                            list_node1.append((node, nodes_weight[y1][node]))
                        list_node1 = sorted(list_node1, key=lambda x: x[1], reverse=True)

                        list_node2 = list()
                        for node in year_com_node[y2][c2]:
                            list_node2.append((node, nodes_weight[y2][node]))
                        list_node2 = sorted(list_node2, key=lambda x: x[1], reverse=True)

                        flow = list()

                        from_data = [id_terms_dict[x[0]] for x in list_node1[:2]]
                        from_data.append(str(y1))
                        flow.append(','.join(from_data))

                        to_data = [id_terms_dict[x[0]] for x in list_node2[:2]]
                        to_data.append(str(y2))
                        flow.append(','.join(to_data))

                        flow.append(round(score*100))
                        data.append(flow)
    return(data)
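The inter-period proximity computed in t1_t2 above is a plain Jaccard index on community node sets. A standalone check of that single step, with hypothetical toy sets rather than project data:

    def jaccard(set_1, set_2):
        # |A ∩ B| / |A ∪ B|, the same formula as in t1_t2
        return len(set_1 & set_2) / len(set_1 | set_2)

    com_t1 = {1, 2, 3, 4}   # ngram ids of a community at period t1
    com_t2 = {3, 4, 5}      # ngram ids of a community at period t2
    print(jaccard(com_t1, com_t2))  # 2 shared / 5 total = 0.4
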
@@ -13,7 +13,7 @@ from rest_framework.exceptions import APIException
 from rest_framework.authentication import SessionAuthentication, BasicAuthentication
 from node.models import Node
-from gargantext_web.db import session, cache, Node, NodeNgram, Ngram
+from gargantext_web.db import session,get_session, cache, Node, NodeNgram, Ngram
 from ngram.lists import listIds, listNgramIds
 from gargantext_web.db import get_or_create_node
@@ -68,6 +68,7 @@ class NgramEdit(APIView):
         """
         Edit an existing NGram in a given list
         """
+        # implicit global session
         list_id = int(list_id)
         list_node = session.query(Node).filter(Node.id==list_id).first()
         # TODO add 1 for MapList social score ?
@@ -97,6 +98,7 @@ class NgramEdit(APIView):
         """
         Delete a ngram from a list
         """
+        # implicit global session
         print("to del",ngram_ids)
         for ngram_id in ngram_ids.split('+'):
             print('ngram_id', ngram_id)
@@ -141,6 +143,7 @@ class NgramCreate(APIView):
         example: request.data = {'text': 'phylogeny'}
         """
+        # implicit global session
         list_id = int(list_id)
         # format the ngram's text
         ngram_text = request.data.get('text', None)
@@ -175,7 +178,6 @@ class NgramCreate(APIView):
             'list_id': list_id,
         })
 class Document(APIView):
     """
     Read-only Document view, similar to /api/nodes/
@@ -184,6 +186,7 @@ class Document(APIView):
     def get(self, request, doc_id):
         """Document by ID"""
+        # implicit global session
         node = session.query(Node).filter(Node.id == doc_id).first()
         if node is None:
             raise APIException('This node does not exist', 404)
@@ -206,4 +209,3 @@ class Document(APIView):
         }
         return Response(data)
...
@@ -31,6 +31,8 @@ processes = 10
 # the socket (use the full path to be safe)
 socket = /tmp/gargantext.sock
+threads = 4
 # with appropriate permissions - *may* be needed
 chmod-socket = 666
...
@@ -24,10 +24,10 @@ def get_team():
       'picture' : 'david.jpg',
       'role':'principal investigator'},
-    { 'first_name' : 'Elias', 'last_name' : 'Showk',
-      'mail' : '',
-      'website' : 'https://github.com/elishowk',
-      'picture' : '', 'role' : 'developer'},
+    # { 'first_name' : 'Elias', 'last_name' : 'Showk',
+    #   'mail' : '',
+    #   'website' : 'https://github.com/elishowk',
+    #   'picture' : '', 'role' : 'developer'},
     { 'first_name' : 'Mathieu', 'last_name' : 'Rodic',
       'mail' : '',
@@ -41,7 +41,6 @@ def get_team():
       'picture' : 'samuel.jpg',
       'role' : 'developer'},
     { 'first_name' : 'Maziyar', 'last_name' : 'Panahi',
      'mail' : '',
      'website' : '',
@@ -51,6 +50,7 @@ def get_team():
     { 'first_name' : 'Romain', 'last_name' : 'Loth',
       'mail' : '',
       'website' : '',
+      'picture' : 'romain.jpg',
       'role' : 'developer'},
     { 'first_name' : 'Alexandre', 'last_name' : 'Delanoë',
...
@@ -13,6 +13,7 @@ import collections
 from gargantext_web.views import move_to_trash
 from gargantext_web.db import *
+from gargantext_web.views import session
 from gargantext_web.validation import validate, ValidationException
 from node import models
@@ -101,6 +102,7 @@ class NodeNgramsQueries(APIView):
     def post(self, request, project_id):
         # example only
         input = request.data or {
             'x': {
                 'with_empty': True,
@@ -256,3 +258,4 @@ class NodeNgramsQueries(APIView):
             }, 201)
         elif input['format'] == 'csv':
             return CsvHttpResponse(sorted(result.items()), ('date', 'value'), 201)
...
@@ -3,7 +3,7 @@
 from celery import shared_task
 from node import models
 from django.db import transaction
-from admin.utils import DebugTime
+from admin.utils import DebugTime, PrintException
 import cProfile
 #@app.task(bind=True)
@@ -11,18 +11,12 @@ import cProfile
 def debug_task(request):
     print('Request: {0!r}'.format(request))
-from gargantext_web.db import session, cache, Node
+from gargantext_web.db import get_session, cache, Node
 from ngram.workflow import ngram_workflow
-@shared_task
-def apply_sum(x, y):
-    print(x+y)
-    print(session.query(Node.name).first())
 from parsing.corpustools import parse_resources, extract_ngrams #add_resource,
-from ngram.lists import ngrams2miam
+#from ngram.lists import ngrams2miam
 from admin.utils import WorkflowTracking
@@ -34,23 +28,34 @@ def apply_workflow(corpus_id):
     update_state = WorkflowTracking()
-    corpus = session.query(Node).filter(Node.id==corpus_id).first()
-    update_state.processing_(corpus, "Parsing")
-    #cProfile.runctx('parse_resources(corpus)', global,locals)
-    parse_resources(corpus)
-    update_state.processing_(corpus, "Terms extraction")
-    extract_ngrams(corpus, ['title', 'abstract'], nlp=True)
-    # update_state.processing_(corpus, "")
-    ngram_workflow(corpus)
-    #ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id)
-    print("End of the Workflow for corpus %d" % (corpus_id))
-    update_state.processing_(corpus, "0")
+    try :
+        mysession = get_session()
+        corpus = mysession.query(Node).filter(Node.id==corpus_id).first()
+        update_state.processing_(int(corpus_id), "Parsing")
+        #cProfile.runctx('parse_resources(corpus)', global,locals)
+        parse_resources(corpus, mysession=mysession)
+        update_state.processing_(int(corpus_id), "Terms extraction")
+        extract_ngrams(corpus, ['title', 'abstract'], nlp=True, mysession=mysession)
+        # update_state.processing_(corpus, "")
+        ngram_workflow(corpus, mysession=mysession)
+        #ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id)
+        print("End of the Workflow for corpus %d" % (corpus_id))
+        update_state.processing_(int(corpus_id), "0")
+        #mysession.close()
+        #get_session.remove()
+        mysession.remove()
+    except Exception as error:
+        print(error)
+        PrintException()
+        #mysession.close()
+        #get_session.remove()
+        mysession.remove()
 @shared_task
 def empty_trash(corpus_id):
...
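The per-task session pattern adopted here, reduced to a standalone sketch. It assumes `get_session()` returns a `scoped_session` as in `gargantext_web.db`; the task name `some_task` is hypothetical, and `finally` guarantees the thread-local session is cleared even on failure (the commit itself calls `remove()` in both branches of a try/except instead):

    from celery import shared_task
    from gargantext_web.db import get_session, Node

    @shared_task
    def some_task(corpus_id):
        mysession = get_session()   # thread-local scoped_session
        try:
            corpus = mysession.query(Node).filter(Node.id == corpus_id).first()
            # ... work with corpus ...
        finally:
            mysession.remove()      # close and discard this thread's session
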
@@ -141,7 +141,6 @@ def get_ngrams(request , project_id , corpus_id ):
     return HttpResponse(html)
 def test_test(request , corpus_id , doc_id):
     """Get All for a doc id"""
     corpus_id = int(corpus_id)
...
@@ -2,18 +2,20 @@ from django.conf import settings
 from node import models
-__all__ = ['literalquery', 'session', 'cache', 'Session', 'bulk_insert', 'engine', 'get_cursor', 'User']
+__all__ = ['literalquery', 'cache', 'Session', 'bulk_insert', 'engine', 'get_cursor', 'User']
 # initialize sqlalchemy
-from sqlalchemy.orm import Session, mapper
+from sqlalchemy.orm import Session, mapper, scoped_session, sessionmaker
 from sqlalchemy.ext.automap import automap_base
 from sqlalchemy import create_engine, MetaData, Table, Column, ForeignKey
 from sqlalchemy.types import Integer, String, DateTime
 from sqlalchemy.dialects.postgresql import JSON
+# SQLAlchemy session management
 def get_engine():
     from sqlalchemy import create_engine
@@ -129,12 +131,27 @@ def literalquery(statement, dialect=None):
     return LiteralCompiler(dialect, statement)
+#
 def get_sessionmaker():
     from sqlalchemy.orm import sessionmaker
     return sessionmaker(bind=engine)
-Session = get_sessionmaker()
-session = Session()
+def get_session():
+    session_factory = get_sessionmaker()
+    return scoped_session(session_factory)
+
+# import get_session instead: more convenient for the remove() calls
+#session_factory = get_sessionmaker()
+#get_session = scoped_session(session_factory)
+
+# the global session ------------
+# for modules that import
+# session directly
+session = get_session()
+#session = get_session()()
+# -------------------------------
 # SQLAlchemy model objects caching
@@ -158,18 +175,22 @@ class ModelCache(dict):
             for column in self._columns
             if column.type.python_type == str or key.__class__ == column.type.python_type
         ]
+        #session = get_session()
         element = session.query(self._model).filter(or_(*conditions)).first()
         if element is None:
             raise KeyError
         self[key] = element
         return element
+        #session.remove()
     def preload(self):
         self.clear()
+        #session = get_session()
         for element in session.query(self._model).all():
             for column_name in self._columns_names:
                 key = getattr(element, column_name)
                 self[key] = element
+        #session.remove()
 class Cache():
@@ -231,12 +252,17 @@ class bulk_insert:
     readline = read
-def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hyperdata=None):
+def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hyperdata=None, mysession=None):
     '''
     Should be a method of the object. __get_or_create__ ?
     name_str :: String
     hyperdata :: Dict
     '''
+    if mysession is None:
+        from gargantext_web.db import session
+        mysession = session
     if nodetype is None:
         print("Need to give a type node")
     else:
@@ -245,13 +271,13 @@ def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hy
         except KeyError:
             ntype = cache.NodeType[nodetype] = NodeType()
             ntype.name = nodetype
-            session.add(ntype)
-            session.commit()
+            mysession.add(ntype)
+            mysession.commit()
         if corpus_id is not None and corpus is None:
-            corpus = session.query(Node).filter(Node.id==corpus_id).first()
-        node = (session.query(Node).filter(Node.type_id == ntype.id
+            corpus = mysession.query(Node).filter(Node.id==corpus_id).first()
+        node = (mysession.query(Node).filter(Node.type_id == ntype.id
                                            , Node.parent_id == corpus.id
                                            , Node.user_id == corpus.user_id
                                            )
@@ -272,7 +298,9 @@ def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hy
             node.name=name_str
         else:
             node.name=ntype.name
-        session.add(node)
-        session.commit()
+        mysession.add(node)
+        mysession.commit()
         #print(parent_id, n.parent_id, n.id, n.name)
         return(node)
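For context, `scoped_session` hands each thread its own Session behind one module-level name, which is what the refactor above relies on. A minimal illustration under the assumption that `engine` and the `Node` mapping are the ones defined in this module:

    from sqlalchemy.orm import scoped_session, sessionmaker

    def get_session():
        # one registry per call; each thread using it gets, and reuses, its own Session
        return scoped_session(sessionmaker(bind=engine))

    mysession = get_session()
    node = mysession.query(Node).first()  # proxies to the current thread's Session
    mysession.remove()                    # close it and clear this thread's slot
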
@@ -7,7 +7,6 @@ from django.contrib.staticfiles.storage import staticfiles_storage
 from django.views.generic.base import RedirectView
 from gargantext_web import views, views_optimized
-import gargantext_web.corpus_views as corpus_views
 from annotations import urls as annotations_urls
 from annotations.views import main as annotations_main_view
...
@@ -39,14 +39,14 @@ from django.contrib.auth import authenticate, login, logout
 from scrappers.scrap_pubmed.admin import Logger
-from gargantext_web.db import *
 from sqlalchemy import or_, func
 from gargantext_web import about
 from gargantext_web.celery import empty_trash
-from gargantext_web.db import cache, NodeNgram, NodeNgramNgram
+from gargantext_web.db import *
+from gargantext_web.db import session, cache, NodeNgram, NodeNgramNgram
 def login_user(request):
     logout(request)
@@ -69,7 +69,6 @@ def logout_user(request):
     return HttpResponseRedirect('/')
     # Redirect to a success page.
 def logo(request):
     template = get_template('logo.svg')
     group = "mines"
@@ -114,7 +113,6 @@ def css(request):
     }))
     return HttpResponse(css_data, mimetype="text/css")
 def query_to_dicts(query_string, *query_args):
     """Run a simple query and produce a generator
     that returns the results as a bunch of dictionaries
@@ -231,11 +229,11 @@ def projects(request):
     date = datetime.datetime.now()
     # print(Logger.write("STATIC_ROOT"))
+    # implicit global session
     projects = session.query(Node).filter(Node.user_id == user_id, Node.type_id == project_type_id).order_by(Node.date).all()
     number = len(projects)
     # common_users = session.query(User_User.user_parent).filter( User_User.user_id==user_id ).all()
     # [ Getting shared projects ] #
     common_users = []
@@ -297,6 +295,7 @@ def update_nodes(request, project_id, corpus_id, view=None):
     - permanent deletion of Trash
     '''
     if not request.user.is_authenticated():
         return redirect('/login/?next=%s' % request.path)
@@ -359,6 +358,7 @@ def update_nodes(request, project_id, corpus_id, view=None):
 #
 def corpus(request, project_id, corpus_id):
     if not request.user.is_authenticated():
         return redirect('/login/?next=%s' % request.path)
@@ -376,6 +376,8 @@ def corpus(request, project_id, corpus_id):
     corpus = cache.Node[int(corpus_id)]
     type_doc_id = cache.NodeType['Document'].id
+    # implicit global session
     number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0]
@@ -405,7 +407,6 @@ def corpus(request, project_id, corpus_id):
 def newpaginatorJSON(request , corpus_id):
-    results = ["hola" , "mundo"]
     # t = get_template('tests/newpag/thetable.html')
@@ -461,7 +462,6 @@ def newpaginatorJSON(request , corpus_id):
     }
     return JsonHttpResponse(finaldict)
 def move_to_trash(node_id):
     try:
         node = session.query(Node).filter(Node.id == node_id).first()
@@ -482,8 +482,11 @@ def move_to_trash(node_id):
     except Exception as error:
         print("can not move to trash Node" + str(node_id) + ":" + str(error))
 def move_to_trash_multiple(request):
     user = request.user
     if not user.is_authenticated():
         return redirect('/login/?next=%s' % request.path)
@@ -506,8 +509,10 @@ def move_to_trash_multiple(request):
 def delete_node(request, node_id):
     # do we have a valid user?
     user = request.user
     node = session.query(Node).filter(Node.id == node_id).first()
     if not user.is_authenticated():
@@ -545,6 +550,7 @@ def delete_corpus(request, project_id, node_id):
 def chart(request, project_id, corpus_id):
     ''' Charts to compare, filter, count'''
     t = get_template('chart.html')
     user = request.user
     date = datetime.datetime.now()
@@ -562,6 +568,7 @@ def chart(request, project_id, corpus_id):
     return HttpResponse(html)
 def sankey(request, corpus_id):
     t = get_template('sankey.html')
     user = request.user
     date = datetime.datetime.now()
@@ -578,7 +585,6 @@ def sankey(request, corpus_id):
     return HttpResponse(html)
 def matrix(request, project_id, corpus_id):
     t = get_template('matrix.html')
     user = request.user
@@ -598,6 +604,7 @@ def matrix(request, project_id, corpus_id):
     return HttpResponse(html)
 def graph(request, project_id, corpus_id, generic=100, specific=100):
     t = get_template('explorer.html')
     user = request.user
     date = datetime.datetime.now()
@@ -660,6 +667,7 @@ def corpus_csv(request, project_id, corpus_id):
     '''
     Create the HttpResponse object with the appropriate CSV header.
     '''
     response = HttpResponse(content_type='text/csv')
     response['Content-Disposition'] = 'attachment; filename="corpus.csv"'
@@ -735,30 +743,48 @@ def node_link(request, corpus_id):
     '''
     Create the HttpResponse object with the node_link dataset.
     '''
     data = []
     corpus = session.query(Node).filter(Node.id==corpus_id).first()
     data = get_cooc(request=request, corpus=corpus, type="node_link")
     return JsonHttpResponse(data)
+from analysis.periods import phylo_clusters
 def sankey_csv(request, corpus_id):
     data = []
     corpus = session.query(Node).filter(Node.id==corpus_id).first()
+    #
+    # header = ["source", "target", "value"]
+    # data.append(header)
+    #
+    # flows = phylo_clusters(corpus, range(2005,2013))
+    # for flow in flows:
+    #     data.append(flow)
+    # print(data)
+    #
     data = [
-        ["source", "target", "value"]
-        , ["Comment_1", "Theme_1", 1]
-        , ["Comment_2", "Theme_2", 2]
-        , ["Comment_3", "Theme_2", 2]
-        , ["Comment_7", "Theme_1", 2]
-        , ["Comment_8", "Theme_3", 2]
-        , ["Theme_1", "Reco_par_1", 2]
-        , ["Theme_2", "Reco_par_2", 2]
-        , ["Theme_2", "Reco_par_5", 2]
-        , ["Theme_3", "Reco_par_5", 1]
+        ['source', 'target', 'value']
+        , ['inégalités,rapports sociaux,P1', 'critique,travail social,P2', 8]
+        , ['inégalités,rapports sociaux,P1', 'inégalités,éducation,P2', 21]
+        , ['éducation,institutions,P1', 'critique,travail social,P2', 7]
+        , ['éducation,institutions,P1', 'inégalités,éducation,P2', 10]
+        #, ['éducation,institutions,P1', 'personnes âgées,pouvoirs publics,P2', 8]
+        , ['éducation,institutions,P1', 'politiques publiques,personnes âgées dépendantes,P2', 8]
+        #, ['éducation,institutions,P1', 'intervention sociale,travailleur social,P2', 8]
+        #, ['intervention sociale,travailleur social,2011-01-01 2013-12-31', 'intervention sociale,travailleur social,P3', 0]
+        , ['critique,enseignement supérieur,P1', 'critique,travail social,P2', 6]
+        #, ['critique,enseignement supérieur,P1', 'personnes âgées,pouvoirs publics,P2', 7]
+        , ['justice,exclusion,violence,P1', 'inégalités,éducation,P2', 12]
+        , ['critique,travail social,P2', 'justice,travail social,P3', 14]
+        , ['inégalités,éducation,P2', 'justice,travail social,P3', 20]
+        , ['inégalités,éducation,P2', 'justice sociale,éducation,P3', 8]
+        , ['inégalités,éducation,P2', 'action publique,institutions,P3', 9]
+        , ['inégalités,éducation,P2', 'inégalités,inégalités sociales,P3', 18]
+        , ['politiques publiques,personnes âgées dépendantes,P2', 'justice sociale,éducation,P3', 20]
    ]
     return(CsvHttpResponse(data))
 def adjacency(request, corpus_id):
@@ -818,7 +844,6 @@ def ngrams(request):
     }))
     return HttpResponse(html)
 def nodeinfo(request , node_id):
     '''Structure of the popUp for topPapers div '''
     t = get_template('node-info.html')
...
@@ -16,6 +16,8 @@ from threading import Thread
 from node.admin import CustomForm
 from gargantext_web.db import *
 from gargantext_web.db import get_or_create_node
+from gargantext_web.views import session
 from gargantext_web.settings import DEBUG, MEDIA_ROOT
 from rest_v1_0.api import JsonHttpResponse
 from django.db import connection
@@ -31,7 +33,6 @@ from gargantext_web.celery import apply_workflow
 from admin.utils import ensure_dir
 def project(request, project_id):
     # do we have a valid project id?
     try:
         project_id = int(project_id)
@@ -39,6 +40,7 @@ def project(request, project_id):
         raise Http404()
     # do we have a valid project?
     project = (session
                .query(Node)
                .filter(Node.id == project_id)
@@ -207,6 +209,7 @@ def tfidf(request, corpus_id, ngram_ids):
     ngram_ids = [int(i) for i in ngram_ids]
     corpus = session.query(Node).filter(Node.id==corpus_id).first()
     tfidf_id = get_or_create_node(corpus=corpus, nodetype='Tfidf').id
     print(tfidf_id)
     # request data
@@ -251,48 +254,107 @@ def tfidf(request, corpus_id, ngram_ids):
     return JsonHttpResponse(nodes_list)
 def getCorpusIntersection(request , corpuses_ids):
     FinalDict = False
-    if request.method == 'POST' and "nodeids" in request.POST and len(request.POST["nodeids"])>0:
+    if request.method == 'POST' and "nodeids" in request.POST and len(request.POST["nodeids"])>0 :
         import ast
+        import networkx as nx
         node_ids = [int(i) for i in (ast.literal_eval( request.POST["nodeids"] )) ]
         # Here are the visible nodes of the initial semantic map.
         corpuses_ids = corpuses_ids.split('a')
-        corpuses_ids = [int(i) for i in corpuses_ids]
-        cooc_type_id = cache.NodeType['Cooccurrence'].id
-        cooc_ids = session.query(Node.id).filter(Node.user_id == request.user.id , Node.parent_id==corpuses_ids[1] , Node.type_id == cooc_type_id ).first()
-        if len(cooc_ids)==0:
-            return JsonHttpResponse(FinalDict)
-        # If corpus[1] has a coocurrence.id then lets continue
-        Coocs = {}
-        import networkx as nx
-        G = nx.Graph() # I use an undirected graph, because direction doesnt matter here, coocs should be a triangular matrix, so...
-        ngrams_data1 = session.query(NodeNgramNgram).filter( NodeNgramNgram.node_id==cooc_ids[0], NodeNgramNgram.ngramx_id.in_( node_ids )).all()
-        for ngram in ngrams_data1: # are there visible nodes in the X-axis of corpus to compare ?
-            G.add_edge( ngram.ngramx_id , ngram.ngramy_id , weight=ngram.score)
-        ngrams_data2 = session.query(NodeNgramNgram).filter( NodeNgramNgram.node_id==cooc_ids[0], NodeNgramNgram.ngramy_id.in_( node_ids )).all()
-        for ngram in ngrams_data2: # are there visible nodes in the Y-axis of corpus to compare ?
-            if not G.has_edge(ngram.ngramx_id,ngram.ngramy_id):
-                G.add_edge( ngram.ngramx_id , ngram.ngramy_id , weight=ngram.score)
-        for e in G.edges_iter():
-            n1 = e[0]
-            n2 = e[1]
-            # print( G[n1][n2]["weight"] , "\t", n1,",",n2 )
-            if n1 not in Coocs:
-                Coocs[n1]=0
-            if n2 not in Coocs:
-                Coocs[n2]=0
-            Coocs[n1]+=G[n1][n2]["weight"]
-            Coocs[n2]+=G[n1][n2]["weight"]
-        FinalDict = {}
-        for node in node_ids:
-            if node in Coocs:
-                FinalDict[node] = Coocs[node]/G.degree(node)
+        # corpus[1] will be the corpus to compare
+        corpuses_ids = [int(i) for i in corpuses_ids]
+        print(corpuses_ids)
+
+        def get_score(corpus_id):
+            cooc_type_id = cache.NodeType['Cooccurrence'].id
+            cooc_ids = (session.query(Node.id)
+                        .filter(Node.user_id == request.user.id
+                                , Node.parent_id==corpus_id
+                                , Node.type_id == cooc_type_id )
+                        .first()
+                        )
+            if len(cooc_ids)==0:
+                return JsonHttpResponse(FinalDict)
+            # If corpus[1] has a coocurrence.id then lets continue
+            Coocs = {}
+            G = nx.Graph()
+            # undirected graph only
+            # because direction doesnt matter here
+            # coocs is triangular matrix
+            ngrams_data = ( session.query(NodeNgramNgram)
+                            .filter( NodeNgramNgram.node_id==cooc_ids[0]
+                                     , or_(
+                                         NodeNgramNgram.ngramx_id.in_( node_ids )
+                                         , NodeNgramNgram.ngramy_id.in_( node_ids )
+                                     )
+                            )
+                            .group_by(NodeNgramNgram)
+                            .all()
+                            )
+            for ngram in ngrams_data :
+                # are there visible nodes in the X-axis of corpus to compare ?
+                G.add_edge( ngram.ngramx_id , ngram.ngramy_id , weight=ngram.score)
+                print(corpus_id, ngram)
+            for e in G.edges_iter() :
+                n1 = e[0]
+                n2 = e[1]
+                # print( G[n1][n2]["weight"] , "\t", n1,",",n2 )
+                if n1 not in Coocs :
+                    Coocs[n1] = 0
+                if n2 not in Coocs :
+                    Coocs[n2] = 0
+                Coocs[n1] += G[n1][n2]["weight"]
+                Coocs[n2] += G[n1][n2]["weight"]
+            return(Coocs,G)
+
+        Coocs_0,G_0 = get_score( corpuses_ids[0] )
+        Coocs_1,G_1 = get_score( corpuses_ids[1] )
+        FinalDict = {}
+        measure = 'cooc'
+
+        if measure == 'jacquard':
+            for node in node_ids :
+                if node in G_1.nodes() and node in G_0.nodes():
+                    neighbors_0 = set(G_0.neighbors(node))
+                    neighbors_1 = set(G_1.neighbors(node))
+                    jacquard = len(neighbors_0.intersection(neighbors_1)) / len(neighbors_0.union(neighbors_1))
+                    FinalDict[node] = jacquard * 3
+                elif node in G_0.nodes() and node not in G_1.nodes() :
+                    FinalDict[node] = 2
+                elif node not in G_0.nodes() and node in G_1.nodes() :
+                    FinalDict[node] = 1
+                else:
+                    FinalDict[node] = 0
+        elif measure == 'cooc':
+            for node in node_ids :
+                if node in G_1.nodes() and node in G_0.nodes():
+                    score_0 = Coocs_0[node] / G_0.degree(node)
+                    score_1 = Coocs_1[node] / G_1.degree(node)
+                    FinalDict[node] = 5 * score_0 / score_1
+                elif node in G_0.nodes() and node not in G_1.nodes() :
+                    FinalDict[node] = 0.5
+                elif node not in G_0.nodes() and node in G_1.nodes() :
+                    FinalDict[node] = 0.2
+                else:
+                    FinalDict[node] = 0
+        print(FinalDict)
+        #print(node,score)
     # Getting AVG-COOC of each ngram that exists in the cooc-matrix of the compared-corpus.
     return JsonHttpResponse(FinalDict)
...
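The `'cooc'` branch above normalises each node's summed co-occurrence weight by its degree, i.e. its average incident edge weight. A standalone check of that normalisation on a toy graph (not project data):

    import networkx as nx

    G = nx.Graph()
    G.add_edge(1, 2, weight=3.0)
    G.add_edge(1, 3, weight=1.0)

    # same quantity as Coocs[node] / G.degree(node) in get_score
    node = 1
    avg_cooc = G.degree(node, weight='weight') / G.degree(node)
    print(avg_cooc)  # (3.0 + 1.0) / 2 = 2.0
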
@@ -6,7 +6,8 @@ from admin.utils import PrintException,DebugTime
 from gargantext_web.db import NodeNgram,NodeNodeNgram
 from gargantext_web.db import *
-from gargantext_web.db import get_or_create_node
+from gargantext_web.db import get_or_create_node, session,get_session
 from parsing.corpustools import *
@@ -40,6 +41,8 @@ def getNgrams(corpus=None, limit=1000):
     '''
     getNgrams :: Corpus -> [(Int, String, String, Float)]
     '''
+    # implicit global session
     terms = dict()
     tfidf_node = get_or_create_node(nodetype='Tfidf (global)'
                                     , corpus=corpus)
@@ -63,7 +66,7 @@ def getNgrams(corpus=None, limit=1000):
         PrintException()
     return(terms)
-def compute_cvalue(corpus=None, limit=1000):
+def compute_cvalue(corpus=None, limit=1000, mysession=None):
     '''
     computeCvalue :: Corpus
     frequency :: String -> Int -> Int
@@ -122,12 +125,11 @@ def compute_cvalue(corpus=None, limit=1000):
     result = cvalueAll()
     #print([n for n in result])
-    session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==cvalue_node.id).delete()
-    session.commit()
+    mysession.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==cvalue_node.id).delete()
+    mysession.commit()
     #bulk_insert(NodeNodeNgram, ['nodex_id', 'nodey_id', 'ngram_id', 'score'], [n for n in islice(result,0,100)])
     bulk_insert(NodeNodeNgram, ['nodex_id', 'nodey_id', 'ngram_id', 'score'], [n for n in result])
 # test
 #corpus=session.query(Node).filter(Node.id==244250).first()
 #computeCvalue(corpus)
...
@@ -5,7 +5,7 @@ from admin.utils import PrintException,DebugTime
 from gargantext_web.db import NodeNgram,NodeNodeNgram
 from gargantext_web.db import *
-from gargantext_web.db import get_or_create_node
+from gargantext_web.db import get_or_create_node, get_session
 from analysis.lists import Translations, UnweightedList
 from parsing.corpustools import *
@@ -23,7 +23,6 @@ from math import log
 from functools import reduce

 def getStemmer(corpus):
     '''
     getStemmer :: Corpus -> Stemmer
@@ -48,10 +47,11 @@ def getStemmer(corpus):
     return(stemIt)

-def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'):
+def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem', mysession=None):
     '''
     group ngrams according to a function (stemming or lemming)
     '''
     dbg = DebugTime('Corpus #%d - group' % corpus.id)
     dbg.show('Group')
@@ -62,17 +62,19 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'):
     stemIt = getStemmer(corpus)

     group_to_insert = set()
-    node_group = get_or_create_node(nodetype='Group', corpus=corpus)
+    node_group = get_or_create_node(nodetype='Group', corpus=corpus, mysession=mysession)
     miam_to_insert = set()
-    miam_node = get_or_create_node(nodetype='MiamList', corpus=corpus)
-    stop_node = get_or_create_node(nodetype='StopList', corpus=corpus)
+    miam_node = get_or_create_node(nodetype='MiamList', corpus=corpus, mysession=mysession)
+    stop_node = get_or_create_node(nodetype='StopList', corpus=corpus, mysession=mysession)
     #stop_list = UnweightedList(stop_node.id)
     Stop = aliased(NodeNgram)
     frequency = sa.func.count(NodeNgram.weight)

-    ngrams = (session.query(Ngram.id, Ngram.terms, frequency )
+    ngrams = (mysession.query(Ngram.id, Ngram.terms, frequency )
              .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
              .join(Node, Node.id == NodeNgram.node_id)
              #.outerjoin(Stop, Stop.ngram_id == Ngram.id)
@@ -84,7 +86,7 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'):
              .limit(limit_sup)
              )

-    stops = (session.query(Ngram.id, Ngram.terms, frequency)
+    stops = (mysession.query(Ngram.id, Ngram.terms, frequency)
             .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
             .join(Node, Node.id == NodeNgram.node_id)
             .join(Stop, Stop.ngram_id == Ngram.id)
@@ -125,13 +127,14 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'):
             miam_to_insert.add((miam_node.id, group[key]['mainForm'], 1))

     # # Deleting previous groups
-    session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == node_group.id).delete()
+    mysession.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == node_group.id).delete()
     # # Deleting previous ngrams miam list
-    session.query(NodeNgram).filter(NodeNgram.node_id == miam_node.id).delete()
-    session.commit()
+    mysession.query(NodeNgram).filter(NodeNgram.node_id == miam_node.id).delete()
+    mysession.commit()

     bulk_insert(NodeNgramNgram
                , ('node_id', 'ngramx_id', 'ngramy_id', 'score')
                , [data for data in group_to_insert])
     bulk_insert(NodeNgram, ('node_id', 'ngram_id', 'weight'), [data for data in list(miam_to_insert)])
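
compute_groups clusters surface forms whose stems coincide. A toy illustration of the idea (NLTK's Snowball stemmer stands in for whatever getStemmer returns per corpus language; this is an assumption, not the project's code):

    from nltk.stem.snowball import EnglishStemmer

    stemmer = EnglishStemmer()
    stemIt = lambda term: ' '.join(stemmer.stem(w) for w in term.split())

    # "term" and "terms" share a stem, so they would fall into the same group
    assert stemIt("terms") == stemIt("term") == "term"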
@@ -12,7 +12,7 @@ TODO : REFACTOR 2) improvements in ngram creation (?bulk like node_ngram links)
 """
 from gargantext_web.db import Ngram, NodeNgram, NodeNodeNgram, NodeNgramNgram
-from gargantext_web.db import cache, session, get_or_create_node, bulk_insert
+from gargantext_web.db import cache, session,get_session, get_or_create_node, bulk_insert
 # import sqlalchemy as sa
 from sqlalchemy.sql import func, exists
@@ -105,6 +105,7 @@ def exportNgramLists(node,filename,delimiter="\t"):
         2 <=> mapList
         """
         # fetch the Ngram objects (with their terms) in one go
+        # implicit global session
         if len(ngram_ids):
             ng_objs = session.query(Ngram).filter(Ngram.id.in_(ngram_ids)).all()
         else:
@@ -131,7 +132,6 @@ def exportNgramLists(node,filename,delimiter="\t"):
         # csv_rows = [[row1_a, row1_b..],[row2_a, row2_b..],..]
         return csv_rows

     # apply our ng_to_csv function to each list
     # ------------------------------------------------------
     stop_csv_rows = ngrams_to_csv_rows(stop_ngram_ids,
@@ -187,7 +187,7 @@ def importNgramLists(node,filename,delimiter="\t", del_lists=[]):
       (and ideally add its logic to analysis.lists.Translations)
     '''
+    # implicit global session
     # the node arg has to be a corpus here
     if not hasattr(node, "type_id") or node.type_id != 4:
         raise TypeError("IMPORT: node argument must be a Corpus Node")
@@ -380,10 +380,6 @@ def importNgramLists(node,filename,delimiter="\t", del_lists=[]):
     print("INFO: added %i new ngrams in the lexicon" % added_ng)

     # to be timed:
     # [w.node_ngram for w in listnode.node_node_ngram_collection]
 from admin.utils import PrintException
-from gargantext_web.db import NodeNgram
+from gargantext_web.db import NodeNgram, session,get_session
 from gargantext_web.db import *
 from parsing.corpustools import *
@@ -20,6 +20,9 @@ def listIds(typeList=None, user_id=None, corpus_id=None):
     typeList :: String, Type of the Node that should be created
     [Node] :: List of Int, returned or created by the function
     '''
+    # implicit global session
     if typeList is None:
         typeList = 'MiamList'
@@ -57,6 +60,7 @@ def listIds(typeList=None, user_id=None, corpus_id=None):
     else:
         raise Exception("Usage (Warning): Need corpus_id and user_id")

 # Some functions to manage ngrams according to the lists
 def listNgramIds(list_id=None, typeList=None,
@@ -74,6 +78,9 @@ def listNgramIds(list_id=None, typeList=None,
     doc_id : to get specific ngrams related to a document with Node.id=doc_id
     user_id : needed to create list if it does not exist
     '''
+    # implicit global session
     if typeList is None:
         typeList = ['MiamList', 'StopList']
     elif isinstance(typeList, string):
@@ -123,6 +130,8 @@ def ngramList(do, list_id, ngram_ids=None) :
     ngram_id = [Int] : list of Ngrams id (Ngrams.id)
     list_id = Int : list id (Node.id)
     '''
+    # implicit global session
     results = []
     if do == 'create':
@@ -174,6 +183,7 @@ def ngrams2miam(user_id=None, corpus_id=None):
     '''
     Create a Miam List only
     '''
+    # implicit global session
     miam_id = listIds(typeList='MiamList', user_id=user_id, corpus_id=corpus_id)[0][0]
     print(miam_id)
@@ -206,6 +216,8 @@ def ngrams2miamBis(corpus):
     miam_id = get_or_create_node(corpus=corpus, nodetype='MiamList')
     stop_id = get_or_create_node(corpus=corpus,nodetype='StopList')
+    # implicit global session
     query = (session.query(
             literal_column(str(miam_id)).label("node_id"),
             Ngram.id,
@@ -247,6 +259,7 @@ def doList(
     lem = equivalent Words which are lemmatized (but the main form)
     cvalue = equivalent N-Words according to C-Value (but the main form)
     '''
+    # implicit global session
     if type_list not in ['MiamList', 'MainList']:
         raise Exception("Type List (%s) not supported, try: \'MiamList\' or \'MainList\'" % type_list)
@@ -356,4 +369,3 @@ def doList(
     return(list_dict[type_list]['id'])
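
The "# implicit global session" annotations added throughout this file flag call sites that still depend on the module-level session. The two styles side by side, as a sketch built from the names imported in this diff:

    # 1) implicit global session: module-level, shared by every caller
    from gargantext_web.db import session
    node = session.query(Node).filter(Node.id == corpus_id).first()

    # 2) explicit session: created per task, passed down as `mysession`
    from gargantext_web.db import get_session
    mysession = get_session()
    node = mysession.query(Node).filter(Node.id == corpus_id).first()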
@@ -5,7 +5,7 @@ from admin.env import *
 from admin.utils import PrintException,DebugTime
 from gargantext_web.db import NodeNgram,NodeNodeNgram,NodeNgramNgram
-from gargantext_web.db import get_or_create_node, session, bulk_insert
+from gargantext_web.db import get_or_create_node, get_session, bulk_insert
 from sqlalchemy.sql import func
 from sqlalchemy import desc, asc, or_, and_, Date, cast, select
@@ -15,22 +15,23 @@ from sqlalchemy.orm import aliased
 from ngram.tools import insert_ngrams
 import csv

-def compute_mapList(corpus,limit=500,n=1):
+def compute_mapList(corpus,limit=500,n=1, mysession=None):
     '''
     According to Specificities and stoplist,
     '''
     monograms_part = 0.005
     monograms_limit = round(limit * monograms_part)
     multigrams_limit = limit - monograms_limit
     dbg = DebugTime('Corpus #%d - computing Miam' % corpus.id)

-    node_miam = get_or_create_node(nodetype='MiamList', corpus=corpus)
-    node_stop = get_or_create_node(nodetype='StopList', corpus=corpus)
-    node_group = get_or_create_node(nodetype='Group', corpus=corpus)
-    node_spec = get_or_create_node(nodetype='Specificity', corpus=corpus)
+    node_miam = get_or_create_node(nodetype='MiamList', corpus=corpus, mysession=mysession)
+    node_stop = get_or_create_node(nodetype='StopList', corpus=corpus, mysession=mysession)
+    node_group = get_or_create_node(nodetype='Group', corpus=corpus, mysession=mysession)
+    node_spec = get_or_create_node(nodetype='Specificity', corpus=corpus, mysession=mysession)

     Miam=aliased(NodeNgram)
     Stop=aliased(NodeNgram)
@@ -38,7 +39,7 @@ def compute_mapList(corpus,limit=500,n=1):
     Spec=aliased(NodeNodeNgram)

-    query = (session.query(Spec.ngram_id, Spec.score)
+    query = (mysession.query(Spec.ngram_id, Spec.score)
             .join(Miam, Spec.ngram_id == Miam.ngram_id)
             .join(Ngram, Ngram.id == Spec.ngram_id)
             #.outerjoin(Group, Group.ngramy_id == Spec.ngram_id)
@@ -61,19 +62,19 @@ def compute_mapList(corpus,limit=500,n=1):
             .limit(multigrams_limit)
             )

-    stop_ngrams = (session.query(NodeNgram.ngram_id)
+    stop_ngrams = (mysession.query(NodeNgram.ngram_id)
                   .filter(NodeNgram.node_id == node_stop.id)
                   .all()
                   )

-    grouped_ngrams = (session.query(NodeNgramNgram.ngramy_id)
+    grouped_ngrams = (mysession.query(NodeNgramNgram.ngramy_id)
                      .filter(NodeNgramNgram.node_id == node_group.id)
                      .all()
                      )

-    node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus)
-    session.query(NodeNgram).filter(NodeNgram.node_id==node_mapList.id).delete()
-    session.commit()
+    node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus, mysession=mysession)
+    mysession.query(NodeNgram).filter(NodeNgram.node_id==node_mapList.id).delete()
+    mysession.commit()

     data = zip(
         [node_mapList.id for i in range(1,limit)]
@@ -87,12 +88,13 @@ def compute_mapList(corpus,limit=500,n=1):
     dbg.show('MapList computed')

-def insert_miam(corpus, ngrams=None, path_file_csv=None):
+def insert_miam(corpus, ngrams=None, path_file_csv=None, mysession=None):
     dbg = DebugTime('Corpus #%d - computing Miam' % corpus.id)
-    node_miam = get_or_create_node(nodetype='MiamList', corpus=corpus)
-    session.query(NodeNgram).filter(NodeNgram.node_id==node_miam.id).delete()
-    session.commit()
+    node_miam = get_or_create_node(nodetype='MiamList', corpus=corpus, mysession=mysession)
+    mysession.query(NodeNgram).filter(NodeNgram.node_id==node_miam.id).delete()
+    mysession.commit()

     stop_words = set()
     miam_words = set()
@@ -122,7 +124,4 @@ def insert_miam(corpus, ngrams=None, path_file_csv=None):
     file_csv.close()
     dbg.show('Miam computed')
-
-#corpus = session.query(Node).filter(Node.id==540420).first()
-#compute_mapList(corpus)
-#insert_miam(corpus=corpus, path_file_csv="Thesaurus_tag.csv")
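
For the default limit=500, the monogram/multigram split in compute_mapList works out as follows (plain Python, values implied directly by the code above):

    limit = 500
    monograms_part = 0.005
    monograms_limit = round(limit * monograms_part)   # round(2.5) -> 2 (Python's banker's rounding)
    multigrams_limit = limit - monograms_limit        # 498 slots left for multigrams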
-from gargantext_web.db import session, cache, get_cursor
+from gargantext_web.db import get_session, cache, get_cursor
 from gargantext_web.db import Node, NodeNgram, NodeNodeNgram
 from gargantext_web.db import get_or_create_node
 from admin.utils import DebugTime

-def compute_occs(corpus):
+def compute_occs(corpus, mysession=None):
+    '''
+    compute_occs :: Corpus -> IO ()
+    '''
     dbg = DebugTime('Corpus #%d - OCCURRENCES' % corpus.id)
     dbg.show('Calculate occurrences')

-    occs_node = get_or_create_node(nodetype='Occurrences', corpus=corpus)
+    occs_node = get_or_create_node(nodetype='Occurrences', corpus=corpus, mysession=mysession)
     #print(occs_node.id)

-    (session.query(NodeNodeNgram)
+    (mysession.query(NodeNodeNgram)
            .filter(NodeNodeNgram.nodex_id==occs_node.id).delete()
     )
-    session.commit()
+    mysession.commit()

     db, cursor = get_cursor()
     cursor.execute('''
@@ -46,5 +51,7 @@ def compute_occs(corpus):
         )
     )
     db.commit()

     #data = session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==occs_node.id).all()
     #print([n for n in data])
@@ -10,17 +10,17 @@ import numpy as np
 import pandas as pd

 from analysis.cooccurrences import do_cooc
-from gargantext_web.db import session, cache, get_or_create_node, bulk_insert
+from gargantext_web.db import get_session, cache, get_or_create_node, bulk_insert
 from gargantext_web.db import NodeNgramNgram, NodeNodeNgram
 from sqlalchemy import desc, asc, or_, and_, Date, cast, select

-def specificity(cooc_id=None, corpus=None, limit=100):
+def specificity(cooc_id=None, corpus=None, limit=100, mysession=None):
     '''
     Compute the specificity, simple calculus.
     '''
-    cooccurrences = (session.query(NodeNgramNgram)
+    cooccurrences = (mysession.query(NodeNgramNgram)
                     .filter(NodeNgramNgram.node_id==cooc_id)
                     .order_by(NodeNgramNgram.score)
                     .limit(limit)
@@ -41,36 +41,37 @@ def specificity(cooc_id=None, corpus=None, limit=100):
     m = ( xs - ys) / (2 * (x.shape[0] - 1))
     m = m.sort(inplace=False)

-    node = get_or_create_node(nodetype='Specificity',corpus=corpus)
+    node = get_or_create_node(nodetype='Specificity',corpus=corpus, mysession=mysession)
     data = zip(  [node.id for i in range(1,m.shape[0])]
               , [corpus.id for i in range(1,m.shape[0])]
               , m.index.tolist()
               , m.values.tolist()
              )
-    session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==node.id).delete()
-    session.commit()
+    mysession.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==node.id).delete()
+    mysession.commit()

     bulk_insert(NodeNodeNgram, ['nodex_id', 'nodey_id', 'ngram_id', 'score'], [d for d in data])
     return(node.id)

-def compute_specificity(corpus,limit=100):
+def compute_specificity(corpus,limit=100, mysession=None):
     '''
     Computing specificities as NodeNodeNgram.
     All workflow is the following:
     1) Compute the cooc matrix
     2) Compute the specificity score, saving it in database, return its Node
     '''
     dbg = DebugTime('Corpus #%d - specificity' % corpus.id)

-    list_cvalue = get_or_create_node(nodetype='Cvalue', corpus=corpus)
-    cooc_id = do_cooc(corpus=corpus, cvalue_id=list_cvalue.id,limit=limit)
-    specificity(cooc_id=cooc_id,corpus=corpus,limit=limit)
+    list_cvalue = get_or_create_node(nodetype='Cvalue', corpus=corpus, mysession=mysession)
+    cooc_id = do_cooc(corpus=corpus, cvalue_id=list_cvalue.id,limit=limit, mysession=mysession)
+    specificity(cooc_id=cooc_id,corpus=corpus,limit=limit,mysession=mysession)
     dbg.show('specificity')

 #corpus=session.query(Node).filter(Node.id==244250).first()
 #compute_specificity(corpus)
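
The score computed in specificity() is the row/column-sum difference of the cooccurrence matrix, scaled by twice (n - 1). A toy run with made-up numbers (the construction of x, xs and ys from the NodeNgramNgram rows is assumed from the variable names above):

    import pandas as pd

    x = pd.DataFrame([[0, 3, 1],
                      [2, 0, 2],
                      [1, 4, 0]], index=list('abc'), columns=list('abc'))
    xs = x.sum(axis=1)                       # outgoing co-occurrence mass per term
    ys = x.sum(axis=0)                       # incoming mass (differs when x is asymmetric)
    m = (xs - ys) / (2 * (x.shape[0] - 1))   # the formula above: a 0.25, b -0.75, c 0.50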
@@ -4,12 +4,14 @@ from parsing.corpustools import *
 from gargantext_web.db import NodeNgram
 from sqlalchemy import desc, asc, or_, and_, Date, cast, select
-from gargantext_web.db import get_cursor, bulk_insert
+from gargantext_web.db import get_cursor, bulk_insert, session,get_session

 def get_ngramogram(corpus, limit=None):
     """
     Ngram is a composition of ograms (ogram = 1gram)
     """
+    # implicit global session
     try:
         query = (session
                 .query(Ngram.id, Ngram.terms)
@@ -303,6 +305,7 @@ def stem_corpus(corpus_id=None):
     Returns Int as id of the Stem Node
     stem_corpus :: Int
     '''
+    # implicit global session
     corpus = session.query(Node).filter(Node.id == corpus_id).first()
     #print('Number of new ngrams to stem:',
@@ -329,4 +332,3 @@ def stem_corpus(corpus_id=None):
         print('Usage: stem_corpus(corpus_id=corpus.id)')
@@ -2,7 +2,7 @@ import re
 from admin.utils import PrintException
 from gargantext_web.db import Node, Ngram, NodeNgram,NodeNodeNgram
-from gargantext_web.db import cache, session, get_or_create_node, bulk_insert
+from gargantext_web.db import cache, session,get_session, get_or_create_node, bulk_insert
 import sqlalchemy as sa
 from sqlalchemy.sql import func
@@ -14,6 +14,8 @@ from ngram.tools import insert_ngrams
 from analysis.lists import WeightedList, UnweightedList

 def importStopList(node,filename,language='fr'):
+    # implicit global session
     with open(filename, "r") as f:
         stop_list = f.read().splitlines()
@@ -72,17 +74,18 @@ def isStopWord(ngram, stop_words=None):
             if test_match(word, regex) is True :
                 return(True)

-def compute_stop(corpus,limit=2000,debug=False):
+def compute_stop(corpus,limit=2000,debug=False, mysession=None):
     '''
     do some statistics on all stop lists of database of the same type
     '''
-    stop_node = get_or_create_node(nodetype='StopList', corpus=corpus)
+    stop_node_id = get_or_create_node(nodetype='StopList', corpus=corpus, mysession=mysession).id
     # TODO do a function to get all stop words with social scores
-    root = session.query(Node).filter(Node.type_id == cache.NodeType['Root'].id).first()
-    root_stop_id = get_or_create_node(nodetype='StopList', corpus=root).id
+    root = mysession.query(Node).filter(Node.type_id == cache.NodeType['Root'].id).first()
+    root_stop_id = get_or_create_node(nodetype='StopList', corpus=root, mysession=mysession).id

-    stop_words = (session.query(Ngram.terms)
+    stop_words = (mysession.query(Ngram.terms)
                  .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
                  .filter(NodeNgram.node_id == root_stop_id)
                  .all()
@@ -91,7 +94,7 @@ def compute_stop(corpus,limit=2000,debug=False):
     #print([n for n in stop_words])

     frequency = sa.func.count( NodeNgram.weight )
-    ngrams = ( session.query( Ngram.id, Ngram.terms, frequency )
+    ngrams = ( mysession.query( Ngram.id, Ngram.terms, frequency )
              .join( NodeNgram, NodeNgram.ngram_id == Ngram.id )
              .join( Node, Node.id == NodeNgram.node_id )
              .filter( Node.parent_id == corpus.id,
@@ -108,5 +111,5 @@ def compute_stop(corpus,limit=2000,debug=False):
     #print([n for n in ngrams_to_stop])

     stop = WeightedList({ n[0] : -1 for n in ngrams_to_stop})
-    stop.save(stop_node.id)
+    stop.save(stop_node_id)
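
compute_stop now keeps only the StopList node id and hands it to WeightedList.save(). A sketch of that persistence step (the ngram ids are made up; the behaviour of save() is inferred from its use above, not verified):

    from analysis.lists import WeightedList

    stop = WeightedList({42: -1, 43: -1})  # hypothetical ngram ids; weight -1 marks "stop"
    stop.save(stop_node_id)                # assumed to write NodeNgram rows under the StopList node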
 #from admin.env import *
 from math import log
 from gargantext_web.db import *
-from gargantext_web.db import get_or_create_node
+from gargantext_web.db import get_session, get_or_create_node
 from admin.utils import DebugTime

-def compute_tfidf(corpus):
+def compute_tfidf(corpus, mysession=None):
     # compute terms frequency sum
     dbg = DebugTime('Corpus #%d - TFIDF' % corpus.id)
     dbg.show('calculate terms frequencies sums')
-    tfidf_node = get_or_create_node(nodetype='Tfidf', corpus=corpus)
+    tfidf_node = get_or_create_node(nodetype='Tfidf', corpus=corpus, mysession=mysession)

     db, cursor = get_cursor()
     cursor.execute('''
@@ -119,18 +120,20 @@ def compute_tfidf(corpus):
     # the end!
     db.commit()

-def compute_tfidf_global(corpus):
+def compute_tfidf_global(corpus, mysession=None):
     '''
     Maybe improve this with:
     #http://stackoverflow.com/questions/8674718/best-way-to-select-random-rows-postgresql
     '''
     dbg = DebugTime('Corpus #%d - tfidf global' % corpus.id)
     dbg.show('calculate terms frequencies sums')

-    tfidf_node = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus)
+    tfidf_node = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus, mysession=mysession)
     # update would be better
-    session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==tfidf_node.id).delete()
-    session.commit()
+    mysession.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==tfidf_node.id).delete()
+    mysession.commit()

     # compute terms frequency sum
     db, cursor = get_cursor()
@@ -257,6 +260,3 @@ def compute_tfidf_global(corpus):
     db.commit()
     dbg.show('insert tfidf')
-
-#corpus=session.query(Node).filter(Node.id==244250).first()
-#compute_tfidf_global(corpus)
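
For reference, the quantity the SQL in compute_tfidf builds is standard tf-idf; a toy computation with assumed counts (the exact normalisation in the SQL omitted from this hunk may differ):

    from math import log

    tf  = 3 / 100          # the term occurs 3 times in a 100-term document
    idf = log(1000 / 10)   # 10 documents out of 1000 contain the term
    print(tf * idf)        # ~0.138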
-from gargantext_web.db import session
 from gargantext_web.db import Ngram, NodeNgram, NodeNgramNgram
-from gargantext_web.db import get_cursor, bulk_insert, get_or_create_node
+from gargantext_web.db import get_cursor, bulk_insert, get_or_create_node, session,get_session

 def insert_ngrams_to_list(list_of_ngrams, corpus, list_type='MapList', erase=True):
     '''
     Works only for Stop and Map
     '''
-    list_node = get_or_create_node(corpus=corpus, nodetype=list_type)
-    group_node = get_or_create_node(corpus=corpus, nodetype='GroupList')
+    # implicit global session
+    list_node = get_or_create_node(corpus=corpus, nodetype=list_type, mysession=session)
+    group_node = get_or_create_node(corpus=corpus, nodetype='GroupList', mysession=session)
     group_list = (session.query(NodeNgramNgram.ngramy_id)
                  .filter(NodeNgramNgram.id==group_node.id)
                  .all()
@@ -8,21 +8,21 @@ from gargantext_web.db import get_or_create_node
 from ngram.mapList import compute_mapList
 from ngram.occurrences import compute_occs

-from gargantext_web.db import session , Node , NodeNgram
+from gargantext_web.db import Node , NodeNgram
 from admin.utils import WorkflowTracking

-def ngram_workflow(corpus, n=5000):
+def ngram_workflow(corpus, n=5000, mysession=None):
     '''
     All the workflow to filter the ngrams.
     '''
     update_state = WorkflowTracking()

-    update_state.processing_(corpus, "Stop words")
-    compute_stop(corpus)
+    update_state.processing_(corpus.id, "Stop words")
+    compute_stop(corpus, mysession=mysession)

-    update_state.processing_(corpus, "TF-IDF global score")
-    compute_tfidf_global(corpus)
+    update_state.processing_(corpus.id, "TF-IDF global score")
+    compute_tfidf_global(corpus, mysession=mysession)

     part = round(n * 0.9)
@@ -31,28 +31,28 @@ def ngram_workflow(corpus, n=5000):
     # part = round(part * 0.8)
     #print('spec part:', part)

-    update_state.processing_(corpus, "Specificity score")
-    compute_specificity(corpus,limit=part)
+    update_state.processing_(corpus.id, "Specificity score")
+    compute_specificity(corpus,limit=part, mysession=mysession)

     part = round(part * 0.8)

     limit_inf = round(part * 1)
     limit_sup = round(part * 5)
     #print(limit_inf,limit_sup)

-    update_state.processing_(corpus, "Synonyms")
+    update_state.processing_(corpus.id, "Synonyms")
     try:
-        compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup)
+        compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup, mysession=mysession)
     except Exception as error:
         print("Workflow Ngram Group error", error)
         pass

-    update_state.processing_(corpus, "Map list terms")
-    compute_mapList(corpus,limit=1000) # size
+    update_state.processing_(corpus.id, "Map list terms")
+    compute_mapList(corpus,limit=1000, mysession=mysession) # size

-    update_state.processing_(corpus, "TF-IDF local score")
-    compute_tfidf(corpus)
+    update_state.processing_(corpus.id, "TF-IDF local score")
+    compute_tfidf(corpus, mysession=mysession)

-    update_state.processing_(corpus, "Occurrences")
-    compute_occs(corpus)
+    update_state.processing_(corpus.id, "Occurrences")
+    compute_occs(corpus, mysession=mysession)
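
The successive limits in ngram_workflow all derive from the single budget n. With the default n=5000, the values are (arithmetic only, straight from the code above):

    n = 5000
    part = round(n * 0.9)         # 4500  -> budget for tf-idf global / specificity
    part = round(part * 0.8)      # 3600
    limit_inf = round(part * 1)   # 3600  -> lower grouping bound
    limit_sup = round(part * 5)   # 18000 -> upper grouping bound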
@@ -11,8 +11,11 @@ import datetime
 import copy
 from gargantext_web.views import move_to_trash
-from gargantext_web.db import session, cache, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram\
+from gargantext_web.db import cache, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram\
     , NodeType, Node_Hyperdata
+from gargantext_web.views import session
 from gargantext_web.validation import validate, ValidationException
 from node import models
@@ -100,8 +103,10 @@ def Root(request, format=None):
 class NodesChildrenNgrams(APIView):

     def get(self, request, node_id):
         # query ngrams
         ParentNode = aliased(Node)
         ngrams_query = (session
             .query(Ngram.terms, func.sum(Node_Ngram.weight).label('count'))
             .join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
@@ -139,9 +144,11 @@ class NodesChildrenNgrams(APIView):
             ],
         })

 class NodesChildrenNgramsIds(APIView):

     def get(self, request, node_id):
         # query ngrams
         ParentNode = aliased(Node)
         ngrams_query = (session
@@ -187,6 +194,7 @@ from gargantext_web.db import get_or_create_node
 class Ngrams(APIView):

     def get(self, request, node_id):
         # query ngrams
         ParentNode = aliased(Node)
         corpus = session.query(Node).filter(Node.id==node_id).first()
@@ -305,9 +313,9 @@ class Ngrams(APIView):
         })

 class NodesChildrenDuplicates(APIView):

     def _fetch_duplicates(self, request, node_id, extra_columns=None, min_count=1):
         # input validation
         if extra_columns is None:
             extra_columns = []
@@ -347,6 +355,7 @@ class NodesChildrenDuplicates(APIView):
         # and now, return it
         return duplicates_query

     def get(self, request, node_id):
         # data to be returned
         duplicates = self._fetch_duplicates(request, node_id)
@@ -396,6 +405,7 @@ class NodesChildrenDuplicates(APIView):
 # retrieve metadata from a given list of parent node
 def get_metadata(corpus_id_list):

     # query hyperdata keys
     ParentNode = aliased(Node)
     hyperdata_query = (session
@@ -695,6 +705,7 @@ class NodesList(APIView):
     authentication_classes = (SessionAuthentication, BasicAuthentication)

     def get(self, request):
         print("user id : " + str(request.user))
         query = (session
             .query(Node.id, Node.name, NodeType.name.label('type'))
@@ -711,8 +722,10 @@ class NodesList(APIView):
             for node in query.all()
         ]})

 class Nodes(APIView):

     def get(self, request, node_id):
         node = session.query(Node).filter(Node.id == node_id).first()
         if node is None:
             raise APIException('This node does not exist', 404)
@@ -726,12 +739,14 @@ class Nodes(APIView):
             'hyperdata': node.hyperdata,
         })

     # deleting node by id
     # currently, very dangerous.
     # it should take the subnodes into account as well,
     # for better consistency...
     def delete(self, request, node_id):
         user = request.user
         node = session.query(Node).filter(Node.id == node_id).first()
@@ -745,6 +760,7 @@ class Nodes(APIView):
         except Exception as error:
             msgres ="error deleting : " + node_id + str(error)

 class CorpusController:

     @classmethod
     def get(cls, corpus_id):
@@ -764,7 +780,6 @@ class CorpusController:
         #     raise Http403("Unauthorized access.")
         return corpus

     @classmethod
     def ngrams(cls, request, node_id):
@@ -773,6 +788,7 @@ class CorpusController:
         # build query
         ParentNode = aliased(Node)
         query = (session
             .query(Ngram.terms, func.count('*'))
             .join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
@@ -799,3 +815,4 @@ class CorpusController:
             )
         else:
             raise ValidationError('Unrecognized "format=%s", should be "csv" or "json"' % (format, ))
 from rest_v1_0.api import APIView, APIException, JsonHttpResponse, CsvHttpResponse
 from rest_framework.authentication import SessionAuthentication, BasicAuthentication
-from gargantext_web.db import session, Node
+from gargantext_web.db import session,get_session, Node
 from analysis.functions import get_cooc

 class Graph(APIView):
     authentication_classes = (SessionAuthentication, BasicAuthentication)
     def get(self, request, corpus_id):
         '''
         Graph.get :: Get graph data as REST api.
@@ -13,6 +14,8 @@ class Graph(APIView):
         graph?field1=ngrams&field2=ngrams&
         graph?field1=ngrams&field2=ngrams&start=''&end=''
         '''
+        # implicit global session
         field1 = request.GET.get('field1', 'ngrams')
         field2 = request.GET.get('field2', 'ngrams')
@@ -16,8 +16,7 @@ from gargantext_web.db import cache
 from gargantext_web.validation import validate, ValidationException
-from gargantext_web.db import session, Node, NodeNgram, NodeNgramNgram\
-    , NodeNodeNgram, Ngram, Hyperdata, Node_Ngram, get_or_create_node
+from gargantext_web.db import session,get_session, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram, get_or_create_node

 def DebugHttpResponse(data):
@@ -82,6 +81,8 @@ class List(APIView):
     def get_metadata ( self , ngram_ids , parent_id ):
+        # implicit global session
         start_ = time.time()
         nodes_ngrams = session.query(Ngram.id , Ngram.terms).filter( Ngram.id.in_( list(ngram_ids.keys()))).all()
@@ -121,10 +122,10 @@ class List(APIView):
         return { "data":ngram_ids , "secs":(end_ - start_) }

     def get(self, request, corpus_id , list_name ):
         if not request.user.is_authenticated():
             return JsonHttpResponse( {"request" : "forbidden"} )
+        # implicit global session
         corpus = session.query(Node).filter( Node.id==corpus_id ).first()
         # if corpus==None:
         #     return JsonHttpResponse( {"request" : "forbidden"} )
@@ -162,6 +163,7 @@ class Ngrams(APIView):
     def get(self, request, node_id):
         if not request.user.is_authenticated():
             return JsonHttpResponse( {"request" : "forbidden"} )
+        # implicit global session
         corpus = session.query(Node).filter( Node.id==node_id).first()
         # if corpus==None:
         #     return JsonHttpResponse( {"request" : "forbidden"} )
@@ -340,6 +342,9 @@ class Group(APIView):
     '''
     def get_group_id(self , node_id , user_id):
         node_id = int(node_id)
+        # implicit global session
         corpus = session.query(Node).filter( Node.id==node_id).first()
         if corpus==None: return None
         group = get_or_create_node(corpus=corpus, nodetype='Group')
@@ -370,6 +375,9 @@ class Group(APIView):
         import networkx as nx
         G = nx.Graph()
         DG = nx.DiGraph()
+        # implicit global session
         ngrams_ngrams = (session
             .query(NodeNgramNgram)
             .filter(NodeNgramNgram.node_id==group_id)
@@ -416,6 +424,8 @@ class Group(APIView):
     def delete(self, request, corpus_id):
         # input validation
+        # implicit global session
         input = validate(request.DATA, {'data' : {'source': int, 'target': list}})
         group_id = get_group_id(corpus_id , request.user.id)
@@ -434,6 +444,7 @@ class Group(APIView):
             raise APIException('Missing parameter: "{\'data\' : [\'source\': Int, \'target\': [Int]}"', 400)

     def put(self , request , corpus_id ):
+        # implicit global session
         group_rawreq = dict(request.data)
@@ -448,6 +459,8 @@ class Group(APIView):
                 gdict.append(subform)
             GDict.append( gdict )
         existing_group_id = self.get_group_id(corpus_id , request.user.id)
+        # implicit global session
         grouped_ngrams = (session
             .query(NodeNgramNgram)
             .filter(NodeNgramNgram.node_id==existing_group_id)
@@ -512,6 +525,7 @@ class Group(APIView):
                     nodengramngram = NodeNgramNgram(node_id=existing_group_id, ngramx_id=n1 , ngramy_id=n2, score=1.0)
                     session.add(nodengramngram)
                     session.commit()

             # [ - - - / doing links of new clique and adding to DB - - - ] #
@@ -571,6 +585,7 @@ class Keep(APIView):
     authentication_classes = (SessionAuthentication, BasicAuthentication)

     def get (self, request, corpus_id):
+        # implicit global session
         # list_id = session.query(Node).filter(Node.id==list_id).first()
         corpus = session.query(Node).filter( Node.id==corpus_id ).first()
         node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus )
@@ -584,6 +599,7 @@ class Keep(APIView):
         """
         Add ngrams to map list
         """
+        # implicit global session
         group_rawreq = dict(request.data)
         ngram_2add = [int(i) for i in list(group_rawreq.keys())]
         corpus = session.query(Node).filter( Node.id==corpus_id ).first()
@@ -598,6 +614,8 @@ class Keep(APIView):
         """
         Delete ngrams from the map list
         """
+        # implicit global session
         group_rawreq = dict(request.data)
         # print("group_rawreq:")
         # print(group_rawreq)
@@ -616,4 +634,3 @@ class Keep(APIView):
         return JsonHttpResponse(True, 201)
@@ -29,6 +29,7 @@ import threading
 from node.admin import CustomForm
 from gargantext_web.db import *
+from gargantext_web.db import get_sessionmaker, session,get_session
 from gargantext_web.settings import DEBUG, MEDIA_ROOT
 from rest_v1_0.api import JsonHttpResponse
@@ -45,7 +46,8 @@ def getGlobalStats(request ):
     alist = ["bar","foo"]
     if request.method == "POST":
-        N = 1000
+        #N = 1000
+        N = 300
         query = request.POST["query"]
         print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" query =", query )
         print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" N =", N )
@@ -83,14 +85,12 @@ def getGlobalStatsISTEXT(request ):
 def doTheQuery(request , project_id):
-    alist = ["hola","mundo"]
+    # implicit global session
     # do we have a valid project id?
     try:
         project_id = int(project_id)
     except ValueError:
         raise Http404()

     # do we have a valid project?
     project = (session
         .query(Node)
@@ -134,7 +134,7 @@ def doTheQuery(request , project_id):
     )
     session.add(corpus)
     session.commit()
+    corpus_id = corpus.id
     # """
     # urlreqs: List of urls to query.
     # - Then, to each url in urlreqs you do:
@@ -170,9 +170,9 @@ def doTheQuery(request , project_id):
     try:
         if not DEBUG:
-            apply_workflow.apply_async((corpus.id,),)
+            apply_workflow.apply_async((corpus_id,),)
         else:
-            thread = threading.Thread(target=apply_workflow, args=(corpus.id, ), daemon=True)
+            thread = threading.Thread(target=apply_workflow, args=(corpus_id, ), daemon=True)
             thread.start()
     except Exception as error:
         print('WORKFLOW ERROR')
@@ -188,7 +188,7 @@ def testISTEX(request , project_id):
     print("testISTEX:")
     print(request.method)
     alist = ["bar","foo"]
+    # implicit global session
     # do we have a valid project id?
     try:
         project_id = int(project_id)
@@ -247,7 +247,7 @@ def testISTEX(request , project_id):
     )
     session.add(corpus)
     session.commit()
+    corpus_id = corpus.id
     ensure_dir(request.user)
     tasks = MedlineFetcher()
@@ -276,9 +276,9 @@ def testISTEX(request , project_id):
     ###########################
     try:
         if not DEBUG:
-            apply_workflow.apply_async((corpus.id,),)
+            apply_workflow.apply_async((corpus_id,),)
         else:
-            thread = threading.Thread(target=apply_workflow, args=(corpus.id, ), daemon=True)
+            thread = threading.Thread(target=apply_workflow, args=(corpus_id, ), daemon=True)
             thread.start()
     except Exception as error:
         print('WORKFLOW ERROR')
@@ -289,4 +289,3 @@ def testISTEX(request , project_id):
     data = [query_string,query,N]
     return JsonHttpResponse(data)
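
The corpus.id -> corpus_id change matters because the workflow runs on a Celery worker or a separate thread: a session-bound ORM instance should not cross that boundary, so the primary key is captured while the request's session is still usable. Sketched, consolidating the hunks above:

    corpus_id = corpus.id                          # read the key before leaving the session
    if not DEBUG:
        apply_workflow.apply_async((corpus_id,),)  # the task re-loads the Node by id
    else:
        threading.Thread(target=apply_workflow, args=(corpus_id,), daemon=True).start()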
@@ -25,13 +25,34 @@
     -o-transition: height 0.1s;
     transition: height 0.1s;
 }

-th { color: #fff; }
+th {
+    color: #fff;
+    font-weight: normal;
+    font-size: 0.9em;
+    vertical-align: top ;
+}
+
+/* specific selector to override equally specific bootstrap.css */
+.table > thead > tr > th { vertical-align: top ; }

 th a {
     color: #fff;
-    font-weight: normal;
     font-style: italic;
-    font-size: 0.9em;
 }
+
+th p.note {
+    color: #ccc;
+    font-size: 0.6em;
+    margin: 1em 0 0 0 ;
+}
+th p.note > input {
+    float: left;
+    margin: 0 .2em 0 0 ;
+}
+th p.note > label {
+    float: left;
+}

 tr:hover {
@@ -171,7 +192,7 @@ input[type=radio]:checked + label {
     <div class="panel-heading">
         <h4 class="panel-title">
             <a data-toggle="collapse" data-parent="#accordion" href="#collapseOne">
-                <p id="corpusdisplayer" onclick='Final_UpdateTable("click")' class="btn btn-primary btn-lg" style="width:200px; margin:0 auto; display:block;">Open Folder</h2></p>
+                <p id="corpusdisplayer" onclick='Final_UpdateTable("click")' class="btn btn-primary btn-lg" style="width:200px; margin:0 auto; display:block;">Close Term List</h2></p>
             </a>
         </h4>
     </div>
@@ -194,7 +215,7 @@ input[type=radio]:checked + label {
     </p> -->
     <p align="right">
         <!-- <button id="Clean_All" class="btn btn-warning">Clean</button> -->
-        <button id="Save_All" class="btn btn-primary">Save</button>
+        <button id="Save_All" class="btn btn-primary">Save changes permanently</button>
     </p>
 </div>
@@ -247,66 +268,6 @@ input[type=radio]:checked + label {
 </div>

-<div id="pre_savechanges" class="modal fade">
-    <div class="modal-dialog">
-        <div class="modal-content">
-            <div class="modal-header">
-                <h3 class="modal-title">Do you want to apply these to the whole Project as well?:</h3>
-            </div>
-            <div class="modal-body">
-                <div id="stoplist_content">
-                </div>
-                <!--
-                <ul class="nav nav-tabs">
-                    <li class="active"><a id="stoplist" href="#stoplist_content">Stop List</a></li>
-                    <li><a id="maplist" href="#maplist_content">Map List</a></li>
-                    <li><a id="grouplist" href="#grouplist_content">Group List</a></li>
-                </ul>
-                <div class="tab-content">
-                    <div id="stoplist_content" class="tab-pane fade in active">
-                        <ul>
-                            <li>jiji01</li>
-                            <li>jiji02</li>
-                            <li>jiji03</li>
-                        </ul>
-                    </div>
-                    <div id="maplist_content" class="tab-pane fade">
-                        qowieuoqiwueowq
-                    </div>
-                    <div id="grouplist_content" class="tab-pane fade">
-                        asdhasjkdhasjdh
-                    </div>
-                </div>
-                -->
-                <div class="modal-footer">
-                    <button onclick="SaveGlobalChanges(false)" id="nope" type="button" class="btn btn-default" data-dismiss="modal">No</button>
-                    <button onclick="SaveGlobalChanges(true)" id="yep" type="button" class="btn btn-primary">Yes</button>
-                </div>
-            </div>
-        </div>
-    </div>
-</div>

 <div id="filter_search" style="visibility:hidden">
     <select id="example-single-optgroups" onchange="SearchFilters(this);">
@@ -299,7 +299,7 @@
     var origQuery = $("#id_name").val()
     console.log("printing the results:")
     console.log(origQuery)
-    testISTEX(origQuery.replace(" ","+"),1000)
+    testISTEX(origQuery.replace(" ","+"),300)
     }
 }
 else {
@@ -349,7 +349,7 @@
     console.log("enabling "+"#"+value.id)
     $("#"+value.id).attr('onclick','getGlobalResults(this);');
     // $("#submit_thing").prop('disabled' , false)
-    $("#submit_thing").html("Process a 1000 sample!")
+    $("#submit_thing").html("Process a 300 sample!")
     thequeries = data
     var N=0,k=0;
@@ -388,7 +388,7 @@
     console.log("enabling "+"#"+value.id)
     $("#"+value.id).attr('onclick','getGlobalResults(this);');
     // $("#submit_thing").prop('disabled' , false)
-    $("#submit_thing").html("Process a 1000 sample!")
+    $("#submit_thing").html("Process a 300 sample!")
     thequeries = data
     var N=data.length,k=0;
from django.shortcuts import redirect from django.shortcuts import redirect
from django.shortcuts import render # from django.shortcuts import render
from django.db import transaction # from django.db import transaction
#
from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseForbidden from django.http import Http404, HttpResponse #, HttpResponseRedirect, HttpResponseForbidden
from django.template.loader import get_template from django.template.loader import get_template
from django.template import Context from django.template import Context
...@@ -13,49 +13,41 @@ from django.db import connection ...@@ -13,49 +13,41 @@ from django.db import connection
# Node, NodeType, Node_Resource, Project, Corpus, \ # Node, NodeType, Node_Resource, Project, Corpus, \
# Ngram, Node_Ngram, NodeNgramNgram, NodeNodeNgram # Ngram, Node_Ngram, NodeNgramNgram, NodeNodeNgram
from node.admin import CorpusForm, ProjectForm, ResourceForm, CustomForm # from node.admin import CorpusForm, ProjectForm, ResourceForm, CustomForm
#
from django.contrib.auth.models import User # from django.contrib.auth.models import User
#
import datetime import datetime
from itertools import * # from itertools import *
from dateutil.parser import parse # from dateutil.parser import parse
#
from django.db import connection # from django.db import connection
from django import forms # from django import forms
#
#
from collections import defaultdict # from collections import defaultdict
#
from parsing.FileParsers import * # from parsing.FileParsers import *
import os # import os
import json import json
import math # import math
# SOME FUNCTIONS # SOME FUNCTIONS
from gargantext_web import settings from gargantext_web import settings
#
# from django.http import *
# from django.shortcuts import render_to_response,redirect
# from django.template import RequestContext
from django.http import * # from gargantext_web.db import *
from django.shortcuts import render_to_response,redirect
from django.template import RequestContext
from django.contrib.auth.decorators import login_required
from django.contrib.auth import authenticate, login, logout
from scrappers.scrap_pubmed.admin import Logger
from gargantext_web.db import * from gargantext_web.db import session,get_session, cache, Node, NodeNgram
from sqlalchemy import func
from sqlalchemy import or_, func
from gargantext_web import about
from rest_v1_0.api import JsonHttpResponse from rest_v1_0.api import JsonHttpResponse
from ngram.lists import listIds, listNgramIds, ngramList , doList
def get_ngrams(request , project_id , corpus_id ): def get_ngrams(request , project_id , corpus_id ):
if not request.user.is_authenticated(): if not request.user.is_authenticated():
return redirect('/login/?next=%s' % request.path) return redirect('/login/?next=%s' % request.path)
...@@ -73,6 +65,8 @@ def get_ngrams(request , project_id , corpus_id ): ...@@ -73,6 +65,8 @@ def get_ngrams(request , project_id , corpus_id ):
project = cache.Node[int(project_id)] project = cache.Node[int(project_id)]
corpus = cache.Node[int(corpus_id)] corpus = cache.Node[int(corpus_id)]
type_doc_id = cache.NodeType['Document'].id type_doc_id = cache.NodeType['Document'].id
# implicit global session
number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0] number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0]
myamlist_type_id = cache.NodeType['MiamList'].id myamlist_type_id = cache.NodeType['MiamList'].id
miamlist = session.query(Node).filter(Node.parent_id==corpus_id , Node.type_id == myamlist_type_id ).first() miamlist = session.query(Node).filter(Node.parent_id==corpus_id , Node.type_id == myamlist_type_id ).first()
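Aside: the new explicit imports above (notably get_session) point at the intended replacement for the queries flagged with "# implicit global session" in these hunks. A minimal sketch of that pattern, assuming get_session() hands back a fresh caller-owned SQLAlchemy session:

from sqlalchemy import func
from gargantext_web.db import get_session, cache, Node

def count_documents(corpus_id):
    # explicit per-call session instead of the module-level global
    mysession = get_session()
    try:
        type_doc_id = cache.NodeType['Document'].id
        return (mysession.query(func.count(Node.id))
                         .filter(Node.parent_id == corpus_id,
                                 Node.type_id == type_doc_id)
                         .scalar())  # same count as the .all()[0][0] idiom above
    finally:
        mysession.close()  # assumption: sessions from get_session() must be released by the caller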
...@@ -129,6 +123,8 @@ def get_journals(request , project_id , corpus_id ): ...@@ -129,6 +123,8 @@ def get_journals(request , project_id , corpus_id ):
project = cache.Node[int(project_id)] project = cache.Node[int(project_id)]
corpus = cache.Node[int(corpus_id)] corpus = cache.Node[int(corpus_id)]
type_doc_id = cache.NodeType['Document'].id type_doc_id = cache.NodeType['Document'].id
# implicit global session
number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0] number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0]
the_query = """ SELECT hyperdata FROM node_node WHERE id=%d """ % ( int(corpus_id) ) the_query = """ SELECT hyperdata FROM node_node WHERE id=%d """ % ( int(corpus_id) )
...@@ -158,7 +154,10 @@ def get_journals_json(request , project_id, corpus_id ): ...@@ -158,7 +154,10 @@ def get_journals_json(request , project_id, corpus_id ):
user_id = request.user.id user_id = request.user.id
document_type_id = cache.NodeType['Document'].id document_type_id = cache.NodeType['Document'].id
# implicit global session
documents = session.query(Node).filter( Node.parent_id==corpus_id , Node.type_id == document_type_id ).all() documents = session.query(Node).filter( Node.parent_id==corpus_id , Node.type_id == document_type_id ).all()
for doc in documents: for doc in documents:
if "journal" in doc.hyperdata: if "journal" in doc.hyperdata:
journal = doc.hyperdata["journal"] journal = doc.hyperdata["journal"]
...@@ -167,25 +166,20 @@ def get_journals_json(request , project_id, corpus_id ): ...@@ -167,25 +166,20 @@ def get_journals_json(request , project_id, corpus_id ):
JournalsDict[journal] += 1 JournalsDict[journal] += 1
return JsonHttpResponse(JournalsDict) return JsonHttpResponse(JournalsDict)
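An equivalent, more compact counting idiom for the journal histogram built above (illustration only, not the committed code; Counter is a dict subclass, so it serializes the same way):

from collections import Counter

JournalsDict = Counter(doc.hyperdata["journal"]
                       for doc in documents
                       if "journal" in doc.hyperdata)
return JsonHttpResponse(JournalsDict)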
from gargantext_web.db import session, cache, Node, NodeNgram
from sqlalchemy import or_, func
from sqlalchemy.orm import aliased
def get_corpuses( request , node_ids ): def get_corpuses( request , node_ids ):
ngrams = [int(i) for i in node_ids.split("+") ] ngrams = [int(i) for i in node_ids.split("+") ]
# implicit global session
results = session.query(Node.id,Node.hyperdata).filter(Node.id.in_(ngrams) ).all() results = session.query(Node.id,Node.hyperdata).filter(Node.id.in_(ngrams) ).all()
for r in results: for r in results:
print(r) print(r)
return JsonHttpResponse( [ "tudo" , "bem" ] ) return JsonHttpResponse( [ "tudo" , "bem" ] )
def get_cores( request ): def get_cores( request ):
import multiprocessing import multiprocessing
cpus = multiprocessing.cpu_count() cpus = multiprocessing.cpu_count()
return JsonHttpResponse( {"data":cpus} ) return JsonHttpResponse( {"data":cpus} )
def get_corpus_state( request , corpus_id ): def get_corpus_state( request , corpus_id ):
if not request.user.is_authenticated(): if not request.user.is_authenticated():
return JsonHttpResponse( {"request" : "forbidden"} ) return JsonHttpResponse( {"request" : "forbidden"} )
...@@ -200,8 +194,12 @@ def get_corpus_state( request , corpus_id ): ...@@ -200,8 +194,12 @@ def get_corpus_state( request , corpus_id ):
# processing = corpus.hyperdata['Processing'] # processing = corpus.hyperdata['Processing']
return JsonHttpResponse( processing ) return JsonHttpResponse( processing )
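A hedged sketch of where that 'Processing' value comes from: the workflow tracker UPDATEs the corpus node's hyperdata with a {"Processing": step} entry, so the view only has to read it back. The helper name and the fallback value are assumptions, not part of the commit:

def read_processing_state(corpus_id):
    corpus = cache.Node[int(corpus_id)]
    # key written by WorkflowTracking.processing_(); 0 as "no step recorded" is hypothetical
    return corpus.hyperdata.get('Processing', 0)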
def get_groups( request ): def get_groups( request ):
"""
User groups for the current user.id
route: /get_groups
"""
if not request.user.is_authenticated(): if not request.user.is_authenticated():
return JsonHttpResponse( {"request" : "forbidden"} ) return JsonHttpResponse( {"request" : "forbidden"} )
...@@ -222,11 +220,9 @@ def get_groups( request ): ...@@ -222,11 +220,9 @@ def get_groups( request ):
return JsonHttpResponse( common_users ) return JsonHttpResponse( common_users )
def graph_share(request, generic=100, specific=100): def graph_share(request, generic=100, specific=100):
if request.method== 'GET' and "token" in request.GET: if request.method== 'GET' and "token" in request.GET:
import json # import json
le_token = json.loads(request.GET["token"])[0] le_token = json.loads(request.GET["token"])[0]
import base64 import base64
le_query = base64.b64decode(le_token).decode("utf-8") le_query = base64.b64decode(le_token).decode("utf-8")
...@@ -237,11 +233,15 @@ def graph_share(request, generic=100, specific=100): ...@@ -237,11 +233,15 @@ def graph_share(request, generic=100, specific=100):
# resource_id = cache.ResourceType["Pubmed (xml format)"].id # resource_id = cache.ResourceType["Pubmed (xml format)"].id
# corpus = session.query(Node).filter( Node.type_id==resource_id , Node.user_id==user_id , Node.id==corpus_id , Node.type_id == cache.NodeType['Corpus'].id ).first() # corpus = session.query(Node).filter( Node.type_id==resource_id , Node.user_id==user_id , Node.id==corpus_id , Node.type_id == cache.NodeType['Corpus'].id ).first()
# if corpus==None: return JsonHttpResponse( {"request" : "forbidden"} ) # if corpus==None: return JsonHttpResponse( {"request" : "forbidden"} )
# implicit global session
miamlist = session.query(Node).filter( Node.user_id==user_id , Node.parent_id==corpus_id , Node.type_id == cache.NodeType['MiamList'].id ).first() miamlist = session.query(Node).filter( Node.user_id==user_id , Node.parent_id==corpus_id , Node.type_id == cache.NodeType['MiamList'].id ).first()
if miamlist==None: return JsonHttpResponse( {"request" : "forbidden"} ) if miamlist==None: return JsonHttpResponse( {"request" : "forbidden"} )
graphurl = "node_link_share.json?token="+request.GET["token"] graphurl = "node_link_share.json?token="+request.GET["token"]
date = datetime.datetime.now() date = datetime.datetime.now()
t = get_template('explorer_share.html') t = get_template('explorer_share.html')
html = t.render(Context({\ html = t.render(Context({\
'debug': settings.DEBUG, 'debug': settings.DEBUG,
'date' : date,\ 'date' : date,\
...@@ -252,11 +252,10 @@ def graph_share(request, generic=100, specific=100): ...@@ -252,11 +252,10 @@ def graph_share(request, generic=100, specific=100):
return JsonHttpResponse(request.GET["token"]) return JsonHttpResponse(request.GET["token"])
def node_link_share(request): def node_link_share(request):
data = { "request" : "error" } data = { "request" : "error" }
if request.method== 'GET' and "token" in request.GET: if request.method== 'GET' and "token" in request.GET:
import json # import json
le_token = json.loads(request.GET["token"])[0] le_token = json.loads(request.GET["token"])[0]
import base64 import base64
le_query = base64.b64decode(le_token).decode("utf-8") le_query = base64.b64decode(le_token).decode("utf-8")
...@@ -268,6 +267,8 @@ def node_link_share(request): ...@@ -268,6 +267,8 @@ def node_link_share(request):
from analysis.functions import get_cooc from analysis.functions import get_cooc
data = [] data = []
# implicit global session
corpus = session.query(Node).filter( Node.user_id==user_id , Node.id==corpus_id).first() corpus = session.query(Node).filter( Node.user_id==user_id , Node.id==corpus_id).first()
data = get_cooc(request=request, corpus=corpus, type="node_link") data = get_cooc(request=request, corpus=corpus, type="node_link")
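Both graph_share() and node_link_share() unwrap the share token the same way: json.loads(token)[0], then a base64 decode to recover the query. A small sketch of the matching encoder a client could use (make_share_token is hypothetical, not part of the codebase):

import base64, json

def make_share_token(query):
    # base64-encode the query, then wrap it as the first element of a JSON
    # array, mirroring json.loads(request.GET["token"])[0] + b64decode above
    le_token = base64.b64encode(query.encode("utf-8")).decode("ascii")
    return json.dumps([le_token])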
...@@ -301,7 +302,10 @@ def share_resource(request , resource_id , group_id) : ...@@ -301,7 +302,10 @@ def share_resource(request , resource_id , group_id) :
# [ getting all childs ids of this project ] # [ getting all childs ids of this project ]
ids2changeowner = [ project2share.id ] ids2changeowner = [ project2share.id ]
# implicit global session
corpuses = session.query(Node.id).filter(Node.user_id == request.user.id, Node.parent_id==resource_id , Node.type_id == cache.NodeType["Corpus"].id ).all() corpuses = session.query(Node.id).filter(Node.user_id == request.user.id, Node.parent_id==resource_id , Node.type_id == cache.NodeType["Corpus"].id ).all()
for corpus in corpuses: for corpus in corpuses:
ids2changeowner.append(corpus.id) ids2changeowner.append(corpus.id)
lists = session.query(Node.id,Node.name).filter(Node.user_id == request.user.id, Node.parent_id==corpus.id ).all() lists = session.query(Node.id,Node.name).filter(Node.user_id == request.user.id, Node.parent_id==corpus.id ).all()
......
from admin.env import *
from gargantext_web.db import session, cache, get_or_create_node
from gargantext_web.db import Node, NodeHyperdata, Hyperdata, Ngram, NodeNgram, NodeNgramNgram, NodeHyperdataNgram
from sqlalchemy import func, alias, asc, desc
import sqlalchemy as sa
from sqlalchemy.orm import aliased
from ngram.group import compute_groups, getStemmer
# corpus = Corpus(272)
corpus_id = 540420
corpus = session.query(Node).filter(Node.id==corpus_id).first()
#group = get_or_create_node(corpus=corpus, nodetype="Group")
stop_id = get_or_create_node(nodetype='StopList',corpus=corpus).id
miam_id = get_or_create_node(nodetype='MiamList',corpus=corpus).id
# per-ngram occurrence count, used to rank terms
somme = sa.func.count(NodeNgram.weight)
# top corpus terms, ranked by how often they occur in the documents
ngrams = (session.query(Ngram.id, Ngram.terms, somme )
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.join(Node, Node.id == NodeNgram.node_id)
.filter(Node.parent_id==corpus_id, Node.type_id==cache.NodeType['Document'].id)
.group_by(Ngram.id)
.order_by(desc(somme))
.limit(100000)
)
# terms already flagged in the StopList
stops = (session.query(Ngram.id, Ngram.terms)
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.filter(NodeNgram.node_id == stop_id)
.all()
)
# MiamList terms with their occurrence counts
miams = (session.query(Ngram.id, Ngram.terms, somme)
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.filter(NodeNgram.node_id == miam_id)
.group_by(Ngram.id, Ngram.terms)
.order_by(desc(somme))
.all()
)
# sanity check: the corpus stemmer should map both variants to the same stem
stemmer = getStemmer(corpus)
ws = ['honeybee', 'honeybees']
print(stemmer(ws[0]) == stemmer(ws[1]))  # True if the stemmer conflates singular/plural
#
#for n in miams:
# if n[1] == 'bees':
# print("!" * 30)
# print(n)
# print("-" * 30)
# else:
# print(n)
#
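A hypothetical continuation of this check (not in the commit): bucket the MiamList terms by stem with the same stemmer, to preview which variants a Group node could merge. Assumes stemmer() returns a string and that multi-word terms can be stemmed token by token:

from collections import defaultdict

groups = defaultdict(list)
for ngram_id, terms, count in miams:
    stem_key = " ".join(stemmer(t) for t in terms.split())
    groups[stem_key].append(terms)

for key, members in groups.items():
    if len(members) > 1:  # e.g. 'honeybee' and 'honeybees' share a key
        print(key, "->", members)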