Commit 951381a2 authored by Mathieu Rodic

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into unstable-201511-advancedcharts

parents bdb6ac85 4d8e6141
@@ -3,6 +3,7 @@ import linecache
 from time import time
 from gargantext_web.settings import MEDIA_ROOT
+from django.db import connection

 class DebugTime:
     def __init__(self, prefix):
@@ -19,7 +20,6 @@ class DebugTime:
         self.message = message
         self.time = time()
-
 def ensure_dir(user):
     '''
     If user is new, folder does not exist yet, create it then
@@ -46,3 +46,19 @@ def PrintException():
     print('EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj))
+
+class WorkflowTracking:
+    def __init__( self ):
+        self.hola = "mundo"
+
+    def processing_(self , corpus , step):
+        try:
+            the_query = """ UPDATE node_node SET hyperdata=\'{ \"%s\" : \"%s\"}\' WHERE id=%d """ % ( "Processing", step , corpus.id )
+            cursor = connection.cursor()
+            try:
+                cursor.execute(the_query)
+                cursor.execute("COMMIT;")
+            finally:
+                connection.close()
+        except :
+            PrintException()
\ No newline at end of file
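Note: WorkflowTracking writes the current workflow step straight into the corpus row's hyperdata JSON, bypassing the ORM. A minimal usage sketch, assuming Django's database connection is configured; the FakeCorpus stand-in and the id 1234 are hypothetical:

from admin.utils import WorkflowTracking

class FakeCorpus:        # stand-in for a Node row; processing_() only reads .id
    id = 1234

update_state = WorkflowTracking()
update_state.processing_(FakeCorpus(), "Parsing")   # stores {"Processing": "Parsing"} in node_node.hyperdata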
@@ -14,7 +14,7 @@ def do_cooc(corpus=None
             , field1='ngrams', field2='ngrams'
             , miam_id=None, stop_id=None, group_id=None
             , cvalue_id=None
-            , n_min=2, n_max=None
+            , n_min=1, n_max=None
             , start=None, end=None
             , limit=1000
             , isMonopartite=True
@@ -62,7 +62,6 @@ def do_cooc(corpus=None
     session.commit()
     # END
-
     session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==node_cooc.id).delete()
     session.commit()
@@ -186,17 +185,16 @@ def do_cooc(corpus=None
             cooc = matrix & miam_list
         elif miam_id is not None and stop_id is not None and group_id is None :
             cooc = matrix & (miam_list - stop_list)
         elif miam_id is not None and stop_id is not None and group_id is not None :
             print("miam_id is not None and stop_id is not None and group_id is not None")
-            #cooc = matrix & (miam_list * group_list - stop_list)
-            cooc = matrix & (miam_list - stop_list)
+            cooc = matrix & (miam_list * group_list - stop_list)
+            #cooc = matrix & (miam_list - stop_list)
         elif miam_id is not None and stop_id is None and group_id is not None :
             cooc = matrix & (miam_list * group_list)
         else :
             cooc = matrix
     else:
         cooc = matrix
-    #print(cooc)
-    #print(" x " * 30)
     cooc.save(node_cooc.id)
     return(node_cooc.id)
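Note: the miam/stop/group algebra above uses gargantext's overloaded list and matrix types. A plain-Python sketch of the intended filtering semantics, illustrative only (the real WeightedMatrix/UnweightedList objects behave analogously but carry weights and group mappings):

miam = {"graph", "network", "noise"}          # miam_list: candidate terms
stop = {"noise"}                              # stop_list: terms to exclude
kept = miam - stop                            # miam_list - stop_list

cooc = {("graph", "network"): 3.0, ("graph", "noise"): 1.0}
cooc_filtered = {pair: w for pair, w in cooc.items()       # matrix & (...)
                 if pair[0] in kept and pair[1] in kept}
print(cooc_filtered)                          # {('graph', 'network'): 3.0}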
from admin.utils import PrintException
from gargantext_web.db import *
from collections import defaultdict
from operator import itemgetter
from django.db import connection, transaction
import math
from math import log,sqrt
import scipy
from gargantext_web.db import get_or_create_node
import pandas as pd
from copy import copy
import numpy as np
import scipy
import networkx as nx
from networkx.readwrite import json_graph
from rest_v1_0.api import JsonHttpResponse
from analysis.louvain import best_partition, generate_dendogram, partition_at_level
from ngram.lists import listIds
from sqlalchemy.orm import aliased
def diag_null(x):
    return x - x * scipy.eye(x.shape[0])

def do_distance(cooc_id, field1=None, field2=None, isMonopartite=True, distance='conditional'):
    '''
    do_distance :: Int -> (Graph, Partition, {ids}, {weight})
    '''
    authorized = ['conditional', 'distributional', 'cosine']
    if distance not in authorized:
        distance = 'conditional'

    matrix = defaultdict(lambda : defaultdict(float))
    ids = defaultdict(lambda : defaultdict(int))
    labels = dict()
    weight = dict()

    Cooc = aliased(NodeNgramNgram)
    query = session.query(Cooc).filter(Cooc.node_id==cooc_id).all()

    for cooc in query:
        matrix[cooc.ngramx_id][cooc.ngramy_id] = cooc.score
        matrix[cooc.ngramy_id][cooc.ngramx_id] = cooc.score
        ids[cooc.ngramx_id] = (field1, cooc.ngramx_id)
        ids[cooc.ngramy_id] = (field2, cooc.ngramy_id)
        weight[cooc.ngramx_id] = weight.get(cooc.ngramx_id, 0) + cooc.score
        weight[cooc.ngramy_id] = weight.get(cooc.ngramy_id, 0) + cooc.score

    x = pd.DataFrame(matrix).fillna(0)

    if distance == 'conditional':
        x = x / x.sum(axis=1)
        #y = y / y.sum(axis=0)

        xs = x.sum(axis=1) - x
        ys = x.sum(axis=0) - x

        # top inclus ou exclus
        n = ( xs + ys) / (2 * (x.shape[0] - 1))
        # top generic or specific
        m = ( xs - ys) / (2 * (x.shape[0] - 1))

        n = n.sort(inplace=False)
        m = m.sort(inplace=False)

        nodes_included = 500 #int(round(size/20,0))
        #nodes_excluded = int(round(size/10,0))
        nodes_specific = 500 #int(round(size/10,0))
        #nodes_generic = int(round(size/10,0))

        # TODO use the included score for the node size
        n_index = pd.Index.intersection(x.index, n.index[:nodes_included])
        # Generic:
        #m_index = pd.Index.intersection(x.index, m.index[:nodes_generic])
        # Specific:
        m_index = pd.Index.intersection(x.index, m.index[-nodes_specific:])
        #m_index = pd.Index.intersection(x.index, n.index[:nodes_included])
        x_index = pd.Index.union(n_index, m_index)
        xx = x[list(x_index)].T[list(x_index)]

        # Removing unconnected nodes
        xxx = xx.values
        threshold = min(xxx.max(axis=1))
        matrix_filtered = np.where(xxx >= threshold, xxx, 0)
        #matrix_filtered = matrix_filtered.resize((90,90))

        G = nx.from_numpy_matrix(np.matrix(matrix_filtered))
        G = nx.relabel_nodes(G, dict(enumerate([ ids[id_][1] for id_ in list(xx.columns)])))

    elif distance == 'cosine':
        scd = defaultdict(lambda : defaultdict(int))
        for i in matrix.keys():
            for j in matrix.keys():
                numerator = sum(
                    [
                        matrix[i][k] * matrix[j][k]
                        for k in matrix.keys()
                        if i != j and k != i and k != j
                    ]
                )
                denominator = sqrt(
                    sum([
                        matrix[i][k]
                        for k in matrix.keys()
                        if k != i and k != j #and matrix[i][k] > 0
                    ])
                    *
                    sum([
                        matrix[i][k]
                        for k in matrix.keys()
                        if k != i and k != j #and matrix[i][k] > 0
                    ])
                )
                try:
                    scd[i][j] = numerator / denominator
                except Exception as error:
                    scd[i][j] = 0

        minmax = min([ max([ scd[i][j] for i in scd.keys()]) for j in scd.keys()])

        G = nx.DiGraph()
        G.add_edges_from(
            [
                (i, j, {'weight': scd[i][j]})
                for i in scd.keys() for j in scd.keys()
                if i != j and scd[i][j] > minmax and scd[i][j] > scd[j][i]
            ]
        )

    elif distance == 'distributional':
        mi = defaultdict(lambda : defaultdict(int))
        total_cooc = x.sum().sum()

        for i in matrix.keys():
            si = sum([matrix[i][j] for j in matrix[i].keys() if i != j])
            for j in matrix[i].keys():
                sj = sum([matrix[j][k] for k in matrix[j].keys() if j != k])
                if i!=j :
                    mi[i][j] = log( matrix[i][j] / ((si * sj) / total_cooc) )

        r = defaultdict(lambda : defaultdict(int))

        for i in matrix.keys():
            for j in matrix.keys():
                sumMin = sum(
                    [
                        min(mi[i][k], mi[j][k])
                        for k in matrix.keys()
                        if i != j and k != i and k != j and mi[i][k] > 0
                    ]
                )
                sumMi = sum(
                    [
                        mi[i][k]
                        for k in matrix.keys()
                        if k != i and k != j and mi[i][k] > 0
                    ]
                )
                try:
                    r[i][j] = sumMin / sumMi
                except Exception as error:
                    r[i][j] = 0

        # Need to filter the weak links, automatic threshold here
        minmax = min([ max([ r[i][j] for i in r.keys()]) for j in r.keys()])

        G = nx.DiGraph()
        G.add_edges_from(
            [
                (i, j, {'weight': r[i][j]})
                for i in r.keys() for j in r.keys()
                if i != j and r[i][j] > minmax and r[i][j] > r[j][i]
            ]
        )

    # degree_max = max([(n, d) for n,d in G.degree().items()], key=itemgetter(1))[1]
    # nodes_to_remove = [n for (n,d) in G.degree().items() if d <= round(degree_max/2)]
    # G.remove_nodes_from(nodes_to_remove)

    # Removing too connected nodes (find automatic way to do it)
    #edges_to_remove = [ e for e in G.edges_iter() if
    # nodes_to_remove = [n for n in degree if degree[n] <= 1]
    # G.remove_nodes_from(nodes_to_remove)

    def getWeight(item):
        return item[1]
    #
    # node_degree = sorted(G.degree().items(), key=getWeight, reverse=True)
    # #print(node_degree)
    # nodes_too_connected = [n[0] for n in node_degree[0:(round(len(node_degree)/5))]]
    #
    # for n in nodes_too_connected:
    #     n_edges = list()
    #     for v in nx.neighbors(G,n):
    #         #print((n, v), G[n][v]['weight'], ":", (v,n), G[v][n]['weight'])
    #         n_edges.append(((n, v), G[n][v]['weight']))
    #
    #     n_edges_sorted = sorted(n_edges, key=getWeight, reverse=True)
    #     #G.remove_edges_from([ e[0] for e in n_edges_sorted[round(len(n_edges_sorted)/2):]])
    #     #G.remove_edges_from([ e[0] for e in n_edges_sorted[(round(len(nx.neighbors(G,n))/3)):]])
    #     G.remove_edges_from([ e[0] for e in n_edges_sorted[10:]])

    G.remove_nodes_from(nx.isolates(G))
    partition = best_partition(G.to_undirected())

    return(G,partition,ids,weight)
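Note: the 'cosine' branch scores two terms by their co-occurrence profiles over shared neighbours. A self-contained toy run of the same loops (it faithfully reproduces the code above, where both denominator sums read matrix[i][k]):

from collections import defaultdict
from math import sqrt

matrix = defaultdict(lambda: defaultdict(float))
for a, b, w in [(1, 2, 3.0), (1, 3, 1.0), (2, 3, 2.0)]:   # toy symmetric cooc matrix
    matrix[a][b] = w
    matrix[b][a] = w

scd = defaultdict(lambda: defaultdict(float))
for i in list(matrix):
    for j in list(matrix):
        if i == j:
            continue
        ks = [k for k in matrix if k not in (i, j)]
        num = sum(matrix[i][k] * matrix[j][k] for k in ks)
        den = sqrt(sum(matrix[i][k] for k in ks) * sum(matrix[i][k] for k in ks))
        scd[i][j] = num / den if den else 0.0

print(scd[1][2])   # 2.0: terms 1 and 2 are linked through their shared neighbour 3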
@@ -12,6 +12,7 @@ import scipy
 from gargantext_web.db import get_or_create_node
 from analysis.cooccurrences import do_cooc
+from analysis.distance import do_distance
 import pandas as pd
 from copy import copy
@@ -26,114 +27,13 @@ from analysis.louvain import best_partition, generate_dendogram, partition_at_le
 from ngram.lists import listIds
 from sqlalchemy.orm import aliased
-
-def diag_null(x):
-    return x - x * scipy.eye(x.shape[0])
-
-def do_distance(cooc_id, field1=None, field2=None, isMonopartite=True):
-    '''
-    do_distance :: Int -> (Graph, Partition, {ids}, {weight})
-    '''
-    #print([n for n in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all()])
-    matrix = defaultdict(lambda : defaultdict(float))
-    ids = defaultdict(lambda : defaultdict(int))
-    labels = dict()
-    weight = dict()
-
-    Cooc = aliased(NodeNgramNgram)
-    query = session.query(Cooc).filter(Cooc.node_id==cooc_id).all()
-
-    for cooc in query:
-        matrix[cooc.ngramx_id][cooc.ngramy_id] = cooc.score
-        matrix[cooc.ngramy_id][cooc.ngramx_id] = cooc.score
-        ids[cooc.ngramx_id] = (field1, cooc.ngramx_id)
-        ids[cooc.ngramy_id] = (field2, cooc.ngramy_id)
-        weight[cooc.ngramx_id] = weight.get(cooc.ngramx_id, 0) + cooc.score
-        weight[cooc.ngramy_id] = weight.get(cooc.ngramy_id, 0) + cooc.score
-
-    x = pd.DataFrame(matrix).fillna(0)
-    y = pd.DataFrame(matrix).fillna(0)
-
-    #xo = diag_null(x)
-    #y = diag_null(y)
-
-    x = x / x.sum(axis=1)
-    y = y / y.sum(axis=0)
-
-    xs = x.sum(axis=1) - x
-    ys = x.sum(axis=0) - x
-
-    # top inclus ou exclus
-    n = ( xs + ys) / (2 * (x.shape[0] - 1))
-    # top generic or specific
-    m = ( xs - ys) / (2 * (x.shape[0] - 1))
-
-    n = n.sort(inplace=False)
-    m = m.sort(inplace=False)
-
-    nodes_included = 500 #int(round(size/20,0))
-    #nodes_excluded = int(round(size/10,0))
-    nodes_specific = 500 #int(round(size/10,0))
-    #nodes_generic = int(round(size/10,0))
-
-    # TODO use the included score for the node size
-    n_index = pd.Index.intersection(x.index, n.index[:nodes_included])
-    # Generic:
-    #m_index = pd.Index.intersection(x.index, m.index[:nodes_generic])
-    # Specific:
-    m_index = pd.Index.intersection(x.index, m.index[-nodes_specific:])
-    #m_index = pd.Index.intersection(x.index, n.index[:nodes_included])
-    x_index = pd.Index.union(n_index, m_index)
-    xx = x[list(x_index)].T[list(x_index)]
-
-    # Removing unconnected nodes
-    xxx = xx.values
-    threshold = min(xxx.max(axis=1))
-    matrix_filtered = np.where(xxx >= threshold, xxx, 0)
-    #matrix_filtered = matrix_filtered.resize((90,90))
-
-    G = nx.from_numpy_matrix(np.matrix(matrix_filtered))
-    #G = nx.from_numpy_matrix(matrix_filtered, create_using=nx.MultiDiGraph())
-    G = nx.relabel_nodes(G, dict(enumerate([ ids[id_][1] for id_ in list(xx.columns)])))
-
-    # Removing too connected nodes (find automatic way to do it)
-    #edges_to_remove = [ e for e in G.edges_iter() if
-    # nodes_to_remove = [n for n in degree if degree[n] <= 1]
-    # G.remove_nodes_from(nodes_to_remove)
-
-    def getWeight(item):
-        return item[1]
-
-    node_degree = sorted(G.degree().items(), key=getWeight, reverse=True)
-    #print(node_degree)
-    nodes_too_connected = [n[0] for n in node_degree[0:(round(len(node_degree)/5))]]
-
-    for n in nodes_too_connected:
-        n_edges = list()
-        for v in nx.neighbors(G,n):
-            n_edges.append(((n, v), G[n][v]['weight']))
-
-        n_edges_sorted = sorted(n_edges, key=getWeight, reverse=True)
-        #G.remove_edges_from([ e[0] for e in n_edges_sorted[round(len(n_edges_sorted)/2):]])
-        G.remove_edges_from([ e[0] for e in n_edges_sorted[(round(len(nx.neighbors(G,n))/3)):]])
-
-    G.remove_nodes_from(nx.isolates(G))
-    partition = best_partition(G.to_undirected())
-
-    return(G,partition,ids,weight)
-
 def get_cooc(request=None, corpus=None
            , field1='ngrams', field2='ngrams'
            , cooc_id=None, type='node_link', size=1000
            , start=None, end=None
            , hapax=1
+           , distance='conditional'
         ):
     '''
     get_ccoc : to compute the graph.
@@ -158,7 +58,8 @@ def get_cooc(request=None, corpus=None
                      , miam_id=miam_id, group_id=group_id, stop_id=stop_id, limit=size
                      , isMonopartite=True, start=start , end=end , hapax=hapax)
-    G, partition, ids, weight = do_distance(cooc_id, field1="ngrams", field2="ngrams", isMonopartite=True)
+    G, partition, ids, weight = do_distance(cooc_id, field1="ngrams", field2="ngrams"
+                                           , isMonopartite=True, distance=distance)

     if type == "node_link":
         nodesB_dict = {}
...
@@ -111,10 +111,16 @@ class NgramEdit(APIView):
        node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus )
        results = session.query(NodeNgram).filter(NodeNgram.node_id==node_mapList.id ).all()
        ngram_2del = [int(i) for i in ngram_ids.split('+')]
-       ngram_2del = session.query(NodeNgram).filter(NodeNgram.node_id==node_mapList.id , NodeNgram.ngram_id.in_(ngram_2del) ).all()
+       ngram_2del_ = session.query(NodeNgram).filter(NodeNgram.node_id==node_mapList.id , NodeNgram.ngram_id.in_(ngram_2del) ).all()
-       for map_node in ngram_2del:
+       for map_node in ngram_2del_:
            session.delete(map_node)
        session.commit()
+
+       node_stopList = get_or_create_node(nodetype='StopList', corpus=corpus )
+       for ngram_id in ngram_2del:
+           stop_node = NodeNgram( weight=1.0, ngram_id=ngram_id , node_id=node_stopList.id)
+           session.add(stop_node)
+       session.commit()
        # [ = = = = / del from map-list = = = = ]
        return Response(None, 204)
@@ -193,3 +199,5 @@ class Document(APIView):
            'id': node.id
        }
        return Response(data)
@@ -17,11 +17,6 @@ def get_team():
    '''
    team = [
-            { 'first_name' : 'Alexandre', 'last_name' : 'Delanoë',
-              'mail' : 'alexandre+gargantextATdelanoe.org',
-              'website' : 'http://alexandre.delanoe.org',
-              'picture' : 'alexandre.jpg',
-              'role' : 'principal investigator, developer'},
            { 'first_name' : 'David', 'last_name' : 'Chavalarias',
              'mail' : 'david.chavalariasATiscpif.fr',
@@ -46,6 +41,12 @@ def get_team():
              'picture' : 'samuel.jpg',
              'role' : 'developer'},
+            { 'first_name' : 'Alexandre', 'last_name' : 'Delanoë',
+              'mail' : 'alexandre+gargantextATdelanoe.org',
+              'website' : 'http://alexandre.delanoe.org',
+              'picture' : 'alexandre.jpg',
+              'role' : 'principal investigator, developer'},
            #{ 'first_name' : '', 'name' : '', 'mail' : '', 'website' : '', 'picture' : ''},
            # copy paste the line above and write your informations please
            ]
@@ -53,21 +54,37 @@ def get_team():
    random.shuffle(team)
    return(team)

-def get_sponsors():
+def get_partners():
    '''
    Function to get list of each sponsor as dict of institutional informations.
    '''
-    sponsors = [
+    institutions = [
            { 'name' : 'Mines ParisTech', 'website' : 'http://mines-paristech.fr', 'picture' : 'mines.png', 'funds':''},
            { 'name' : 'Institut Pasteur', 'website' : 'http://www.pasteur.fr', 'picture' : 'pasteur.png', 'funds':''},
-            { 'name' : 'Forccast', 'website' : 'http://forccast.hypotheses.org/', 'picture' : 'forccast.png', 'funds':''},
            { 'name' : 'ADEME', 'website' : 'http://www.ademe.fr', 'picture' : 'ademe.png', 'funds':''},
            { 'name' : 'EHESS', 'website' : 'http://www.ehess.fr', 'picture' : 'ehess.png', 'funds':''},
            #{ 'name' : '', 'website' : '', 'picture' : '', 'funds':''},
            # copy paste the line above and write your informations please
            ]

-    random.shuffle(sponsors)
-    return(sponsors)
+    labos = [
+            { 'name' : 'Centre de Sociologie de l\'innovation', 'website' : 'http://www.csi.mines-paristech.fr/en/', 'picture' : 'csi.png', 'funds':''},
+            #{ 'name' : '', 'website' : '', 'picture' : '', 'funds':''},
+            # copy paste the line above and write your informations please
+            ]
+
+    grants = [
+            { 'name' : 'Forccast', 'website' : 'http://forccast.hypotheses.org/', 'picture' : 'forccast.png', 'funds':''},
+            { 'name' : 'Mastodons', 'website' : 'http://www.cnrs.fr/mi/spip.php?article53&lang=fr', 'picture' : 'mastodons.png', 'funds':''},
+            #{ 'name' : '', 'website' : '', 'picture' : '', 'funds':''},
+            # copy paste the line above and write your informations please
+            ]
+
+    random.shuffle(institutions)
+    random.shuffle(grants)
+    return(institutions,labos,grants)
@@ -23,32 +23,30 @@ def apply_sum(x, y):
from parsing.corpustools import parse_resources, extract_ngrams #add_resource,
from ngram.lists import ngrams2miam
-from admin.utils import PrintException
+from admin.utils import WorkflowTracking

-def update_processing(corpus, step=0):
-    try:
-        corpus.hyperdata.update({'Processing' : step})
-        session.query(Node).filter(Node.id==corpus.id).update({'hyperdata' : corpus.hyperdata})
-        session.commit()
-    except :
-        PrintException()

@shared_task
def apply_workflow(corpus_id):
+    update_state = WorkflowTracking()
    corpus = session.query(Node).filter(Node.id==corpus_id).first()
-    update_processing(corpus, 1)
+    update_state.processing_(corpus, "Parsing")
    #cProfile.runctx('parse_resources(corpus)', global,locals)
    parse_resources(corpus)
-    update_processing(corpus, 2)
+    update_state.processing_(corpus, "Terms extraction")
    extract_ngrams(corpus, ['title', 'abstract'], nlp=True)
-    update_processing(corpus, 3)
+    # update_state.processing_(corpus, "")
    ngram_workflow(corpus)
    #ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id)
-    update_processing(corpus, 0)
+    print("End of the Workflow for corpus %d" % (corpus_id))
+    update_state.processing_(corpus, "0")

@shared_task
def empty_trash(corpus_id):
@@ -63,4 +61,3 @@ def empty_trash(corpus_id):
        node.delete()
    print("Nodes deleted")
@@ -276,5 +276,3 @@ def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hy
        session.commit()
    #print(parent_id, n.parent_id, n.id, n.name)
    return(node)
@@ -91,11 +91,10 @@ urlpatterns = patterns('',
    ############################################################################
    url(r'^tests/', include('tests.urls')),
-    # TODO Samuel, lines below were on your tests, are they still used ?
-    # can we delete them ?
-    url(r'^project/(\d+)/corpus/(\d+)/terms/ngrams.json$', samtest.get_ngrams_json),
    url(r'^project/(\d+)/corpus/(\d+)/terms$', samtest.get_ngrams),
-    url(r'^project/(\d+)/corpus/(\d+)/stop_list.json$', samtest.get_stoplist)
+    url(r'^api/corpus/(\d+)$', samtest.get_corpus_state),
+    url(r'^test_cores$', samtest.get_cores)
)
...
@@ -77,7 +77,8 @@ def logo(request):
    if group == "cnrs":
        color = "#093558"
    else:
-        color = "#ff8080"
+        # color of the css adapted to the logo
+        color = "#AE5C5C"
    svg_data = template.render(Context({\
        'color': color,\
        }))
@@ -164,13 +165,15 @@ def get_about(request):
    date = datetime.datetime.now()
    members = about.get_team()
-    sponsors = about.get_sponsors()
+    institutions,labos,grants = about.get_partners()

    html = template.render(Context({\
        'user': user,\
        'date': date,\
        'team': members,\
-        'sponsors':sponsors,\
+        'institutions': institutions,\
+        'labos': labos,\
+        'grants': grants,\
        }))
    return HttpResponse(html)
@@ -342,12 +345,14 @@ def corpus(request, project_id, corpus_id):
    type_doc_id = cache.NodeType['Document'].id
    number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0]
+    the_query = """ SELECT hyperdata FROM node_node WHERE id=%d """ % ( int(corpus_id) )
+    cursor = connection.cursor()
    try:
-        processing = corpus.hyperdata['Processing']
-    except Exception as error:
-        print(error)
-        processing = 0
+        cursor.execute(the_query)
+        processing = cursor.fetchone()[0]["Processing"]
+    except:
+        processing = "Error"
+    print('processing', processing)

    html = t.render(Context({
        'debug': settings.DEBUG,
@@ -566,13 +571,17 @@ def graph(request, project_id, corpus_id, generic=100, specific=100):
    project_type_id = cache.NodeType['Project'].id
    corpus_type_id = cache.NodeType['Corpus'].id
+    miamlist_type_id = cache.NodeType['MiamList'].id
+    miamlist = session.query(Node).filter(Node.user_id == request.user.id , Node.parent_id==corpus_id , Node.type_id == cache.NodeType['MiamList'].id ).first()

    graphurl = "corpus/"+str(corpus_id)+"/node_link.json"
    html = t.render(Context({\
        'debug': settings.DEBUG,
-        'user' : user,\
+        'user': request.user,\
        'date' : date,\
        'corpus' : corpus,\
+        'list_id' : miamlist.id,\
        'project' : project,\
        'graphfile' : graphurl,\
        }))
...
@@ -140,7 +140,7 @@ def project(request, project_id):
            parent_id = project_id,
            type_id = cache.NodeType['Corpus'].id,
            language_id = language_id,
-            hyperdata = {'Processing' : 1,}
+            hyperdata = {'Processing' : "Parsing documents",}
        )
        session.add(corpus)
        session.commit()
@@ -212,7 +212,8 @@ def tfidf(request, corpus_id, ngram_ids):
        .query(Node, func.sum(NodeNodeNgram.score))
        .join(NodeNodeNgram, NodeNodeNgram.nodey_id == Node.id)
        .filter(NodeNodeNgram.nodex_id == tfidf_id)
-        .filter(NodeNodeNgram.ngram_id.in_(ngram_ids))
+        .filter(Node.type_id == cache.NodeType['Document'].id)
+        .filter(or_(*[NodeNodeNgram.ngram_id==ngram_id for ngram_id in ngram_ids]))
        .group_by(Node)
        .order_by(func.sum(NodeNodeNgram.score).desc())
        .limit(limit)
@@ -221,8 +222,21 @@ def tfidf(request, corpus_id, ngram_ids):
    # print("in TFIDF:")
    # print("\tcorpus_id:",corpus_id)
    # convert query result to a list of dicts
+    if nodes_query is None:
+        print("TFIDF error, juste take sums")
+        nodes_query = (session
+            .query(Node, func.sum(NodeNgram.weight))
+            .join(NodeNgram, NodeNgram.node_id == Node.id)
+            .filter(Node.parent_id == corpus_id)
+            .filter(Node.type_id == cache.NodeType['Document'].id)
+            .filter(or_(*[NodeNgram.ngram_id==ngram_id for ngram_id in ngram_ids]))
+            .group_by(Node)
+            .order_by(func.sum(NodeNgram.weight).desc())
+            .limit(limit)
+        )
    for node, score in nodes_query:
-        # print("\t corpus:",corpus_id,"\t",node.name)
+        print("\t corpus:",corpus_id,"\t",node.name)
        node_dict = {
            'id': node.id,
            'score': score,
...
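Note: the change above swaps `.in_(ngram_ids)` for an explicit `or_()` chain. Both build the same kind of SQL predicate; a standalone SQLAlchemy sketch showing the two renderings:

from sqlalchemy import column, or_

ngram_ids = [11, 12, 13]
print(or_(*[column('ngram_id') == i for i in ngram_ids]))
# ngram_id = :ngram_id_1 OR ngram_id = :ngram_id_2 OR ngram_id = :ngram_id_3
print(column('ngram_id').in_(ngram_ids))
# ngram_id IN (...) -- exact placeholder rendering varies by SQLAlchemy version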
@@ -93,7 +93,7 @@ node_types = [
    'Project', 'Corpus', 'Document',
    'MiamList', 'StopList', 'MainList', 'MapList', # TODO MiamList -> MainList
    'Stem', 'Lem', 'Group', 'Tfidf', 'Tfidf (global)', 'Cvalue', 'Specificity'
-    , 'Cooccurrence',
+    , 'Cooccurrence', 'Occurrences',
]

for node_type in node_types:
...
+from admin.env import *
+import sys
from node.models import User
from django.core.mail import send_mail
@@ -53,13 +55,17 @@ def active_user(username, active=True):
    user.active_user = active
    user.save()

-def mines_account_creation(fichier=None):
+def mass_account_creation(fichier=None):
    if fichier is None:
-        fichier = "/home/alexandre/projets/forccast/Tutorat/2014-2015/comptes_gargantext.csv"
+        fichier = "/tmp/comptes.csv"
    accounts = open(fichier, "r")
    for line in accounts.readlines():
        username, email, password, fin = line.split(',')
-        create_user(username, email, password=password, notify=False)
+        create_user(username, email, password=password, active=True, notify=False)
        #delete_user(username)
    accounts.close()
+
+if __name__ == "__main__":
+    mass_account_creation(fichier=sys.argv[1])
@@ -15,10 +15,15 @@ from sqlalchemy.orm import aliased
from ngram.tools import insert_ngrams
import csv

-def compute_mapList(corpus,limit=500):
+def compute_mapList(corpus,limit=500,n=1):
    '''
    According to Specificities and stoplist,
    '''
+    monograms_part = 0.005
+    monograms_limit = round(limit * monograms_part)
+    multigrams_limit = limit - monograms_limit

    dbg = DebugTime('Corpus #%d - computing Miam' % corpus.id)
    node_miam = get_or_create_node(nodetype='MiamList', corpus=corpus)
@@ -33,18 +38,38 @@ def compute_mapList(corpus,limit=500):
    Spec=aliased(NodeNodeNgram)

-    top_ngrams = (session.query(Spec.ngram_id, Spec.score)
+    query = (session.query(Spec.ngram_id, Spec.score)
        .join(Miam, Spec.ngram_id == Miam.ngram_id)
+        .join(Ngram, Ngram.id == Spec.ngram_id)
        #.outerjoin(Group, Group.ngramy_id == Spec.ngram_id)
        #.outerjoin(Stop, Stop.ngram_id == Spec.ngram_id)
        .filter(Miam.node_id == node_miam.id)
        #.filter(Group.node_id == node_group.id)
        #.filter(Stop.node_id == node_stop.id)
        .filter(Spec.nodex_id == node_spec.id)
+        )
+
+    top_monograms = (query
+        .filter(Ngram.n == 1)
        .order_by(desc(Spec.score))
-        .limit(limit)
+        .limit(monograms_limit)
        )
+
+    top_multigrams = (query
+        .filter(Ngram.n >= 2)
+        .order_by(desc(Spec.score))
+        .limit(multigrams_limit)
+        )
+
+    stop_ngrams = (session.query(NodeNgram.ngram_id)
+        .filter(NodeNgram.node_id == node_stop.id)
+        .all()
+        )
+
+    grouped_ngrams = (session.query(NodeNgramNgram.ngramy_id)
+        .filter(NodeNgramNgram.node_id == node_group.id)
+        .all()
+        )

    #print([t for t in top_ngrams])
    node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus)
@@ -53,7 +78,9 @@ def compute_mapList(corpus,limit=500):
    data = zip(
        [node_mapList.id for i in range(1,limit)]
-        , [n[0] for n in top_ngrams]
+        , [n[0] for n in list(top_multigrams) + list(top_monograms)
+           if (n[0],) not in list(stop_ngrams) + list(grouped_ngrams)
+          ]
        , [1 for i in range(1,limit)]
        )
    #print([d for d in data])
@@ -100,37 +127,3 @@ def insert_miam(corpus, ngrams=None, path_file_csv=None):

    #compute_mapList(corpus)
    #insert_miam(corpus=corpus, path_file_csv="Thesaurus_tag.csv")
-
-#def getNgrams(corpus=None, limit_inf=600, limit_sup=3000):
-#    '''
-#    getNgrams :: Corpus -> [(Int, String)] -> [(Int, String)]
-#    For a corpus, gives list of highest Cvalue ngrams and highest TFIDF (global)
-#    ngrams that have to be grouped with
-#    '''
-#    #tfidf_node = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus)
-#    cvalue_node = get_or_create_node(nodetype='Cvalue', corpus=corpus)
-#    spec_node = get_or_create_node(nodetype='Specificity', corpus=corpus)
-#
-#
-#    #tfidf_ngrams = queryNodeNodeNgram(nodeMeasure_id=tfidf_node.id, corpus_id=corpus.id)
-#    cvalue_ngrams = queryNodeNodeNgram(nodeMeasure_id=cvalue_node.id, corpus_id=corpus.id, limit=limit_sup)
-#    spec_ngrams = queryNodeNodeNgram(nodeMeasure_id=spec_node.id, corpus_id=corpus.id, limit=limit_inf)
-#
-#    #print([n for n in tfidf_ngrams])
-#
-#    def list2set(_list):
-#        _set = set()
-#        for n in _list:
-#            _set.add((n[0],n[1]))
-#        return(_set)
-#
-#    cvalue_set = set()
-#    spec_set = set()
-#
-#    cvalue_set = list2set(cvalue_ngrams)
-#    spec_set = list2set(spec_ngrams)
-#
-#    cvalue_setDiff = cvalue_set.difference(spec_set)
-#
-#    return(spec_set,cvalue_setDiff)
-#
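Note: the monogram/multigram split in compute_mapList is easiest to see with the default limit. Worked example:

limit = 500
monograms_part = 0.005
monograms_limit = round(limit * monograms_part)   # round(2.5) -> 2 in Python 3 (banker's rounding)
multigrams_limit = limit - monograms_limit        # 498
print(monograms_limit, multigrams_limit)          # 2 498

So at most 2 single-word terms and 498 multi-word terms reach the map list.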
from gargantext_web.db import session, cache, get_cursor
from gargantext_web.db import Node, NodeNgram, NodeNodeNgram
from gargantext_web.db import get_or_create_node
from admin.utils import DebugTime
def compute_occs(corpus):
    dbg = DebugTime('Corpus #%d - OCCURRENCES' % corpus.id)
    dbg.show('Calculate occurrences')
    occs_node = get_or_create_node(nodetype='Occurrences', corpus=corpus)
    #print(occs_node.id)

    (session.query(NodeNodeNgram)
        .filter(NodeNodeNgram.nodex_id==occs_node.id).delete()
    )
    session.commit()

    db, cursor = get_cursor()
    cursor.execute('''
        INSERT INTO
            %s (nodex_id, nodey_id, ngram_id, score)
        SELECT
            %d AS nodex_id,
            %d AS nodey_id,
            nodengram.ngram_id AS ngram_id,
            SUM(nodengram.weight) AS score
        FROM
            %s AS nodengram
        INNER JOIN
            %s AS node ON nodengram.node_id = node.id
        WHERE
            node.parent_id = %d
        AND
            node.type_id = %d
        GROUP BY
            nodengram.ngram_id
        ''' % ( NodeNodeNgram.__table__.name
              , occs_node.id, corpus.id
              , NodeNgram.__table__.name
              , Node.__table__.name
              , corpus.id
              , cache.NodeType['Document'].id
              )
    )
    db.commit()
    #data = session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==occs_node.id).all()
    #print([n for n in data])
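Note: a hedged usage sketch for the new occurrences step. The import path matches the commented-out `from ngram.occurrences import compute_occs` in the workflow below; the corpus id is illustrative:

from gargantext_web.db import session, Node
from ngram.occurrences import compute_occs

corpus = session.query(Node).filter(Node.id == 1234).first()
compute_occs(corpus)   # one INSERT ... SELECT ... GROUP BY; all counting happens in SQL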
@@ -127,6 +127,9 @@ def compute_tfidf_global(corpus):
    tfidf_node = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus)
+    session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==tfidf_node.id).delete()
+    session.commit()

    # compute terms frequency sum
    db, cursor = get_cursor()
@@ -171,8 +174,7 @@ def compute_tfidf_global(corpus):
        INSERT INTO
            tmp__idf(ngram_id, idf)
        SELECT
-            node_ngram.ngram_id,
-            -ln(COUNT(*))
+            node_ngram.ngram_id, -ln(COUNT(*))
        FROM
            %s AS node_ngram
        INNER JOIN
@@ -183,10 +185,10 @@ def compute_tfidf_global(corpus):
            %s as corpus ON corpus.id = doc.parent_id
        WHERE
            doc.language_id = %d AND doc.type_id = %d AND corpus.type_id=%d
-            AND RANDOM() < 0.01
+            -- AND RANDOM() < 0.01
        GROUP BY
            node_ngram.ngram_id
-        limit 10000
+        -- limit 10000
        ;
    ''' % (Node_Ngram.__table__.name
        , Node.__table__.name
@@ -202,8 +204,7 @@ def compute_tfidf_global(corpus):
        INSERT INTO
            tmp__idf(ngram_id, idf)
        SELECT
-            node_ngram.ngram_id,
-            -ln(COUNT(*))
+            node_ngram.ngram_id, -ln(COUNT(*))
        FROM
            %s AS node_ngram
        INNER JOIN
@@ -217,7 +218,7 @@ def compute_tfidf_global(corpus):
            AND RANDOM() < 0.01
        GROUP BY
            node_ngram.ngram_id
-        limit 10000
+        -- limit 10000
        ;
    ''' % (Node_Ngram.__table__.name
        , Node.__table__.name
@@ -238,7 +239,6 @@ def compute_tfidf_global(corpus):
    lnD = log(D)
    cursor.execute('UPDATE tmp__idf SET idf = idf + %f' % (lnD, ))

    # show off
-    dbg.show('insert tfidf')
    cursor.execute('''
        INSERT INTO
            %s (nodex_id, nodey_id, ngram_id, score)
@@ -254,6 +254,7 @@ def compute_tfidf_global(corpus):
    ''' % (NodeNodeNgram.__table__.name, tfidf_node.id, corpus.id, ))
    db.commit()
+    dbg.show('insert tfidf')

#corpus=session.query(Node).filter(Node.id==244250).first()
#compute_tfidf_global(corpus)
-from gargantext_web.db import Ngram, NodeNgramNgram
-from gargantext_web.db import get_cursor, bulk_insert
+from gargantext_web.db import session
+from gargantext_web.db import Ngram, NodeNgram, NodeNgramNgram
+from gargantext_web.db import get_cursor, bulk_insert, get_or_create_node
+
+def insert_ngrams_to_list(list_of_ngrams, corpus, list_type='MapList', erase=True):
+    '''
+    Works only for Stop and Map
+    '''
+    list_node = get_or_create_node(corpus=corpus, nodetype=list_type)
+    group_node = get_or_create_node(corpus=corpus, nodetype='GroupList')
+    group_list = (session.query(NodeNgramNgram.ngramy_id)
+                  .filter(NodeNgramNgram.id==group_node.id)
+                  .all()
+                  )
+    #print(list_node)
+    if erase == True:
+        session.query(NodeNgram).filter(NodeNgram.node_id==list_node.id).delete()
+        session.commit()
+
+    def get_id(ngram):
+        query = session.query(Ngram.id).filter(Ngram.terms==ngram).first()
+        return(query)
+
+    list_to_insert = list()
+    for ngram in list_of_ngrams:
+        ngram_candidate = get_id(ngram)
+        if ngram_candidate is not None:
+            ngram_id = ngram_candidate[0]
+            if ngram_id is not None and ngram_id not in group_list:
+                list_to_insert.append((list_node.id, ngram_id, 1))
+    #print(list_to_insert)
+    db, cursor = get_cursor()
+    bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], [n for n in list_to_insert])
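Note: a hedged usage sketch for insert_ngrams_to_list; the module path ngram.tools is assumed from the import in mapList above, and the corpus id and terms are illustrative:

from gargantext_web.db import session, Node
from ngram.tools import insert_ngrams_to_list

corpus = session.query(Node).filter(Node.id == 1234).first()
insert_ngrams_to_list(['the', 'et al'], corpus, list_type='StopList', erase=False)
# terms missing from the ngram table, or already grouped under another form, are skipped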
def insert_ngrams(ngrams,get='terms-id'):
    '''
@@ -111,28 +142,3 @@ def insert_nodengramngram(nodengramngram):
    db.commit()

-#def queryNodeNodeNgram(nodeMeasure_id=None, corpus_id=None, limit=None):
-#    '''
-#    queryNodeNodeNgram :: Int -> Int -> Int -> (Int, String, Float)
-#    Get list of ngrams according to a measure related to the corpus: maybe tfidf
-#    cvalue.
-#    '''
-#    query = (session.query(Ngram.id, Ngram.terms, NodeNodeNgram.score)
-#        .join(NodeNodeNgram, NodeNodeNgram.ngram_id == Ngram.id)
-#        .join(Node, Node.id == NodeNodeNgram.nodex_id)
-#        .filter(NodeNodeNgram.nodex_id == nodeMeasure_id)
-#        .filter(NodeNodeNgram.nodey_id == corpus_id)
-#        .group_by(Ngram.id, Ngram.terms, NodeNodeNgram.score)
-#        .order_by(desc(NodeNodeNgram.score))
-#        )
-#
-#    if limit is None:
-#        query = query.count()
-#    elif limit == 0 :
-#        query = query.all()
-#    else:
-#        query = query.limit(limit)
-#
-#    return(query)
-#
@@ -6,58 +6,48 @@ from ngram.stop import compute_stop
from ngram.group import compute_groups
from gargantext_web.db import get_or_create_node
from ngram.mapList import compute_mapList
+# from ngram.occurrences import compute_occs

-from gargantext_web.db import NodeNgram
-#from gargantext_web.celery import update_processing
+from gargantext_web.db import session , Node , NodeNgram
+from admin.utils import WorkflowTracking

def ngram_workflow(corpus, n=5000):
    '''
    All the workflow to filter the ngrams.
    '''
+    update_state = WorkflowTracking()
+
+    update_state.processing_(corpus, "Stop words")
    compute_stop(corpus)

+    update_state.processing_(corpus, "TF-IDF global score")
    compute_tfidf_global(corpus)

    part = round(n * 0.9)

-    compute_cvalue(corpus,limit=1000) # size
-    part = round(part * 0.8)
-    print('spec part:', part)
+    # compute_cvalue(corpus,limit=1000) # size
+    # part = round(part * 0.8)
+    #print('spec part:', part)

+    update_state.processing_(corpus, "Specificity score")
    compute_specificity(corpus,limit=part)
    part = round(part * 0.8)

    limit_inf = round(part * 1)
    limit_sup = round(part * 5)
-    print(limit_inf,limit_sup)
+    #print(limit_inf,limit_sup)

+    update_state.processing_(corpus, "Synonyms")
    compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup)

+    update_state.processing_(corpus, "Map list terms")
    compute_mapList(corpus,limit=1000) # size

+    update_state.processing_(corpus, "TF-IDF local score")
    compute_tfidf(corpus)
+    # update_state.processing_(corpus, "OCCS local score")
+    # compute_occs(corpus)

-#corpus=session.query(Node).filter(Node.id==540420).first()
+#corpus=session.query(Node).filter(Node.id==559637).first()
-#update_processing(corpus, 0)
-
-check_stop = False
-if check_stop:
-    stop = get_or_create_node(corpus=corpus,nodetype='StopList')
-    #session.query(NodeNgram).filter(NodeNgram.node_id==stop.id).delete()
-    #session.commit()
-    stop_ngrams = (session.query(Ngram)
-                   .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
-                   .filter(NodeNgram.node_id==stop.id)
-                   .all()
-                   )
-    print([n for n in stop_ngrams])
@@ -269,42 +269,42 @@ class Node(CTENode):
            for ngram_text, weight in associations.items()
        ])

-    @current_app.task(filter=task_method)
-    def workflow(self, keys=None, ngramsextractorscache=None, ngramscaches=None, verbose=False):
-        import time
-        total = 0
-        print("LOG::TIME: In workflow() parse_resources()")
-        start = time.time()
-        self.hyperdata['Processing'] = 1
-        self.save()
-        self.parse_resources()
-        end = time.time()
-        total += (end - start)
-        print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" parse_resources() [s]",(end - start))
-        print("LOG::TIME: In workflow() / parse_resources()")
-        start = time.time()
-        print("LOG::TIME: In workflow() extract_ngrams()")
-        print("\n- - - - - - - - - -")
-        type_document = NodeType.objects.get(name='Document')
-        self.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',])
-        end = time.time()
-        print("- - - - - - - - - - \n")
-        total += (end - start)
-        print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" extract_ngrams() [s]",(end - start))
-        print("LOG::TIME: In workflow() / extract_ngrams()")
-        start = time.time()
-        print("In workflow() do_tfidf()")
-        from analysis.functions import do_tfidf
-        do_tfidf(self)
-        end = time.time()
-        total += (end - start)
-        print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" do_tfidf() [s]",(end - start))
-        print("LOG::TIME: In workflow() / do_tfidf()")
-        print("In workflow() END")
-        self.hyperdata['Processing'] = 0
-        self.save()
+    # @current_app.task(filter=task_method)
+    # def workflow(self, keys=None, ngramsextractorscache=None, ngramscaches=None, verbose=False):
+    #     import time
+    #     total = 0
+    #     print("LOG::TIME: In workflow() parse_resources()")
+    #     start = time.time()
+    #     self.hyperdata['Processing'] = 1
+    #     self.save()
+    #     self.parse_resources()
+    #     end = time.time()
+    #     total += (end - start)
+    #     print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" parse_resources() [s]",(end - start))
+    #     print("LOG::TIME: In workflow() / parse_resources()")
+    #     start = time.time()
+    #     print("LOG::TIME: In workflow() extract_ngrams()")
+    #     print("\n- - - - - - - - - -")
+    #     type_document = NodeType.objects.get(name='Document')
+    #     self.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',])
+    #     end = time.time()
+    #     print("- - - - - - - - - - \n")
+    #     total += (end - start)
+    #     print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" extract_ngrams() [s]",(end - start))
+    #     print("LOG::TIME: In workflow() / extract_ngrams()")
+    #     start = time.time()
+    #     print("In workflow() do_tfidf()")
+    #     from analysis.functions import do_tfidf
+    #     do_tfidf(self)
+    #     end = time.time()
+    #     total += (end - start)
+    #     print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" do_tfidf() [s]",(end - start))
+    #     print("LOG::TIME: In workflow() / do_tfidf()")
+    #     print("In workflow() END")
+    #     self.hyperdata['Processing'] = 0
+    #     self.save()

class Node_Hyperdata(models.Model):
    node = models.ForeignKey(Node, on_delete=models.CASCADE)
...
@@ -23,10 +23,12 @@ from ..NgramsExtractors import *
from admin.utils import PrintException

class EuropressFileParser(FileParser):
+    def _parse_header(self, header):
+        pass
+
    def _parse(self, file):
        localeEncoding = "fr_FR"
        codif = "UTF-8"
-        format_date = re.compile('.*\d{4}.*', re.UNICODE)
        if isinstance(file, str):
            file = open(file, 'rb')
@@ -71,6 +73,7 @@ class EuropressFileParser(FileParser):
        # parse all the articles, one by one
        try:
            for html_article in html_articles:
+                print('article')
                hyperdata = {}
@@ -87,6 +90,36 @@ class EuropressFileParser(FileParser):
                header = html_article.xpath(header_xpath)[0].text
+                hyperdata.update(self._parse_header(header))
+                hyperdata['publication_year'] = hyperdata['publication_date'].strftime('%Y')
+                hyperdata['publication_month'] = hyperdata['publication_date'].strftime('%m')
+                hyperdata['publication_day'] = hyperdata['publication_date'].strftime('%d')
+
+                try:
+                    title = paragraph_list(html_article.xpath(title_xpath))
+                    hyperdata['title'] = title[0]
+                except:
+                    pass
+
+                try:
+                    text = paragraph_list(html_article.xpath(text_xpath))
+                    hyperdata['abstract'] = ' '.join([ ' <p> ' + p + ' </p> ' for p in title[1:] + text])
+                except:
+                    pass
+
+                yield hyperdata
+
+        except :
+            PrintException()
+            pass
+
+
+class EuropressFileParser_fr(EuropressFileParser):
+    def _parse_header(self, header):
+        format_date = re.compile('.*\d{4}.*', re.UNICODE)
+        hyperdata = dict()
        if header is not None:
            header = header.split(', ')
            if format_date.match(header[0]):
@@ -102,33 +135,38 @@ class EuropressFileParser(FileParser):
                date = header[2]
            try:
-                hyperdata['publication_date'] = dateparser.parse(date.strip(), languages=['fr', 'en'])
+                hyperdata['publication_date'] = dateparser.parse(date.strip(), languages=['fr'])
            except:
                hyperdata['publication_date'] = timezone.now().strftime("%Y-%m-%d %H:%M:%S")
-            hyperdata['publication_year'] = hyperdata['publication_date'].strftime('%Y')
-            hyperdata['publication_month'] = hyperdata['publication_date'].strftime('%m')
-            hyperdata['publication_day'] = hyperdata['publication_date'].strftime('%d')
+        return(hyperdata)
        #print(hyperdata['publication_date'])

+class EuropressFileParser_en(EuropressFileParser):
+    def _parse_header(self, header):
+        format_date = re.compile('.*\d{4}.*', re.UNICODE)
+        if header is not None:
+            header = header.split(', ')
+            if format_date.match(header[0]):
+                date = header[0]
+            elif format_date.match(header[1]):
+                hyperdata['rubrique'] = header[0]
+                date = header[1]
                try:
-                    title = paragraph_list(html_article.xpath(title_xpath))
-                    hyperdata['title'] = title[0]
+                    hyperdata['page'] = header[2].split(' ')[1]
                except:
                    pass
+            else:
+                date = header[2]
                try:
-                    text = paragraph_list(html_article.xpath(text_xpath))
-                    hyperdata['abstract'] = ' '.join([ ' <p> ' + p + ' </p> ' for p in title[1:] + text])
+                    hyperdata['publication_date'] = dateparser.parse(date.strip(), languages=['fr'])
                except:
-                    pass
+                    hyperdata['publication_date'] = timezone.now().strftime("%Y-%m-%d %H:%M:%S")
-                yield hyperdata
-        except :
-            PrintException()
-            pass

if __name__ == "__main__":
    e = EuropressFileParser()
...
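Note: the refactor above is a template method: the base EuropressFileParser drives the HTML walk and delegates header parsing to `_parse_header()`, which each language subclass overrides. A minimal standalone sketch of the same shape (names hypothetical):

class BaseParser:
    def _parse_header(self, header):
        raise NotImplementedError
    def parse(self, headers):
        for h in headers:              # shared driving loop
            yield self._parse_header(h)

class FrParser(BaseParser):
    def _parse_header(self, header):   # language-specific hook
        return {'lang': 'fr', 'raw': header}

print(list(FrParser().parse(['mercredi 15 avril 2015'])))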
import re
import locale

from lxml import etree
from lxml.etree import tostring
from lxml.html import html5parser

from itertools import chain
from datetime import datetime, date
from django.utils import timezone
import dateutil.parser
import dateparser

import sys

#sys.path.append('/srv/gargantext')
#from admin.env import *
#from parsing.FileParsers.FileParser import FileParser
from .FileParser import FileParser
#from parsing.NgramsExtractors import *
from ..NgramsExtractors import *

from admin.utils import PrintException

class EuropressFileParser_en(FileParser):
    def _parse(self, file):
        localeEncoding = "fr_FR"
        codif = "UTF-8"
        format_page = re.compile('p\. .*', re.UNICODE)

        def parse_date(date, lang):
            d = dateparser.parse(date.strip(), languages=[lang])
            return d

        if isinstance(file, str):
            file = open(file, 'rb')

        contents = file.read()
        encoding = self.detect_encoding(contents)

        if encoding != "utf-8":
            try:
                contents = contents.decode("latin1", errors='replace').encode(codif)
            except:
                PrintException()

        html_parser = etree.HTMLParser(encoding=codif)
        html = etree.fromstring(contents, html_parser)
        html_parser = html5parser.etree.HTMLParser(encoding=codif)
        html = html5parser.etree.fromstring(contents, html_parser)
        html_articles = html.xpath('//article')

        name_xpath = "./header/div/span[@class = 'DocPublicationName']"
        header_xpath = "./header/div/span[@class = 'DocHeader']"
        title_xpath = "./header/div[@class='titreArticle']/descendant-or-self::*"
        text_xpath = "./section/div[@class='DocText']/descendant-or-self::*"

        def paragraph_list(data_xpath):
            result = list()
            for elem in data_xpath:
                if elem.text is not None:
                    if elem.text.strip() != '':
                        if elem.tag == 'p':
                            result.append(elem.text)
                        else:
                            if len(result) > 0:
                                result.append(result.pop() + elem.text)
                            else:
                                result.append(elem.text)
            return result

        # parse all the articles, one by one
        try:
            for html_article in html_articles:
                hyperdata = {}

                try:
                    pub_name = html_article.xpath(name_xpath)[0].text
                    name = pub_name.split(', ')
                    hyperdata['journal'] = name[0]
                    hyperdata['number'] = name[1]
                except:
                    try:
                        hyperdata['journal'] = pub_name.strip()
                    except:
                        pass

                #print(hyperdata['publication_date'])
                try:
                    title = paragraph_list(html_article.xpath(title_xpath))
                    hyperdata['title'] = title[0]
                except:
                    pass

                header = html_article.xpath(header_xpath)[0].text
                if header is not None:
                    header = header.split(', ')
                    header = list(filter(lambda x: format_page.match(x) is None, header))
                    print(header)
                    if parse_date(header[0], 'en') is not None:
                        date = ' '.join(header[0:])
                    elif parse_date(header[1], 'en') is not None:
                        date = ' '.join(header[1:])
                    elif parse_date(header[2], 'en') is not None:
                        date = ' '.join(header[2:])
                    elif parse_date(header[3], 'en') is not None:
                        date = ' '.join(header[3:])
                    else:
                        date = '2016'

                try:
                    hyperdata['publication_date'] = dateparser.parse(date.strip(), languages=['en'])
                except:
                    hyperdata['publication_date'] = timezone.now().strftime("%Y-%m-%d %H:%M:%S")

                try:
                    hyperdata['publication_year'] = hyperdata['publication_date'].strftime('%Y')
                    hyperdata['publication_month'] = hyperdata['publication_date'].strftime('%m')
                    hyperdata['publication_day'] = hyperdata['publication_date'].strftime('%d')
                except:
                    print(hyperdata['title'])
                    print(date)

                try:
                    text = paragraph_list(html_article.xpath(text_xpath))
                    hyperdata['abstract'] = ' '.join([ ' <p> ' + p + ' </p> ' for p in title[1:] + text])
                except:
                    pass

                yield hyperdata
        except :
            PrintException()
            pass

if __name__ == "__main__":
    e = EuropressFileParser()
    hyperdata = e.parse(str(sys.argv[1]))
    for h in hyperdata:
        try:
            print(h['journal'], ":", h['publication_date'])
        except:
            pass
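Note: dateparser does the actual date recognition in both parsers. A quick standalone check (dates illustrative):

import dateparser

print(dateparser.parse('15 avril 2015', languages=['fr']))    # 2015-04-15 00:00:00
print(dateparser.parse('April 15, 2015', languages=['en']))   # 2015-04-15 00:00:00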
import re
import locale
from lxml import etree
from lxml.etree import tostring
from lxml.html import html5parser
from itertools import chain
from datetime import datetime, date
from django.utils import timezone
import dateutil.parser
import dateparser
import sys

#sys.path.append('/srv/gargantext')
#from admin.env import *
#from parsing.FileParsers.FileParser import FileParser
from .FileParser import FileParser
#from parsing.NgramsExtractors import *
from ..NgramsExtractors import *
from admin.utils import PrintException

class EuropressFileParser_fr(FileParser):
    def _parse(self, file):
        localeEncoding = "fr_FR"
        codif = "UTF-8"
        format_date = re.compile('.*\d{4}.*', re.UNICODE)

        def parse_date(date, lang):
            d = dateparser.parse(date.strip(), languages=[lang])
            return d

        if isinstance(file, str):
            file = open(file, 'rb')
        contents = file.read()
        encoding = self.detect_encoding(contents)

        if encoding != "utf-8":
            try:
                contents = contents.decode("latin1", errors='replace').encode(codif)
            except:
                PrintException()

        html_parser = etree.HTMLParser(encoding=codif)
        html = etree.fromstring(contents, html_parser)
        html_parser = html5parser.etree.HTMLParser(encoding=codif)
        html = html5parser.etree.fromstring(contents, html_parser)
        html_articles = html.xpath('//article')

        name_xpath = "./header/div/span[@class = 'DocPublicationName']"
        header_xpath = "./header/div/span[@class = 'DocHeader']"
        title_xpath = "./header/div[@class='titreArticle']/descendant-or-self::*"
        text_xpath = "./section/div[@class='DocText']/descendant-or-self::*"

        def paragraph_list(data_xpath):
            result = list()
            for elem in data_xpath:
                if elem.text is not None:
                    if elem.text.strip() != '':
                        if elem.tag == 'p':
                            result.append(elem.text)
                        else:
                            if len(result) > 0:
                                result.append(result.pop() + elem.text)
                            else:
                                result.append(elem.text)
            return result

        # parse all the articles, one by one
        try:
            for html_article in html_articles:
                hyperdata = {}

                try:
                    pub_name = html_article.xpath(name_xpath)[0].text
                    name = pub_name.split(', ')
                    hyperdata['journal'] = name[0]
                    hyperdata['number'] = name[1]
                except:
                    try:
                        hyperdata['journal'] = pub_name.strip()
                    except:
                        pass

                header = html_article.xpath(header_xpath)[0].text
                if header is not None:
                    header = header.split(', ')
                    if parse_date(header[0], 'fr') is not None:
                        date = header[0]
                    elif parse_date(header[1], 'fr') is not None:
                        hyperdata['rubrique'] = header[0]
                        date = header[1]
                        try:
                            hyperdata['page'] = header[2].split(' ')[1]
                        except:
                            pass
                    elif parse_date(header[2], 'fr') is not None:
                        date = header[2]
                    elif parse_date(header[0], 'en') is not None:
                        date = ' '.join(header[0:])
                    elif parse_date(header[1], 'en') is not None:
                        date = ' '.join(header[1:])
                    elif parse_date(header[2], 'en') is not None:
                        date = ' '.join(header[2:])

                try:
                    hyperdata['publication_date'] = dateparser.parse(date.strip(), languages=['fr', 'en'])
                except:
                    hyperdata['publication_date'] = timezone.now().strftime("%Y-%m-%d %H:%M:%S")

                hyperdata['publication_year'] = hyperdata['publication_date'].strftime('%Y')
                hyperdata['publication_month'] = hyperdata['publication_date'].strftime('%m')
                hyperdata['publication_day'] = hyperdata['publication_date'].strftime('%d')
                #print(hyperdata['publication_date'])

                try:
                    title = paragraph_list(html_article.xpath(title_xpath))
                    hyperdata['title'] = title[0]
                except:
                    pass

                try:
                    text = paragraph_list(html_article.xpath(text_xpath))
                    hyperdata['abstract'] = ' '.join([ ' <p> ' + p + ' </p> ' for p in title[1:] + text])
                except:
                    pass

                yield hyperdata
        except :
            PrintException()
            pass

if __name__ == "__main__":
    e = EuropressFileParser()
    hyperdata = e.parse(str(sys.argv[1]))
    for h in hyperdata:
        try:
            print(h['journal'], ":", h['publication_date'])
        except:
            pass
@@ -3,6 +3,7 @@ from .IsiFileParser import IsiFileParser
from .JstorFileParser import JstorFileParser
from .ZoteroFileParser import ZoteroFileParser
from .PubmedFileParser import PubmedFileParser
-from .EuropressFileParser import EuropressFileParser
+from .EuropressFileParser_en import EuropressFileParser_en
+from .EuropressFileParser_fr import EuropressFileParser_fr
from .ISTex import ISTex
from .CSVParser import CSVParser
...@@ -7,8 +7,8 @@ parsers = { ...@@ -7,8 +7,8 @@ parsers = {
'Zotero (RIS format)' : ZoteroFileParser, 'Zotero (RIS format)' : ZoteroFileParser,
'Jstor (RIS format)' : JstorFileParser, 'Jstor (RIS format)' : JstorFileParser,
#'Europress' : EuropressFileParser, #'Europress' : EuropressFileParser,
'Europress (French)' : EuropressFileParser, 'Europress (French)' : EuropressFileParser_fr,
'Europress (English)' : EuropressFileParser, 'Europress (English)' : EuropressFileParser_en,
'CSVParser' : CSVParser, 'CSVParser' : CSVParser,
'ISTex' : ISTex, 'ISTex' : ISTex,
} }
......
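As a usage sketch (the file path below is hypothetical), this mapping lets the import step dispatch on the resource type selected by the user:

# hypothetical dispatch on the user-selected resource type
parser = parsers['Europress (French)']()   # -> EuropressFileParser_fr instance
for hyperdata in parser.parse('/tmp/export_europresse.html'):
    print(hyperdata.get('journal'), hyperdata.get('publication_date'))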
...@@ -11,7 +11,7 @@ import datetime ...@@ -11,7 +11,7 @@ import datetime
import copy import copy
from gargantext_web.views import move_to_trash from gargantext_web.views import move_to_trash
from gargantext_web.db import session, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram\ from gargantext_web.db import session, cache, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram\
, NodeType, Node_Hyperdata , NodeType, Node_Hyperdata
from gargantext_web.validation import validate, ValidationException from gargantext_web.validation import validate, ValidationException
from node import models from node import models
...@@ -139,6 +139,50 @@ class NodesChildrenNgrams(APIView): ...@@ -139,6 +139,50 @@ class NodesChildrenNgrams(APIView):
], ],
}) })
class NodesChildrenNgramsIds(APIView):
def get(self, request, node_id):
# query the corpus documents together with their cumulated ngram weights
ParentNode = aliased(Node)
ngrams_query = (session
.query(Node.id, func.sum(Node_Ngram.weight).label('count'))
.join(Node_Ngram, Node_Ngram.node_id == Node.id)
.join(Ngram, Ngram.id == Node_Ngram.ngram_id)
.filter(Node.parent_id == node_id)
.filter(Node.type_id == cache.NodeType['Document'].id)
.group_by(Node.id)
# .group_by(Ngram)
.order_by(func.sum(Node_Ngram.weight).desc())
)
# filters
if 'startwith' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.startswith(request.GET['startwith']))
if 'contain' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.contains(request.GET['contain']))
#if 'doesnotcontain' in request.GET:
# ngrams_query = ngrams_query.filter(not_(Ngram.terms.contains(request.GET['doesnotcontain'])))
# pagination
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 20))
total = ngrams_query.count()
# return formatted result
return JsonHttpResponse({
'pagination': {
'offset': offset,
'limit': limit,
'total': total,
},
'data': [
{
'id': node,
'count': count
}
for node, count in ngrams_query[offset : offset+limit]
],
})
from gargantext_web.db import get_or_create_node from gargantext_web.db import get_or_create_node
class Ngrams(APIView): class Ngrams(APIView):
......
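The NodesChildrenNgramsIds view above pages through document ids with their cumulated ngram weights (it is wired to nodes/<id>/children/ids in the urls diff below). A client-side sketch with the requests library, assuming a local instance and a hypothetical corpus id:

import requests

# hypothetical host and corpus node id
url = 'http://localhost:8000/api/nodes/1234/children/ids'
offset, limit = 0, 20
while True:
    page = requests.get(url, params={'offset': offset, 'limit': limit}).json()
    for row in page['data']:
        print(row['id'], row['count'])
    offset += limit
    if offset >= page['pagination']['total']:
        break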
...@@ -22,20 +22,24 @@ class Graph(APIView): ...@@ -22,20 +22,24 @@ class Graph(APIView):
format_ = request.GET.get('format', 'json') format_ = request.GET.get('format', 'json')
type_ = request.GET.get('type', 'node_link') type_ = request.GET.get('type', 'node_link')
hapax = request.GET.get('hapax', 1) hapax = request.GET.get('hapax', 1)
distance = request.GET.get('distance', 'conditional')
corpus = session.query(Node).filter(Node.id==corpus_id).first() corpus = session.query(Node).filter(Node.id==corpus_id).first()
accepted_field1 = ['ngrams', 'journal', 'source', 'authors'] accepted_field1 = ['ngrams', 'journal', 'source', 'authors']
accepted_field2 = ['ngrams',] accepted_field2 = ['ngrams',]
options = ['start', 'end', 'hapax'] options = ['start', 'end', 'hapax', 'distance']
if field1 in accepted_field1 : if field1 in accepted_field1 :
if field2 in accepted_field2 : if field2 in accepted_field2 :
if start is not None and end is not None : if start is not None and end is not None :
data = get_cooc(corpus=corpus,field1=field1, field2=field2, start=start, end=end, hapax=hapax) data = get_cooc(corpus=corpus,field1=field1, field2=field2
, start=start, end=end
, hapax=hapax, distance=distance)
else: else:
data = get_cooc(corpus=corpus,field1=field1, field2=field2, hapax=hapax) data = get_cooc(corpus=corpus,field1=field1, field2=field2
, hapax=hapax, distance = distance)
if format_ == 'json': if format_ == 'json':
return JsonHttpResponse(data) return JsonHttpResponse(data)
else: else:
......
This diff is collapsed.
...@@ -6,6 +6,8 @@ from rest_v1_0 import api, ngrams, graph ...@@ -6,6 +6,8 @@ from rest_v1_0 import api, ngrams, graph
from annotations import views from annotations import views
import tests.ngramstable.views as samtest
urlpatterns = patterns('', urlpatterns = patterns('',
# REST URLS # REST URLS
# What is REST ? # What is REST ?
...@@ -15,6 +17,7 @@ urlpatterns = patterns('', ...@@ -15,6 +17,7 @@ urlpatterns = patterns('',
url(r'nodes$', api.NodesList.as_view()), url(r'nodes$', api.NodesList.as_view()),
url(r'nodes/(\d+)$', api.Nodes.as_view()), url(r'nodes/(\d+)$', api.Nodes.as_view()),
url(r'nodes/(\d+)/children/ngrams$', api.NodesChildrenNgrams.as_view()), # => repeated children ? url(r'nodes/(\d+)/children/ngrams$', api.NodesChildrenNgrams.as_view()), # => repeated children ?
url(r'nodes/(\d+)/children/ids$', api.NodesChildrenNgramsIds.as_view()), # => repeated children ?
# NGRAMS table & annotations # NGRAMS table & annotations
url(r'node/(\d+)/ngrams$' , ngrams.Ngrams.as_view()), url(r'node/(\d+)/ngrams$' , ngrams.Ngrams.as_view()),
...@@ -22,7 +25,9 @@ urlpatterns = patterns('', ...@@ -22,7 +25,9 @@ urlpatterns = patterns('',
url(r'node/(\d+)/ngrams/keep$', ngrams.Keep.as_view()), url(r'node/(\d+)/ngrams/keep$', ngrams.Keep.as_view()),
# url(r'node/(?P<list_id>[0-9]+)/ngrams/keep/(?P<ngram_ids>[0-9,\+]+)+$' , ngrams.Keep.as_view()), # url(r'node/(?P<list_id>[0-9]+)/ngrams/keep/(?P<ngram_ids>[0-9,\+]+)+$' , ngrams.Keep.as_view()),
url(r'node/(?P<list_id>[0-9]+)/ngrams/(?P<ngram_ids>[0-9,\+]+)+$', views.NgramEdit.as_view()), url(r'node/(?P<list_id>[0-9]+)/ngrams/(?P<ngram_ids>[0-9,\+]+)+$', views.NgramEdit.as_view()),
url(r'node/(\d+)/ngrams/list$' , ngrams.List.as_view()), url(r'node/(?P<corpus_id>[0-9]+)/ngrams/list/(?P<list_name>\w+)$' , ngrams.List.as_view()),
url(r'node/corpus/(?P<node_ids>[0-9,\+]+)+$' , samtest.get_corpuses),
#url(r'nodes/(\d+)/children/hyperdata$', api.NodesChildrenMetatadata.as_view()), #url(r'nodes/(\d+)/children/hyperdata$', api.NodesChildrenMetatadata.as_view()),
#url(r'nodes/(\d+)/children/hyperdata$', api.NodesChildrenMetatadata.as_view()), #url(r'nodes/(\d+)/children/hyperdata$', api.NodesChildrenMetatadata.as_view()),
......
...@@ -45,19 +45,22 @@ class MedlineFetcher: ...@@ -45,19 +45,22 @@ class MedlineFetcher:
query = query.replace(' ', '%20') query = query.replace(' ', '%20')
eSearch = '%s/esearch.fcgi?db=%s&retmax=1&usehistory=y&term=%s' %(self.pubMedEutilsURL, self.pubMedDB, query) eSearch = '%s/esearch.fcgi?db=%s&retmax=1&usehistory=y&term=%s' %(self.pubMedEutilsURL, self.pubMedDB, query)
try:
eSearchResult = urlopen(eSearch) eSearchResult = urlopen(eSearch)
data = eSearchResult.read() data = eSearchResult.read()
root = etree.XML(data) root = etree.XML(data)
findcount = etree.XPath("/eSearchResult/Count/text()") findcount = etree.XPath("/eSearchResult/Count/text()")
count = findcount(root)[0] count = findcount(root)[0]
findquerykey = etree.XPath("/eSearchResult/QueryKey/text()") findquerykey = etree.XPath("/eSearchResult/QueryKey/text()")
queryKey = findquerykey(root)[0] queryKey = findquerykey(root)[0]
findwebenv = etree.XPath("/eSearchResult/WebEnv/text()") findwebenv = etree.XPath("/eSearchResult/WebEnv/text()")
webEnv = findwebenv(root)[0] webEnv = findwebenv(root)[0]
except:
count=0
queryKey=False
webEnv=False
origQuery=False
values = { "query":origQuery , "count": int(str(count)), "queryKey": queryKey , "webEnv":webEnv } values = { "query":origQuery , "count": int(str(count)), "queryKey": queryKey , "webEnv":webEnv }
return values return values
...@@ -173,8 +176,13 @@ class MedlineFetcher: ...@@ -173,8 +176,13 @@ class MedlineFetcher:
self.q.join() self.q.join()
print('time:',time.perf_counter() - start) print('time:',time.perf_counter() - start)
Total = 0
Fails = 0
for globalresults in self.firstResults: for globalresults in self.firstResults:
# globalresults = self.medlineEsearch(pubmedquery) # globalresults = self.medlineEsearch(pubmedquery)
Total += 1
if globalresults["queryKey"]==False:
Fails += 1
if globalresults["count"]>0: if globalresults["count"]>0:
N+=globalresults["count"] N+=globalresults["count"]
queryhyperdata = { queryhyperdata = {
...@@ -198,4 +206,7 @@ class MedlineFetcher: ...@@ -198,4 +206,7 @@ class MedlineFetcher:
if query["retmax"]==0: query["retmax"]+=1 if query["retmax"]==0: query["retmax"]+=1
print(query["string"],"\t[",k,">",query["retmax"],"]") print(query["string"],"\t[",k,">",query["retmax"],"]")
if ((Fails+1)/(Total+1))==1 : # every single query failed: report a connection error to the caller
thequeries = [False]
return thequeries return thequeries
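The (Fails+1)/(Total+1) test above equals 1 exactly when every esearch query failed, and the add-one smoothing avoids a division by zero when no query ran at all; a tiny standalone check with made-up counts:

def all_failed(fails, total):
    # equals 1 exactly when fails == total; the +1 on both sides also
    # guards against division by zero when no query was issued
    return (fails + 1) / (total + 1) == 1

print(all_failed(0, 0))   # True  -> no query could even be issued
print(all_failed(3, 3))   # True  -> every query failed
print(all_failed(2, 3))   # False -> at least one query succeeded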
...@@ -130,7 +130,7 @@ def doTheQuery(request , project_id): ...@@ -130,7 +130,7 @@ def doTheQuery(request , project_id):
parent_id = project_id, parent_id = project_id,
type_id = cache.NodeType['Corpus'].id, type_id = cache.NodeType['Corpus'].id,
language_id = None, language_id = None,
hyperdata = {'Processing' : 1,} hyperdata = {'Processing' : "Parsing documents",}
) )
session.add(corpus) session.add(corpus)
session.commit() session.commit()
...@@ -243,7 +243,7 @@ def testISTEX(request , project_id): ...@@ -243,7 +243,7 @@ def testISTEX(request , project_id):
parent_id = project_id, parent_id = project_id,
type_id = cache.NodeType['Corpus'].id, type_id = cache.NodeType['Corpus'].id,
language_id = None, language_id = None,
hyperdata = {'Processing' : 1,} hyperdata = {'Processing' : "Parsing documents",}
) )
session.add(corpus) session.add(corpus)
session.commit() session.commit()
......
#!/bin/bash
FILE=$(date +%Y%m%d-%H:%M:%S.log)
source /srv/gargantext_env/bin/activate
touch /var/log/gargantext/celery/$FILE && ./manage.py celery worker --loglevel=info >> /var/log/gargantext/celery/$FILE
#!/bin/bash
FILE=$(date +%Y%m%d-%H:%M:%S.log)
touch /var/log/gargantext/uwsgi/$FILE && uwsgi gargantext.ini >> /var/log/gargantext/uwsgi/$FILE
This diff is collapsed.
This diff is collapsed.
static/img/logo.png: binary image replaced (3.41 KB → 39.2 KB)
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="210mm"
height="297mm"
id="svg2"
version="1.1"
inkscape:version="0.48.5 r10040"
sodipodi:docname="mastodons.svg"
inkscape:export-filename="/srv/gargantext/static/img/sponsors/mastodons.png"
inkscape:export-xdpi="61.073017"
inkscape:export-ydpi="61.073017">
<defs
id="defs4">
<linearGradient
id="linearGradient3782">
<stop
style="stop-color:#09097e;stop-opacity:1;"
offset="0"
id="stop3784" />
<stop
id="stop3790"
offset="0.5"
style="stop-color:#09097e;stop-opacity:0.49803922;" />
<stop
style="stop-color:#09097e;stop-opacity:0;"
offset="1"
id="stop3786" />
</linearGradient>
</defs>
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="7.0998446"
inkscape:cx="68.070083"
inkscape:cy="793.17743"
inkscape:document-units="px"
inkscape:current-layer="layer1"
showgrid="false"
inkscape:window-width="963"
inkscape:window-height="762"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="0" />
<metadata
id="metadata7">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Calque 1"
inkscape:groupmode="layer"
id="layer1">
<path
style="fill:#04047e;fill-opacity:1;stroke:#191559;stroke-width:2.68011474999999999;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;fill-rule:nonzero;opacity:1"
d="m 67.957124,219.89012 c -21.951494,0 -40.977621,8.69677 -50.335489,21.40826 4.292013,3.79798 7.034932,9.55548 7.047344,16.00434 l 0,0.0396 c 0,7.23734 -3.440309,13.61318 -8.653863,17.32815 8.645806,13.99801 28.643543,23.80892 51.942008,23.80892 23.496654,0 43.642766,-9.96848 52.167926,-24.15469 -4.08915,-3.80631 -6.68337,-9.43968 -6.68337,-15.71784 0,-6.0475 2.40661,-11.48933 6.23781,-15.28316 -8.76845,-13.80251 -28.630779,-23.4335 -51.722366,-23.4335 z"
id="path2989"
inkscape:export-filename="/srv/gargantext/static/img/sponsors/mastodons.png"
inkscape:export-xdpi="150.35899"
inkscape:export-ydpi="150.35899"
inkscape:connector-curvature="0" />
<text
xml:space="preserve"
style="font-size:29.44009972px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#00ffff;fill-opacity:1;stroke:none;font-family:Courier;-inkscape-font-specification:Courier"
x="160.47124"
y="119.38621"
id="text2985"
sodipodi:linespacing="125%"
transform="scale(0.43195474,2.3150573)"
inkscape:export-filename="/srv/gargantext/static/img/sponsors/mastodons.png"
inkscape:export-xdpi="150.35899"
inkscape:export-ydpi="150.35899"><tspan
sodipodi:role="line"
id="tspan2987"
x="160.47124"
y="119.38621">M<tspan
style="fill:#ffffff"
id="tspan2985">a</tspan>s<tspan
style="fill:#ffffff"
id="tspan2988">t</tspan>o<tspan
style="fill:#ffffff"
id="tspan2990">n</tspan>d<tspan
style="fill:#ffffff"
id="tspan2992">o</tspan>n<tspan
style="fill:#ffffff"
id="tspan2996">s</tspan></tspan></text>
</g>
</svg>
...@@ -9,14 +9,14 @@ ...@@ -9,14 +9,14 @@
xmlns="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="48px" width="640"
height="48px" height="480"
id="svg4362" id="svg3041"
version="1.1" version="1.1"
inkscape:version="0.48.5 r10040" inkscape:version="0.48.5 r10040"
sodipodi:docname="logo.svg"> sodipodi:docname="logo.svg">
<defs <defs
id="defs4364" /> id="defs3043" />
<sodipodi:namedview <sodipodi:namedview
id="base" id="base"
pagecolor="#ffffff" pagecolor="#ffffff"
...@@ -24,20 +24,19 @@ ...@@ -24,20 +24,19 @@
borderopacity="1.0" borderopacity="1.0"
inkscape:pageopacity="0.0" inkscape:pageopacity="0.0"
inkscape:pageshadow="2" inkscape:pageshadow="2"
inkscape:zoom="5.6897594" inkscape:zoom="0.86750285"
inkscape:cx="-11.235831" inkscape:cx="574.44134"
inkscape:cy="3.8560006" inkscape:cy="214.55006"
inkscape:current-layer="layer1"
showgrid="true"
inkscape:grid-bbox="true"
inkscape:document-units="px" inkscape:document-units="px"
inkscape:current-layer="layer1"
showgrid="false"
inkscape:window-width="1360" inkscape:window-width="1360"
inkscape:window-height="762" inkscape:window-height="762"
inkscape:window-x="0" inkscape:window-x="0"
inkscape:window-y="0" inkscape:window-y="0"
inkscape:window-maximized="0" /> inkscape:window-maximized="0" />
<metadata <metadata
id="metadata4367"> id="metadata3046">
<rdf:RDF> <rdf:RDF>
<cc:Work <cc:Work
rdf:about=""> rdf:about="">
...@@ -49,49 +48,87 @@ ...@@ -49,49 +48,87 @@
</rdf:RDF> </rdf:RDF>
</metadata> </metadata>
<g <g
inkscape:label="Calque 1"
inkscape:groupmode="layer"
id="layer1" id="layer1"
inkscape:label="Layer 1" transform="translate(0,-572.36218)">
inkscape:groupmode="layer">
<rect <rect
style="fill:#fffcfc;fill-opacity:1;stroke:none" style="fill:#ffffff;fill-opacity:1;stroke:none"
id="rect3755" id="rect2998"
width="29.70249" width="410.37329"
height="31.108515" height="315.84909"
x="0" x="102.59332"
y="-0.1566938" y="641.98889"
inkscape:export-filename="/srv/gargantext/static/img/logo.png" inkscape:export-filename="/srv/gargantext/static/img/logo.png"
inkscape:export-xdpi="53" inkscape:export-xdpi="200"
inkscape:export-ydpi="53" /> inkscape:export-ydpi="200" />
<g
inkscape:export-ydpi="53.799999"
inkscape:export-xdpi="53.799999"
inkscape:export-filename="/srv/gargantext/static/img/logo.png"
style="fill:#ff8080;fill-opacity:0.82014388"
id="g3835"
transform="matrix(0.2422549,0,0,0.23374214,-49.789462,-7.9055988)">
<path <path
inkscape:export-ydpi="100" inkscape:export-ydpi="200"
inkscape:export-xdpi="100" inkscape:export-xdpi="200"
inkscape:export-filename="/home/alexandre/projets/gargantext.py/gargantext_core/shared/LogoSimple.png" inkscape:export-filename="/srv/gargantext/static/img/logo.png"
id="path3837" id="path3950"
d="m 206.24721,35.28586 0,129.5 67.78125,0 0,-8.625 c -9.86526,-0.47262 -18.57934,-2.63259 -25.5625,-6.28125 -18.65918,-9.74237 -29.875,-28.26535 -29.875,-49.1875 0,-31.71741 21.11877,-52.8149 55.4375,-55.1875 l 0,-10.21875 -67.78125,0 z m 67.78125,10.21875 0,8.5 c 1.74191,-0.16369 3.53543,-0.28125 5.37499,-0.28125 6.91081,0 13.295,1.44116 19.6875,4.15625 l 2.40625,2.875 2.59375,14.53125 9.6875,0 0,-25.375 c -11.40283,-3.03451 -22.61727,-4.65625 -33.15625,-4.65625 -2.24526,0 -4.44959,0.10177 -6.59374,0.25 z m 0,8.5 c -23.28864,2.18852 -37.65625,18.81513 -37.65625,45.562503 0,27.600037 14.44681,45.025437 37.65625,47.812497 l 0,-93.375 z m 0,93.375 0,8.78125 c 1.36224,0.0653 2.75177,0.0937 4.15624,0.0937 10.19344,0 22.1324,-1.88915 35.78125,-5.5625 l 0,-38.1875 2.9375,-2.21875 9.5,-0.8125 0,-6.5625 -43.21875,0 0,6.5625 12.28125,0.8125 2.9375,2.21875 0,33.21875 c -6.73804,1.4374 -12.61466,2.09375 -17.625,2.09375 -2.32322,0 -4.57592,-0.17643 -6.74999,-0.4375 z" d="m 202.38568,655.35804 0,249.27214 130.47086,0 0,-16.6021 c -18.98946,-0.90974 -35.76303,-5.06743 -49.20478,-12.09066 -35.91671,-18.75291 -57.50584,-54.40745 -57.50584,-94.68011 0,-61.05225 40.65113,-101.66242 106.71062,-106.22939 l 0,-19.66988 -130.47086,0 z m 130.47086,19.66988 0,16.36149 c 3.35297,-0.31508 6.80528,-0.54137 10.34622,-0.54137 13.30249,0 25.59129,2.77406 37.8961,8.00029 l 4.63174,5.53403 4.99267,27.97093 18.64728,0 0,-48.84386 c -21.94909,-5.84107 -43.53556,-8.96273 -63.82184,-8.96273 -4.32186,0 -8.56494,0.19589 -12.69217,0.48122 z m 0,16.36149 c -44.82787,4.21264 -72.48382,36.2169 -72.48382,87.70241 0,53.1268 27.8084,86.66863 72.48382,92.03339 l 0,-179.7358 z m 0,179.7358 0,16.90287 c 2.62215,0.12569 5.29683,0.18036 8.00027,0.18036 19.62116,0 42.60224,-3.63639 68.87466,-10.70715 l 0,-73.50641 5.65434,-4.27083 18.28637,-1.56397 0,-12.63203 -83.19097,0 0,12.63203 23.63995,1.56397 5.65434,4.27083 0,63.94215 c -12.96993,2.76683 -24.28172,4.03022 -33.92603,4.03022 -4.47193,0 -8.80811,-0.3396 -12.99293,-0.84213 z"
style="font-size:166.11251831px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#ff8080;fill-opacity:0.82014388;stroke:none;font-family:Bitstream Charter;-inkscape-font-specification:Bitstream Charter" style="font-size:166.11251831px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#800000;fill-opacity:1;stroke:none;font-family:Bitstream Charter;-inkscape-font-specification:Bitstream Charter"
inkscape:connector-curvature="0" /> inkscape:connector-curvature="0" />
<path <path
inkscape:export-ydpi="100" inkscape:export-ydpi="200"
inkscape:export-xdpi="100" inkscape:export-xdpi="200"
transform="translate(611.62306,-400.10238)" inkscape:export-filename="/srv/gargantext/static/img/logo.png"
transform="matrix(1.9248814,0,0,1.9248814,982.68611,-182.71269)"
sodipodi:open="true" sodipodi:open="true"
sodipodi:end="6.1660663" sodipodi:end="6.1660663"
sodipodi:start="0" sodipodi:start="0"
d="m -312.87112,480.17926 c 0,4.97881 -4.03612,9.01493 -9.01493,9.01493 -4.97881,0 -9.01493,-4.03612 -9.01493,-9.01493 0,-4.97881 4.03612,-9.01493 9.01493,-9.01493 4.57131,0 8.41901,3.42153 8.95317,7.96152" d="m -312.87112,480.17926 a 9.0149298,9.0149298 0 1 1 -0.0618,-1.05341"
sodipodi:ry="9.0149298" sodipodi:ry="9.0149298"
sodipodi:rx="9.0149298" sodipodi:rx="9.0149298"
sodipodi:cy="480.17926" sodipodi:cy="480.17926"
sodipodi:cx="-321.88605" sodipodi:cx="-321.88605"
id="path3839" id="path3952"
style="fill:#ff8080;fill-opacity:0.82014388;stroke:none" style="fill:#ffcc00;fill-opacity:1;stroke:none"
sodipodi:type="arc" /> sodipodi:type="arc" />
</g> <flowRoot
</g> xml:space="preserve"
id="flowRoot3130"
style="font-size:12px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#800000;fill-opacity:1;stroke:none;font-family:Sans"
transform="translate(-222.82792,732.12538)"
inkscape:export-filename="/srv/gargantext/static/img/logo.png"
inkscape:export-xdpi="200"
inkscape:export-ydpi="200"><flowRegion
id="flowRegion3132"><rect
id="rect3134"
width="1090.0853"
height="476.31992"
x="327.0256"
y="148.23489"
style="fill:#800000" /></flowRegion><flowPara
id="flowPara3136"
style="font-size:64px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;fill:#800000;font-family:Sawasdee;-inkscape-font-specification:Sawasdee">Gargan<flowSpan
style="font-size:72px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Courier;-inkscape-font-specification:Courier;fill:#800000"
id="flowSpan3019">text</flowSpan></flowPara></flowRoot> <flowRoot
xml:space="preserve"
id="flowRoot3138"
style="fill:black;stroke:none;stroke-opacity:1;stroke-width:1px;stroke-linejoin:miter;stroke-linecap:butt;fill-opacity:1;font-family:Sans;font-style:normal;font-weight:normal;font-size:12px;line-height:125%;letter-spacing:0px;word-spacing:0px"
inkscape:export-filename="/srv/gargantext/static/img/logo.png"
inkscape:export-xdpi="200"
inkscape:export-ydpi="200"><flowRegion
id="flowRegion3140"><rect
id="rect3142"
width="127.96654"
height="37.916012"
x="338.87436"
y="77.142372" /></flowRegion><flowPara
id="flowPara3144" /></flowRoot> <flowRoot
xml:space="preserve"
id="flowRoot3021"
style="fill:black;stroke:none;stroke-opacity:1;stroke-width:1px;stroke-linejoin:miter;stroke-linecap:butt;fill-opacity:1;font-family:Sans;font-style:normal;font-weight:normal;font-size:12px;line-height:125%;letter-spacing:0px;word-spacing:0px"
inkscape:export-filename="/srv/gargantext/static/img/logo.png"
inkscape:export-xdpi="200"
inkscape:export-ydpi="200"><flowRegion
id="flowRegion3023"><rect
id="rect3025"
width="3.9310031"
height="24.568769"
x="739.02856"
y="423.98322" /></flowRegion><flowPara
id="flowPara3027" /></flowRoot> </g>
</svg> </svg>
...@@ -293,11 +293,6 @@ function Main_test( Data , SearchFilter ) { ...@@ -293,11 +293,6 @@ function Main_test( Data , SearchFilter ) {
// console.log(Data[i]["date"]+" : originalRecords["+arr_id+"] <- "+orig_id+" | "+Data[i]["name"]) // console.log(Data[i]["date"]+" : originalRecords["+arr_id+"] <- "+orig_id+" | "+Data[i]["name"])
} }
// $("#move2trash").prop('disabled', true);
var t0 = AjaxRecords[0].date.split("-").map(Number) var t0 = AjaxRecords[0].date.split("-").map(Number)
var t1 = AjaxRecords.slice(-1)[0].date.split("-").map(Number) var t1 = AjaxRecords.slice(-1)[0].date.split("-").map(Number)
oldest = t0; oldest = t0;
...@@ -458,6 +453,7 @@ function Main_test( Data , SearchFilter ) { ...@@ -458,6 +453,7 @@ function Main_test( Data , SearchFilter ) {
var the_content = $("#filter_search").html(); var the_content = $("#filter_search").html();
$(""+the_content).insertAfter("#dynatable-query-search-my-ajax-table") $(""+the_content).insertAfter("#dynatable-query-search-my-ajax-table")
// .insertAfter("#dynatable-query-search-my-ajax-table") // .insertAfter("#dynatable-query-search-my-ajax-table")
return "OK" return "OK"
......
This diff is collapsed.
...@@ -35,23 +35,38 @@ ...@@ -35,23 +35,38 @@
<div class="panel-body"> <div class="panel-body">
<div class="container"> <div class="container">
<ul> <ul>
<li>Version 1.0</li>
<li>Version 2.0</li>
<ul> <ul>
<li>[Start] Beta Version </li> <li>[NAME] Red Lemon</li>
<li>[Law] Licence of Gargantext is GPL v3+ </li> <li>[NLP] Turbo Parser, MELT</li>
<li>[FEATURE] Ngrams Table management</li>
<li>[FEATURE] Annotation local view</li>
<li>[FEATURE] Lexical Graph with temporal filter</li>
<li>[FEATURE] Graph bi-Partite</li>
</ul> </ul>
<li>Version 1.0.5</li>
<li>Versions from 1.0 to 1.9</li>
<ul> <ul>
<li>Bug resolution: [Import] xml zipped from Mac</li> <li>[NAME] Rose Bonbon</li>
<li>Bug resolution: [Import] french accents in filenames</li> <li>[Law] Licence of Gargantext is (and will be for next versions) AGPL</li>
<li>New features: [Advanced chart] ngrams completion</li> <li>[NLP] Turbo Parser, TreeTagger</li>
<li>New features: [Duplicates management] button to delete all duplicates</li> <li>[FEATURE] Advanced Chart</li>
<li>[FEATURE] Remove duplicates</li>
</ul> </ul>
<li>Version 1.0.6</li>
<li>Versions from 0.1 to 0.9</li>
<ul> <ul>
<li>Bug resolution: [Advanced chart] one can make comparisons with different corpora at different scales</li> <li>[NAME] Black Salade</li>
<li>Bug resolution: [Graph] Graph link can not be executed until workflow is finished.</li> <li>[Law] Licence of Gargantext is GPL v3+ </li>
<li>[NLP] NLTK, TreeTagger</li>
<li>[FEATURE] Graph Explorer</li>
</ul> </ul>
</ul> </ul>
</div> </div>
</div> </div>
...@@ -146,28 +161,52 @@ ...@@ -146,28 +161,52 @@
</div> </div>
</div> </div>
{% if sponsors %}
<div class="panel panel-default"> <div class="panel panel-default">
<div class="panel-heading"> <div class="panel-heading">
<h2 class="panel-title"> <h2 class="panel-title">
<center> <center>
<h2>Sponsors</h2> <h2>Institutional, research and financial support</h2>
<h3>Host institutions</h3>
<a href="http://www.cnrs.fr" target="_blank" > <a href="http://www.cnrs.fr" target="_blank" >
<img src="{% static "img/sponsors/cnrs.png"%}" alt="CNRS" style="height:100px"> <img src="{% static "img/sponsors/cnrs.png"%}" alt="CNRS" style="height:100px">
</a> </a>
<a href="http://www.iscpif.fr" target="_blank" > <a href="http://www.iscpif.fr" target="_blank" >
<img src="{% static "img/sponsors/iscpif.svg"%}" style="height:100px"> <img src="{% static "img/sponsors/iscpif.svg"%}" style="height:100px">
</a> </a>
{% for sponsor in sponsors %} <a href="http://cams.ehess.fr" target="_blank" >
<a href="{{ sponsor.website }}" target="_blank" > <img src="{% static "img/sponsors/cams.jpg"%}" style="height:100px">
<img src="{% static "img/sponsors/"%}{{ sponsor.picture }}" style="height:100px"> </a>
<h3>Institutional Partners</h3>
<p>
{% for institution in institutions %}
<a href="{{ institution.website }}" target="_blank" >
<img src="{% static "img/sponsors/"%}{{ institution.picture }}" style="height:100px">
</a>
{% endfor %}
</p>
<h4>Laboratory Partners</h4>
<p>
{% for labo in labos %}
<a href="{{ labo.website }}" target="_blank" >
<img src="{% static "img/sponsors/"%}{{ labo.picture }}" style="height:50px">
</a>
{% endfor %}
</p>
<h4>Grants</h4>
{% for grant in grants %}
<a href="{{ grant.website }}" target="_blank" >
<img src="{% static "img/sponsors/"%}{{ grant.picture }}" style="height:100px">
</a> </a>
{% endfor %} {% endfor %}
</center> </center>
</div> </div>
</div> </div>
</div> </div>
{% endif %}
......
This diff is collapsed.
...@@ -104,6 +104,10 @@ th a { ...@@ -104,6 +104,10 @@ th a {
<div id="filter_search" style="visibility:hidden"> <div id="filter_search" style="visibility:hidden">
<span style="font-size:70%;">
<input title="Search in Titles" type="checkbox" checked onclick="return false">TI</input>&nbsp;
<input title="Search in Abstracts" type="checkbox">AB</input>
</span>&nbsp;&nbsp;
<select id="example-single-optgroups" onchange="SearchFilters(this);"> <select id="example-single-optgroups" onchange="SearchFilters(this);">
<!-- <optgroup label=""> --> <!-- <optgroup label=""> -->
<option id="filter_all" value="filter_all">All</option> <option id="filter_all" value="filter_all">All</option>
......
...@@ -45,13 +45,7 @@ ...@@ -45,13 +45,7 @@
</ul> </ul>
'>Manage</a> '>Manage</a>
<!--
<div class="progress">
<div class="progress-bar progress-bar-striped active" role="progressbar" aria-valuenow="70" aria-valuemin="0" aria-valuemax="100" style="width: 90%">
<span class="sr-only">45% Complete</span>
</div>
</div>
--!>
{% if number == 0 %} {% if number == 0 %}
<a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.id }}/">Add documents</a></p> <a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.id }}/">Add documents</a></p>
...@@ -65,8 +59,7 @@ ...@@ -65,8 +59,7 @@
<center> <center>
<a type="button" class="btn btn-default {% if view == "documents" %}active{%endif%}" href="/project/{{project.id}}/corpus/{{ corpus.id }}/documents">{{number}} Documents</a> <a type="button" class="btn btn-default {% if view == "documents" %}active{%endif%}" href="/project/{{project.id}}/corpus/{{ corpus.id }}/documents">{{number}} Documents</a>
<a type="button" class="btn btn-default {% if view == "journals" %}active{%endif%}" href="/project/{{project.id}}/corpus/{{ corpus.id }}/journals">Journals</a> <a type="button" class="btn btn-default {% if view == "journals" %}active{%endif%}" href="/project/{{project.id}}/corpus/{{ corpus.id }}/journals">Journals</a>
{% if processing == 0 or processing == "0" %}
{% if processing == 0 %}
<a type="button" class="btn btn-default {% if view == "terms" %}active{%endif%}" href="/project/{{project.id}}/corpus/{{ corpus.id }}/terms">Terms (Bêta)</a> <a type="button" class="btn btn-default {% if view == "terms" %}active{%endif%}" href="/project/{{project.id}}/corpus/{{ corpus.id }}/terms">Terms (Bêta)</a>
{% endif %} {% endif %}
</center> </center>
...@@ -92,21 +85,24 @@ ...@@ -92,21 +85,24 @@
</div> </div>
</div> </div>
<span style="display:none;" id="process_state">{{processing}}</span>
<span style="display:none;" id="corpus_id">{{corpus.id}}</span>
<div class="col-md-6"> <div class="col-md-6">
<div class="jumbotron"> <div class="jumbotron">
{% if processing > 0 %} {% if processing == 0 or processing == "0" %}
<h3> <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Networks (later)</h3> <h3> Networks </h3>
<ol> <ol>
<li>Terms</li> <li data-url="/project/{{project.id}}/corpus/{{ corpus.id }}/explorer?field1=ngrams&amp;field2=ngrams" onclick='gotoexplorer(this)'><a>Terms</a></li>
<li>Journals and Terms</li> <li data-url="/project/{{project.id}}/corpus/{{ corpus.id }}/explorer?field1=journal&amp;field2=ngrams" onclick='gotoexplorer(this)'><a>Journals and Terms</a></li>
<li>Authors and Terms</li> <li>Authors and Terms</li>
</ol> </ol>
{% else %} {% else %}
<h3> Networks </h3>
<h3><img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Networks </h3>
<h6>(Updating: <i id="process_id" data-since="date" >{{processing}}</i>)</h6>
<ol> <ol>
<li data-url="/project/{{project.id}}/corpus/{{ corpus.id }}/explorer?field1=ngrams&amp;field2=ngrams" onclick='gotoexplorer(this)'><a>Terms</a></li> <li>Terms</li>
<li data-url="/project/{{project.id}}/corpus/{{ corpus.id }}/explorer?field1=journal&amp;field2=ngrams" onclick='gotoexplorer(this)'><a>Journals and Terms</a></li> <li>Journals and Terms</li>
<li>Authors and Terms</li> <li>Authors and Terms</li>
</ol> </ol>
{% endif %} {% endif %}
...@@ -140,6 +136,35 @@ ...@@ -140,6 +136,35 @@
return window.open(url_,'_blank'); return window.open(url_,'_blank');
} }
var refresh_time = 10000 //ms

// Poll the corpus state every refresh_time ms: reload the page once the
// workflow reports "0" (finished), otherwise display the current step.
function corpus_monitorer() {
var url_ = "/api/corpus/"+$("#corpus_id").text()
$.ajax({
type: "GET",
url: url_,
dataType: "json",
success : function(data, textStatus, jqXHR) {
if(data["Processing"]=="0") {
window.location.reload()
} else {
$("#process_id").html(data["Processing"]+"...")
}
},
error: function(exception) {
console.log("exception!:"+exception.status)
}
});
}
if( $("#process_state").text()=="0" ) {
// workflow : finished!
} else {
setInterval(corpus_monitorer ,refresh_time);
}
</script> </script>
......
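The corpus_monitorer loop above polls /api/corpus/<id> until the 'Processing' field of the hyperdata reaches "0". A sketch of the same contract in Python, convenient for scripting against the API (host and corpus id are hypothetical):

import time
import requests

# hypothetical corpus id; the template reads it from the #corpus_id span
url = 'http://localhost:8000/api/corpus/1234'
while True:
    state = requests.get(url).json()
    if state.get('Processing') == '0':   # workflow finished
        print('done, reloading view')
        break
    print('still running:', state.get('Processing'))
    time.sleep(10)                       # refresh_time = 10000 ms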
...@@ -43,6 +43,9 @@ tr:hover { ...@@ -43,6 +43,9 @@ tr:hover {
.table-hover tbody tr:hover td, .table-hover tbody tr:hover th { .table-hover tbody tr:hover td, .table-hover tbody tr:hover th {
background-color: #F5A9A9; background-color: #F5A9A9;
}*/ }*/
.normal {
color: black;
}
.delete { .delete {
color:red; color:red;
...@@ -69,15 +72,15 @@ tr:hover { ...@@ -69,15 +72,15 @@ tr:hover {
border: 1px solid yellow; border: 1px solid yellow;
} }
#group_flag {
}
.dynatable-record-count { .dynatable-record-count {
font-size: 0.7em; font-size: 0.7em;
} }
.dynatable-pagination-links { .dynatable-pagination-links {
font-size: 0.7em; font-size: 0.7em;
} }
input[type=radio] {
display:none;
}
input[type=radio] + label { input[type=radio] + label {
display:inline-block; display:inline-block;
...@@ -124,6 +127,13 @@ input[type=radio]:checked + label { ...@@ -124,6 +127,13 @@ input[type=radio]:checked + label {
{% block content %} {% block content %}
<div id="content_loader">
<br>
<center>
<img width="10%" src="{% static "img/ajax-loader.gif"%}"></img>
</center>
<br>
</div>
<div class="container"> <div class="container">
<div class="container"> <div class="container">
...@@ -179,11 +189,10 @@ input[type=radio]:checked + label { ...@@ -179,11 +189,10 @@ input[type=radio]:checked + label {
</table> </table>
</p> --> </p> -->
<p align="right"> <p align="right">
<button id="Clean_All" class="btn btn-warning">Clean</button> <!-- <button id="Clean_All" class="btn btn-warning">Clean</button> -->
<button id="Save_All" class="btn btn-primary">Save</button> <button id="Save_All" class="btn btn-primary">Save</button>
</p> </p>
</div> </div>
</div> </div>
</div> </div>
...@@ -192,32 +201,65 @@ input[type=radio]:checked + label { ...@@ -192,32 +201,65 @@ input[type=radio]:checked + label {
</div> </div>
<div id="savemodal" class="modal fade"> <div id="corpuses" class="modal fade">
<div class="modal-dialog"> <div class="modal-dialog">
<div class="modal-content"> <div class="modal-content">
<div class="modal-header"> <div class="modal-header">
<button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button> <button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button>
<h4 class="modal-title">Group NGrams</h4> <h3 class="modal-title">Adding a list from another corpus</h3>
</div> </div>
<div class="modal-body form-horizontal"> <div class="modal-body form-horizontal">
Do you want to merge this elements before continuing?:
<div id="to_group"></div> <div class="form-inline">
<label class="control-label">
Which list do you want?</label>
<label class="radio">
<input value="miam" name="whichlist" disabled type="radio">MiamList
</label>
<label class="radio">
<input value="stop" name="whichlist" checked type="radio">StopList
</label>
</div> </div>
<h4>Choose one corpus:</h4>
<div style="color:red;" id="selected_corpus"></div>
<div id="user_portfolio"></div>
<div class="modal-footer"> <div class="modal-footer">
<button id="closesavemodal" type="button" class="btn btn-default" data-dismiss="modal">Close</button> <button id="closecorpuses" type="button" class="btn btn-default" data-dismiss="modal">Close</button>
<button type="button" class="btn btn-primary" onclick="GroupNGrams();">Save</button> <button id="add_corpus_tab" type="button" class="btn btn-primary" disabled onclick='printCorpuses();'>Add Tab</button>
</div> </div>
</div> </div>
</div> </div>
</div> </div>
</div>
<div id="filter_search" style="visibility:hidden">
<select id="example-single-optgroups" onchange="SearchFilters(this);">
<!-- <optgroup label=""> -->
<option id="filter_all" value="filter_all">All</option>
<!-- <option id="filter_title" value="filter_title">Title</option> -->
<!-- <option id="filter_date" value="filter_date">Date</option> -->
<!-- </optgroup> -->
<!-- <optgroup label="Duplicates"> -->
<!-- <option value="filter_doi">By DOI</option> -->
<option id="filter_map-list" value="filter_map-list">Map-List</option>
<option id="filter_stop-list" value="filter_stop-list">Stop-List</option>
<!-- </optgroup> -->
</select>
<button id="ImportList" onclick="GetUserPortfolio(); $('#corpuses').modal('show');" class="btn btn-warning">Import a Corpus-List</button>
</div>
<script type="text/javascript" src="{% static "js/jquery/jquery.min.js" %}"></script> <script type="text/javascript" src="{% static "js/jquery/jquery.min.js" %}"></script>
<script src="{% static "js/charts/bootstrap.min.js" %}"></script> <script src="{% static "js/charts/bootstrap.min.js" %}"></script>
<script src="{% static "js/libs/jquery/jquery.ba-dotimeout.min.js" %}" type="text/javascript"></script> <script src="{% static "js/libs/jquery/jquery.ba-dotimeout.min.js" %}" type="text/javascript"></script>
......
...@@ -136,6 +136,7 @@ ...@@ -136,6 +136,7 @@
<li> <li>
<a> <a>
<div id="graphid" style="visibility: hidden;">{{graphfile}}</div> <div id="graphid" style="visibility: hidden;">{{graphfile}}</div>
<input type="hidden" id="list_id" value="{{ list_id }}"></input>
<div id="jquerytemplatenb" style="visibility: hidden;">{{user.id}}</div> <div id="jquerytemplatenb" style="visibility: hidden;">{{user.id}}</div>
</a> </a>
</li> </li>
...@@ -181,8 +182,6 @@ ...@@ -181,8 +182,6 @@
</ul> </ul>
<ul class="nav navbar-nav navbar-right"> <ul class="nav navbar-nav navbar-right">
<li><a> <li><a>
<input type="checkbox" id="checkboxdiv" onclick="alertCheckBox(this);">Add</input> <input type="checkbox" id="checkboxdiv" onclick="alertCheckBox(this);">Add</input>
...@@ -194,7 +193,18 @@ ...@@ -194,7 +193,18 @@
</a></li> </a></li>
</ul> </ul>
<ul class="nav navbar-nav navbar-right">
<li>
<a>
<img width="17%" title="Compare with other corpus!" onclick="GetUserPortfolio(); $('#corpuses').modal('show');" src="{% static "js/libs/img2/INTER.png" %}"></img>
</a>
</li>
</ul>
<div class="colorgraph_div"></div> <div class="colorgraph_div"></div>
<div class="sizegraph_div"></div>
<!----> <!---->
...@@ -287,6 +297,7 @@ ...@@ -287,6 +297,7 @@
<div id="leftcolumn"> <div id="leftcolumn">
<div id="tips"></div> <div id="tips"></div>
<div id="names"></div> <div id="names"></div>
<div id="ngrams_actions"></div>
<br> <br>
...@@ -314,7 +325,7 @@ ...@@ -314,7 +325,7 @@
<ul class='etabs'> <ul class='etabs'>
<li id="tabmed" class='tab active'><a href="#tabs3">Medline Pubs</a></li> <li id="tabmed" class='tab active'><a href="#tabs3">Medline Pubs</a></li>
<li id="tabgps" class='tab'><a onclick="$('#corpuses').modal('show');">+</a></li> <li id="tabgps" class='tab'><a href="#tabs3"></a></li>
</ul> </ul>
<div class='panel-container'> <div class='panel-container'>
......
...@@ -19,14 +19,23 @@ ...@@ -19,14 +19,23 @@
<div class="col-md-4 content"> <div class="col-md-4 content">
<h1>Gargantext</h1> <h1>Gargantext</h1>
<p>A web platform to explore text-mining</p> <p>A web platform to explore text-mining</p>
<a class="btn btn-primary btn-lg" href="/projects" title="Click and test by yourself">Test Gargantext</a> <a class="btn btn-primary btn-lg" href="/projects" title="Click and test by yourself">Test Gargantext
</a>
<p>
<span class="glyphicon glyphicon-warning-sign" aria-hidden="true"></span>
<small>
<i>
Some features may not work without a JavaScript-optimized browser (Chromium, for instance).
</i>
</small>
</p>
</div> </div>
<div class="col-md-2 content"></div> <div class="col-md-2 content"></div>
<div class="col-md-2 content"></div> <div class="col-md-2 content"></div>
<div class="col-md-2 content"> <div class="col-md-2 content">
<p class="right"> <p class="right">
<div style="border:15px"> <div style="border:15px">
<img src="{% static "img/logo.png"%}" title="Logo designed by anoe" style="100px; height:150px; border:3px solid white"> <img src="{% static "img/logo.png"%}" title="Logo designed by dacha and anoe" style="100px; height:150px; border:3px solid white">
</div> </div>
</p> </p>
</div> </div>
......
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
<div class="navbar-collapse collapse"> <div class="navbar-collapse collapse">
<ul class="nav navbar-nav"> <ul class="nav navbar-nav">
<!-- <li><a href="/admin/">Admin/</a></li> --!>
<li><a href="/about/" title="More informations about the project, its sponsors and its authors.">About</a> <li><a href="/about/" title="More informations about the project, its sponsors and its authors.">About</a>
</li> </li>
{% if user.is_authenticated %} {% if user.is_authenticated %}
...@@ -75,7 +74,7 @@ ...@@ -75,7 +74,7 @@
<hr> <hr>
<footer> <footer>
<p>Gargantext, version 1.0.6, <a href="http://www.cnrs.fr" target="blank" title="Institution that enables this project.">Copyrights CNRS {{ date.year }}</a>, <p>Gargantext, version 2.0, <a href="http://www.cnrs.fr" target="blank" title="Institution that enables this project.">Copyrights CNRS {{ date.year }}</a>,
<a href="http://www.gnu.org/licenses/agpl-3.0.html" target="blank" title="Legal instructions of the project.">Licence aGPLV3</a>.</p> <a href="http://www.gnu.org/licenses/agpl-3.0.html" target="blank" title="Legal instructions of the project.">Licence aGPLV3</a>.</p>
</footer> </footer>
......
...@@ -21,6 +21,13 @@ ...@@ -21,6 +21,13 @@
{ font-size:x-small;} { font-size:x-small;}
</style> </style>
<script type="text/javascript">
</script>
{% endblock %} {% endblock %}
...@@ -266,7 +273,10 @@ ...@@ -266,7 +273,10 @@
success: function(data) { success: function(data) {
console.log("in doTheQuery() Ajax.Success:") console.log("in doTheQuery() Ajax.Success:")
console.log(data) console.log(data)
setTimeout(
function() {
location.reload(); location.reload();
}, 3000);
}, },
error: function(result) { error: function(result) {
console.log("in doTheQuery(). Data not found"); console.log("in doTheQuery(). Data not found");
...@@ -333,6 +343,7 @@ ...@@ -333,6 +343,7 @@
xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken")); xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
}, },
success: function(data) { success: function(data) {
console.log("SUCCESS")
console.log("in getGlobalResults") console.log("in getGlobalResults")
console.log(data) console.log(data)
console.log("enabling "+"#"+value.id) console.log("enabling "+"#"+value.id)
...@@ -349,12 +360,15 @@ ...@@ -349,12 +360,15 @@
$('#submit_thing').prop('disabled', false); $('#submit_thing').prop('disabled', false);
} else { } else {
$("#theresults").html("<i> <b>"+pubmedquery+"</b>: No results!.</i><br>") $("#theresults").html("<i> <b>"+pubmedquery+"</b>: No results!.</i><br>")
if(data[0]==false)
$("#theresults").html("Pubmed connection error!</i><br>")
$('#submit_thing').prop('disabled', true); $('#submit_thing').prop('disabled', true);
} }
}, },
error: function(result) { error: function(result) {
console.log("Data not found"); $("#theresults").html("Pubmed connection error!</i><br>")
$('#submit_thing').prop('disabled', true);
} }
}); });
} }
...@@ -501,7 +515,10 @@ ...@@ -501,7 +515,10 @@
success: function(data) { success: function(data) {
console.log("ajax_success: in testISTEX()") console.log("ajax_success: in testISTEX()")
console.log(data) console.log(data)
setTimeout(
function() {
location.reload(); location.reload();
}, 5000);
}, },
error: function(result) { error: function(result) {
console.log("in testISTEX(). Data not found"); console.log("in testISTEX(). Data not found");
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
<link rel="stylesheet" href="{% static "css/bootstrap.css" %}"> <link rel="stylesheet" href="{% static "css/bootstrap.css" %}">
<script src="{% static "js/jquery/jquery.min.js" %}" type="text/javascript"></script> <script src="{% static "js/jquery/jquery.min.js" %}" type="text/javascript"></script>
{% endblock %} {% endblock %}
...@@ -43,7 +44,7 @@ ...@@ -43,7 +44,7 @@
data-content=' data-content='
<ul> <ul>
<li> Rename </li> <li> Rename </li>
<li> Add new corpus </li> <li><a href="/project/{{ project.id }}">Add new corpus</a></li>
<li><a href="/delete/{{ project.id }}">Delete</a></li> <li><a href="/delete/{{ project.id }}">Delete</a></li>
</ul> </ul>
'>Manage</button> '>Manage</button>
......
# Without this, we couldn't use the Django environment
from admin.env import *
from ngram.stemLem import *
from ngram.lists import *
#user = session.query(User).all()[0]
user = session.query(User).filter(User.username=='alexandre').first()
print('Current user is:', user.username)
project = session.query(Node).filter(Node.name == 'Test').first()
if project is None:
project = Node(
name = 'Test',
type_id = cache.NodeType['Project'].id,
user_id = user.id
)
session.add(project)
session.commit()
#corpora = session.query(Node).filter(Node.parent_id == project.id,
# Node.type_id == cache.NodeType['Corpus'].id
# ).delete()
#
#models.Node.objects(parent_id = project.id, type_id = cache.NodeType['Corpus']).all().delete()
#
corpus = session.query(Node).filter(Node.parent_id == project.id,
Node.type_id == cache.NodeType['Corpus'].id).first()
print('Corpus is', corpus)
if corpus is None:
corpus = Node(
parent_id = project.id,
name = 'Test Corpus',
type_id = cache.NodeType['Corpus'].id,
user_id = user.id
)
session.add(corpus)
session.commit()
add_resource(corpus,
file = '/srv/gargantext_lib/data_samples/pubmed.zip',
# #file = '/srv/gargantext_lib/data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
type_id = cache.ResourceType['Pubmed (xml format)'].id,
)
parse_resources(corpus)
extract_ngrams(corpus, ('title', 'abstract'))
compute_tfidf(corpus)
doc_id = session.query(Node.id).filter(Node.parent_id == corpus.id,
Node.type_id == cache.NodeType['Document'].id).all()[2]
print('Miam list', listIds(typeList='MiamList', corpus_id=corpus.id, user_id=user.id)[0][0])
# Stemming the corpus
print('Working on corpus:', corpus.id, corpus.name)
stem_id = stem_corpus(corpus_id=corpus.id)
print('Stem Node.id is', stem_id)
#for typeList in ['MiamList', 'StopList', 'MainList', 'Group']:
# n = listIds(user_id=user.id,
# corpus_id=corpus.id,
# typeList=typeList)
# #print(n[0][0])
# print('Test having list_id')
# print(n, listNgramIds(list_id=n[0][0])[:3])
#
stop_list_id = listIds(user_id=user.id,
corpus_id=corpus.id,
typeList='StopList')[0][0]
miam_list_id = listIds(user_id=user.id,
corpus_id=corpus.id,
typeList='MiamList')[0][0]
print('StopList', stop_list_id)
print('MiamList', miam_list_id)
print(session.query(Node.id).filter(Node.parent_id==corpus.id, Node.type_id==cache.NodeType['WhiteList'].id).first())
#ngrams2miam(user_id=user.id, corpus_id=corpus.id)
doc_ngram_list = listNgramIds(corpus_id=corpus.id, doc_id=doc_id, user_id=user.id)
print(doc_ngram_list)
#print(listNgramIds(list_id=stop_list_id, user_id=user.id, corpus_id=corpus.id))
#type_list='MiamList'
#try:
# d = doList(type_list=type_list, user_id = user.id, corpus_id = corpus.id, limit=150)
## print('Size of the ' + type_list + ' list:',
## session.query(NodeNgram).filter(NodeNgram.node_id == d).count()
## )
#except:
# PrintException()
##
#print(listNgramIds(list_id=miam_list_id, user_id=user.id, corpus_id=corpus.id))
#
#ngram_id = listNgramIds(list_id=miam_list_id, user_id=user.id, corpus_id=corpus.id)[0][0]
#print('ngram_id', ngram_id)
#
#ngramList(do='add', ngram_ids=[ngram_id,], list_id=stop_list_id)
# print('Test having typeList and corpus.id')
# print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, user_id=user.id)[:3])
##
# print('Test having typeList and corpus.id and doc_id')
# print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, doc_id=doc_id, user_id=user.id)[:3])
import threading
from queue import Queue
# import time
import random
from gargantext_web.db import session, Node_Ngram
class ChunkedSELECTS:
def __init__(self):
self.q = Queue()
self.firstResults = []
self.lock = threading.Lock() # lock to serialize console output
self.ngrams_dict = {}
def worker_sql_action(self , docs_list):
# for each document id, fetch its (ngram_id, weight) pairs and keep
# only the ngrams listed in self.ngrams_dict
data = {}
for d in docs_list:
# this_ngrams = session.query(Node_Ngram.ngram_id).filter( Node_Ngram.node_id==d).all()
this_ngrams = session.query(Node_Ngram.ngram_id,Node_Ngram.weight).filter( Node_Ngram.node_id==d).all()
filtered_ngrams = []
for n in this_ngrams:
if n[0] in self.ngrams_dict:
# filtered_ngrams.append( n[0] )
filtered_ngrams.append( [ n[0] , int(n[1]) ] )
data[d] = filtered_ngrams
with self.lock:
# print(threading.current_thread().name, str(len(docs_list))+" OK")
return data
def worker_sql(self):
# consume chunks of document ids from the queue forever; a failed chunk
# is recorded as False instead of crashing the worker thread
while True:
item = self.q.get()
try:
result = self.worker_sql_action(item)
except:
result = False
self.firstResults.append(result)
self.q.task_done()
def chunks(self , l , n):
for i in range(0, len(l), n):
yield l[i:i+n]
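A minimal sketch of how ChunkedSELECTS might be driven (thread count, chunk size, ids and ngrams_dict below are illustrative): workers consume chunks of document ids from the shared queue and accumulate per-document filtered ngram lists in firstResults.

import threading

cs = ChunkedSELECTS()
cs.ngrams_dict = {42: True, 43: True}   # ngram ids to keep (illustrative)
doc_ids = [1, 2, 3, 4, 5, 6, 7, 8]      # document ids (illustrative)

# one daemon thread per worker, all consuming the same queue
for _ in range(4):
    t = threading.Thread(target=cs.worker_sql)
    t.daemon = True
    t.start()

for chunk in cs.chunks(doc_ids, 2):     # enqueue chunks of 2 document ids
    cs.q.put(chunk)
cs.q.join()                             # wait until every chunk is processed

# cs.firstResults now holds one {doc_id: [[ngram_id, weight], ...]} dict
# per chunk (or False for a chunk whose query failed)
print(cs.firstResults)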
...@@ -56,43 +56,6 @@ from rest_v1_0.api import JsonHttpResponse ...@@ -56,43 +56,6 @@ from rest_v1_0.api import JsonHttpResponse
from ngram.lists import listIds, listNgramIds, ngramList , doList from ngram.lists import listIds, listNgramIds, ngramList , doList
def test_page(request , project_id , corpus_id):
if not request.user.is_authenticated():
return redirect('/login/?next=%s' % request.path)
try:
offset = int(project_id)
offset = int(corpus_id)
except ValueError:
raise Http404()
t = get_template('tests/test_select-boostrap.html')
user = cache.User[request.user.username].id
date = datetime.datetime.now()
project = cache.Node[int(project_id)]
corpus = cache.Node[int(corpus_id)]
type_doc_id = cache.NodeType['Document'].id
number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0]
try:
processing = corpus.hyperdata['Processing']
except Exception as error:
print(error)
processing = 0
html = t.render(Context({
'debug': settings.DEBUG,
'user': request.user.username,
'date': date,
'project': project,
'corpus' : corpus,
'processing' : processing,
'number' : number,
}))
return HttpResponse(html)
def get_ngrams(request , project_id , corpus_id ): def get_ngrams(request , project_id , corpus_id ):
if not request.user.is_authenticated(): if not request.user.is_authenticated():
return redirect('/login/?next=%s' % request.path) return redirect('/login/?next=%s' % request.path)
...@@ -122,7 +85,7 @@ def get_ngrams(request , project_id , corpus_id ): ...@@ -122,7 +85,7 @@ def get_ngrams(request , project_id , corpus_id ):
html = t.render(Context({ html = t.render(Context({
'debug': settings.DEBUG, 'debug': settings.DEBUG,
'user': request.user.username, 'user': request.user,
'date': date, 'date': date,
'project': project, 'project': project,
'corpus' : corpus, 'corpus' : corpus,
...@@ -133,32 +96,6 @@ def get_ngrams(request , project_id , corpus_id ): ...@@ -133,32 +96,6 @@ def get_ngrams(request , project_id , corpus_id ):
return HttpResponse(html) return HttpResponse(html)
def get_stoplist(request , corpus_id , doc_id):
"""Get All for a doc id"""
user_id = request.user.id
whitelist_type_id = cache.NodeType['WhiteList'].id
document_type_id = cache.NodeType['Document'].id
miam_id = listIds(typeList='MiamList', user_id=request.user.id, corpus_id=corpus_id)[0][0]
count_min = 2
size = 1000
corpus_id = int(corpus_id)
lists = dict()
for list_type in ['StopList']:
list_id = list()
list_id = listIds(user_id=request.user.id, corpus_id=int(corpus_id), typeList=list_type)
lists["%s" % list_id[0][0]] = list_type
doc_ngram_list = listNgramIds(corpus_id=corpus_id, list_id=list_id[0][0], doc_id=list_id[0][0], user_id=request.user.id)
StopList = {}
for n in doc_ngram_list:
StopList[ n[0] ] = True
results = StopList.keys() #[ "hola" , "mundo" ]
return JsonHttpResponse(StopList)
def get_journals(request , project_id , corpus_id ): def get_journals(request , project_id , corpus_id ):
if not request.user.is_authenticated(): if not request.user.is_authenticated():
...@@ -187,7 +124,7 @@ def get_journals(request , project_id , corpus_id ): ...@@ -187,7 +124,7 @@ def get_journals(request , project_id , corpus_id ):
html = t.render(Context({ html = t.render(Context({
'debug': settings.DEBUG, 'debug': settings.DEBUG,
'user': request.user.username, 'user': request.user,
'date': date, 'date': date,
'project': project, 'project': project,
'corpus' : corpus, 'corpus' : corpus,
...@@ -216,115 +153,32 @@ def get_journals_json(request , project_id, corpus_id ): ...@@ -216,115 +153,32 @@ def get_journals_json(request , project_id, corpus_id ):
from gargantext_web.db import session, cache, Node, NodeNgram from gargantext_web.db import session, cache, Node, NodeNgram
from sqlalchemy import or_, func from sqlalchemy import or_, func
from sqlalchemy.orm import aliased from sqlalchemy.orm import aliased
# new endpoints added by this commit:

def get_corpuses( request , node_ids ):
ngrams = [int(i) for i in node_ids.split("+") ]
results = session.query(Node.id,Node.hyperdata).filter(Node.id.in_(ngrams) ).all()
for r in results:
print(r)
return JsonHttpResponse( [ "tudo" , "bem" ] )

def get_cores( request ):
import multiprocessing
cpus = multiprocessing.cpu_count()
return JsonHttpResponse( {"data":cpus} )

def get_corpus_state( request , corpus_id ):
if not request.user.is_authenticated():
return JsonHttpResponse( {"request" : "forbidden"} )
processing = ["Waiting"]
the_query = """ SELECT hyperdata FROM node_node WHERE id=%d """ % ( int(corpus_id) )
cursor = connection.cursor()
try:
cursor.execute(the_query)
processing = cursor.fetchone()[0]
finally:
connection.close()
# processing = corpus.hyperdata['Processing']
return JsonHttpResponse( processing )
\ No newline at end of file

# old endpoint removed by this commit:

def get_ngrams_json(request , project_id, corpus_id ):
results = ["holaaaa" , "mundo"]
user_id = request.user.id
whitelist_type_id = cache.NodeType['WhiteList'].id
document_type_id = cache.NodeType['Document'].id
miam_id = listIds(typeList='MiamList', user_id=request.user.id, corpus_id=corpus_id)[0][0]
count_min = 2
size = 1000
corpus_id = int(corpus_id)
lists = dict()
for list_type in ['StopList']:
list_id = list()
list_id = listIds(user_id=request.user.id, corpus_id=int(corpus_id), typeList=list_type)
lists["%s" % list_id[0][0]] = list_type
doc_ngram_list = listNgramIds(corpus_id=corpus_id, list_id=list_id[0][0], doc_id=list_id[0][0], user_id=request.user.id)
StopList = {}
for n in doc_ngram_list:
StopList[ n[0] ] = True
# [ Get Uniq_Occs ]
myamlist_type_id = cache.NodeType['MiamList'].id
myamlist = session.query(Node).filter(Node.user_id == user_id , Node.parent_id==corpus_id , Node.type_id == myamlist_type_id ).first()
myamlists = session.query(Node).filter(Node.user_id == user_id , Node.parent_id==corpus_id , Node.type_id == myamlist_type_id ).all()
# sql_average = """SELECT avg(weight) as Average FROM node_node_ngram WHERE node_node_ngram.node_id=%d""" % (myamlist.id)
# cursor = connection.cursor()
# cursor.execute(sql_average)
# avg_result = cursor.fetchone()[0]
# threshold = min (10 , math.sqrt(avg_result) )
# OCCs = session.query(Node_Ngram).filter( Node_Ngram.node_id==myamlist.id , Node_Ngram.weight >= threshold ).all()
# [ / Get Uniq_Occs ]
Miam = aliased(NodeNgram)
sql_average = (session.query(NodeNgram.ngram_id, func.sum(NodeNgram.weight))
.join(Node, Node.id == NodeNgram.node_id)
.join(Miam, Miam.ngram_id == NodeNgram.ngram_id)
.filter(Node.parent_id == corpus_id, Node.type_id==cache.NodeType['Document'].id)
.filter(Miam.node_id==myamlist.id)
.group_by(NodeNgram.ngram_id)
.all()
)
# print([n for n in sql_average])
OCCs = {}
for ngram in sql_average:
OCCs [ ngram[0] ] = ngram[1]
# [ Initializing Ngrams_Scores with occ_uniq ]
Ngrams_Scores = {}
for ngram in OCCs:
if ngram not in StopList:
if ngram not in Ngrams_Scores:
Ngrams_Scores[ngram] = {}
Ngrams_Scores[ngram]["scores"] = {
"occ_uniq": round(OCCs[ngram]),
"tfidf_sum": 0.0
}
# [ / Initializing Ngrams_Scores with occ_uniq ]
# [ Getting TF-IDF scores (sum per each ngram) ]
NgramTFIDF = session.query(NodeNodeNgram).filter( NodeNodeNgram.nodex_id==corpus_id ).all()
for ngram in NgramTFIDF:
if ngram.ngram_id not in StopList:
if ngram.ngram_id in Ngrams_Scores:
Ngrams_Scores[ngram.ngram_id]["scores"]["tfidf_sum"] += ngram.score
# [ / Getting TF-IDF scores ]
# [ Preparing JSON-Array full of Scores! ]
Metrics = {
"ngrams":[],
"scores": {}
}
ngrams_ids = Ngrams_Scores.keys()
query = session.query(Ngram).filter(Ngram.id.in_( ngrams_ids ))
ngrams_data = query.all()
for ngram in ngrams_data:
if ngram.id not in StopList:
occ_uniq = occ_uniq = Ngrams_Scores[ngram.id]["scores"]["occ_uniq"]
Ngrams_Scores[ngram.id]["name"] = ngram.terms
Ngrams_Scores[ngram.id]["id"] = ngram.id
Ngrams_Scores[ngram.id]["scores"]["tfidf"] = Ngrams_Scores[ngram.id]["scores"]["tfidf_sum"] / occ_uniq
del Ngrams_Scores[ngram.id]["scores"]["tfidf_sum"]
Metrics["ngrams"].append( Ngrams_Scores[ngram.id] )
Metrics["scores"] = {
"initial":"occ_uniq",
"nb_docs":1,
"orig_nb_ngrams":1,
"nb_ngrams":len(Metrics["ngrams"]),
# "occs_threshold":threshold
}
# [ / Preparing JSON-Array full of Scores! ]
# print("miamlist:",myamlist.id)
# print("sql avg:",sql_average)
# print (avg_result)
# print ("LALALALALALALALLLALALALALA")
return JsonHttpResponse(Metrics)