Commit b82a67ad authored by PkSM3's avatar PkSM3

[UPDATE] pull unstable and explorer view OK

parents 6b2cbdd1 7af355a0
...@@ -25,7 +25,7 @@ def get_session(): ...@@ -25,7 +25,7 @@ def get_session():
from aldjemy.core import get_engine from aldjemy.core import get_engine
alias = 'default' alias = 'default'
connection = connections[alias] connection = connections[alias]
engine = create_engine("postgresql+psycopg2://alexandre:C8kdcUrAQy66U@localhost/gargandb", engine = create_engine("postgresql+psycopg2://gargantua:C8kdcUrAQy66U@localhost/gargandb",
use_native_hstore=True) use_native_hstore=True)
Session = sessionmaker(bind=engine) Session = sessionmaker(bind=engine)
return Session() return Session()
......
...@@ -249,35 +249,3 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150 ...@@ -249,35 +249,3 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150
#print(data) #print(data)
return data return data
from analysis.tfidf import tfidf
def do_tfidf(corpus, reset=True):
    """Compute and store a tf-idf score for every (document, ngram) pair of a corpus.

    All the work runs inside a single ``transaction.atomic()`` block.

    Parameters:
        corpus: expected to be a ``Node`` whose ``type.name`` is ``"Corpus"``;
            anything else only prints a message and nothing is scored.
        reset: when truthy, every ``NodeNodeNgram`` row whose ``nodex`` is
            ``corpus`` is deleted before scoring.

    Returns:
        None. Scores are persisted as ``NodeNodeNgram`` rows
        (``nodex`` = corpus, ``nodey`` = document).
    """
    with transaction.atomic():
        if reset:
            NodeNodeNgram.objects.filter(nodex=corpus).delete()

        if not (isinstance(corpus, Node) and corpus.type.name == "Corpus"):
            print("Only corpus implemented yet, you put instead:", type(corpus))
            return

        # Looked up once: the node type is the same for every document.
        document_type = NodeType.objects.get(name="Document")
        for document in Node.objects.filter(parent=corpus, type=document_type):
            for node_ngram in Node_Ngram.objects.filter(node=document):
                # Explicit existence test instead of a bare ``except:`` around
                # ``.get()``: unrelated errors are no longer mistaken for
                # "row missing".
                already_scored = NodeNodeNgram.objects.filter(
                    nodex=corpus, nodey=document, ngram=node_ngram.ngram
                ).exists()
                if already_scored:
                    continue
                score = tfidf(corpus, document, node_ngram.ngram)
                NodeNodeNgram(
                    nodex=corpus,
                    nodey=node_ngram.node,
                    ngram=node_ngram.ngram,
                    score=score,
                ).save()
...@@ -21,7 +21,7 @@ def get_session(): ...@@ -21,7 +21,7 @@ def get_session():
from aldjemy.core import get_engine from aldjemy.core import get_engine
alias = 'default' alias = 'default'
connection = connections[alias] connection = connections[alias]
engine = create_engine("postgresql+psycopg2://alexandre:C8kdcUrAQy66U@localhost/gargandb", engine = create_engine("postgresql+psycopg2://gargantua:C8kdcUrAQy66U@localhost/gargandb",
use_native_hstore=True) use_native_hstore=True)
Session = sessionmaker(bind=engine) Session = sessionmaker(bind=engine)
return Session() return Session()
......
#from .celery import app as async_app
...@@ -11,9 +11,9 @@ from sqlalchemy import text, distinct ...@@ -11,9 +11,9 @@ from sqlalchemy import text, distinct
from sqlalchemy.sql import func from sqlalchemy.sql import func
from sqlalchemy.orm import aliased from sqlalchemy.orm import aliased
from gargantext_web.views import move_to_trash
from .db import * from .db import *
from node import models
def DebugHttpResponse(data): def DebugHttpResponse(data):
return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), )) return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), ))
...@@ -47,10 +47,14 @@ _ngrams_order_columns = { ...@@ -47,10 +47,14 @@ _ngrams_order_columns = {
} }
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from rest_framework.permissions import IsAuthenticated
from rest_framework.views import APIView from rest_framework.views import APIView
from rest_framework.response import Response from rest_framework.response import Response
from rest_framework.exceptions import APIException as _APIException from rest_framework.exceptions import APIException as _APIException
class APIException(_APIException): class APIException(_APIException):
def __init__(self, message, code=500): def __init__(self, message, code=500):
self.status_code = code self.status_code = code
...@@ -200,7 +204,7 @@ class NodesChildrenDuplicates(APIView): ...@@ -200,7 +204,7 @@ class NodesChildrenDuplicates(APIView):
count = len(duplicate_nodes) count = len(duplicate_nodes)
for node in duplicate_nodes: for node in duplicate_nodes:
print("deleting node ",node.id) print("deleting node ",node.id)
node.delete() move_to_trash(node.id)
# print(delete_query) # print(delete_query)
# # delete_query.delete(synchronize_session=True) # # delete_query.delete(synchronize_session=True)
# session.flush() # session.flush()
...@@ -552,11 +556,13 @@ class NodesChildrenQueries(APIView): ...@@ -552,11 +556,13 @@ class NodesChildrenQueries(APIView):
class NodesList(APIView): class NodesList(APIView):
authentication_classes = (SessionAuthentication, BasicAuthentication)
def get(self, request): def get(self, request):
print("user id : " + str(request.user))
query = (session query = (session
.query(Node.id, Node.name, NodeType.name.label('type')) .query(Node.id, Node.name, NodeType.name.label('type'))
.filter(Node.user_id == request.session._session_cache['_auth_user_id']) .filter(Node.user_id == int(request.user.id))
.join(NodeType) .join(NodeType)
) )
if 'type' in request.GET: if 'type' in request.GET:
...@@ -579,6 +585,8 @@ class Nodes(APIView): ...@@ -579,6 +585,8 @@ class Nodes(APIView):
return JsonHttpResponse({ return JsonHttpResponse({
'id': node.id, 'id': node.id,
'name': node.name, 'name': node.name,
'parent_id': node.parent_id,
'type': cache.NodeType[node.type_id].name,
# 'type': node.type__name, # 'type': node.type__name,
#'metadata': dict(node.metadata), #'metadata': dict(node.metadata),
'metadata': node.metadata, 'metadata': node.metadata,
...@@ -589,13 +597,19 @@ class Nodes(APIView): ...@@ -589,13 +597,19 @@ class Nodes(APIView):
# it should take the subnodes into account as well, # it should take the subnodes into account as well,
# for better constistency... # for better constistency...
def delete(self, request, node_id): def delete(self, request, node_id):
user = request.user
node = session.query(Node).filter(Node.id == node_id).first() node = session.query(Node).filter(Node.id == node_id).first()
msgres = ""
msgres = str()
try: try:
node.delete()
msgres = node_id+" deleted!" move_to_trash(node_id)
except: msgres = node_id+" moved to Trash"
msgres ="error deleting: "+node_id
except Exception as error:
msgres ="error deleting : " + node_id + str(error)
return JsonHttpResponse({ return JsonHttpResponse({
'deleted': msgres, 'deleted': msgres,
...@@ -611,7 +625,7 @@ class CorpusController: ...@@ -611,7 +625,7 @@ class CorpusController:
raise ValidationError('Corpora are identified by an integer.', 400) raise ValidationError('Corpora are identified by an integer.', 400)
corpusQuery = session.query(Node).filter(Node.id == corpus_id).first() corpusQuery = session.query(Node).filter(Node.id == corpus_id).first()
# print(str(corpusQuery)) # print(str(corpusQuery))
# raise Http404("C'est toujours ça de pris.") # raise Http404("404 error.")
if not corpusQuery: if not corpusQuery:
raise Http404("No such corpus: %d" % (corpus_id, )) raise Http404("No such corpus: %d" % (corpus_id, ))
corpus = corpusQuery.first() corpus = corpusQuery.first()
......
# -*- coding: utf-8 -*-
#import os
#import djcelery
#
#from celery import Celery
#
#from django.conf import settings
#
## set the default Django settings module for the 'celery' program.
#os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'gargantext_web.settings')
#
#app = Celery('gargantext_web')
#
#
#app.conf.update(
# CELERY_RESULT_BACKEND='djcelery.backends.database:DatabaseBackend',
#)
#
#
#app.conf.update(
# CELERY_RESULT_BACKEND='djcelery.backends.cache:CacheBackend',
#)
#
## Using a string here means the worker will not have to
## pickle the object when using Windows.
##app.config_from_object('django.conf:settings')
#app.autodiscover_tasks(lambda: settings.INSTALLED_APPS)
#
from celery import shared_task
from node import models
#@app.task(bind=True)
@shared_task
def debug_task(request):
    """Celery task that prints the repr of the value it receives."""
    message = 'Request: {0!r}'.format(request)
    print(message)
from gargantext_web.db import session, Node
@shared_task
def apply_sum(x, y):
    """Celery task: print ``x + y``, then print the first ``Node.name`` row.

    The second print goes through the shared SQLAlchemy ``session``.
    """
    total = x + y
    print(total)
    first_name_row = session.query(Node.name).first()
    print(first_name_row)
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
@shared_task
def apply_workflow(corpus_id):
    """Celery task running the corpus workflow for the node with id ``corpus_id``.

    Steps, in order: load the corpus through the SQLAlchemy session, parse its
    resources, set ``metadata['Processing']`` to 0 through the Django ORM
    (any failure there is only printed), extract ngrams from the ``title``
    field, then compute tf-idf.
    """
    corpus = session.query(Node).filter(Node.id == corpus_id).first()
    parse_resources(corpus)

    separator = "-" * 60
    try:
        print(separator)
        # The flag is updated through the Django ORM.
        django_corpus = models.Node.objects.get(id=corpus_id)
        django_corpus.metadata['Processing'] = 0
        django_corpus.save()
        print(separator)
        # TODO: the same update through SQLAlchemy does not work (reason unknown):
        #     corpus.metadata['Processing'] = 0
        #     session.add(corpus)
        #     session.flush()
    except Exception as error:
        print(error)

    extract_ngrams(corpus, ['title'])
    compute_tfidf(corpus)
...@@ -2,6 +2,7 @@ from gargantext_web import settings ...@@ -2,6 +2,7 @@ from gargantext_web import settings
from node import models from node import models
__all__ = ['literalquery', 'session', 'cache', 'Session', 'bulk_insert', 'engine', 'get_cursor'] __all__ = ['literalquery', 'session', 'cache', 'Session', 'bulk_insert', 'engine', 'get_cursor']
...@@ -56,6 +57,23 @@ for model_name, model in models.__dict__.items(): ...@@ -56,6 +57,23 @@ for model_name, model in models.__dict__.items():
NodeNgram = Node_Ngram NodeNgram = Node_Ngram
NodeResource = Node_Resource NodeResource = Node_Resource
# manually declare the Node table...
from datetime import datetime
from sqlalchemy.types import *
from sqlalchemy.schema import Column, ForeignKey
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import relationship, aliased
# class Node(Base):
# __tablename__ = 'node_node'
# id = Column(Integer, primary_key=True)
# user_id = Column(Integer, ForeignKey('auth_user.id', ondelete='CASCADE'), index=True, nullable=False)
# type_id = Column(Integer, ForeignKey('node_nodetype.id', ondelete='CASCADE'), index=True, nullable=False)
# name = Column(String(255))
# language_id = Column(Integer, ForeignKey('node_language.id', ondelete='CASCADE'), index=True, nullable=False)
# date = Column(DateTime(), default=datetime.utcnow, nullable=True)
# metadata = Column(JSONB, default={}, nullable=False)
# debugging tool, to translate SQLAlchemy queries to string # debugging tool, to translate SQLAlchemy queries to string
...@@ -67,7 +85,6 @@ def literalquery(statement, dialect=None): ...@@ -67,7 +85,6 @@ def literalquery(statement, dialect=None):
purposes only. Executing SQL statements with inline-rendered user values is purposes only. Executing SQL statements with inline-rendered user values is
extremely insecure. extremely insecure.
""" """
from datetime import datetime
import sqlalchemy.orm import sqlalchemy.orm
if isinstance(statement, sqlalchemy.orm.Query): if isinstance(statement, sqlalchemy.orm.Query):
if dialect is None: if dialect is None:
......
...@@ -14,11 +14,33 @@ BASE_DIR = os.path.dirname(os.path.dirname(__file__)) ...@@ -14,11 +14,33 @@ BASE_DIR = os.path.dirname(os.path.dirname(__file__))
PROJECT_PATH = os.path.join(BASE_DIR, os.pardir) PROJECT_PATH = os.path.join(BASE_DIR, os.pardir)
PROJECT_PATH = os.path.abspath(PROJECT_PATH) PROJECT_PATH = os.path.abspath(PROJECT_PATH)
######################################################################
# ASYNCHRONOUS TASKS
import djcelery import djcelery
djcelery.setup_loader() djcelery.setup_loader()
BROKER_URL = 'amqp://guest:guest@localhost:5672/' BROKER_URL = 'amqp://guest:guest@localhost:5672/'
CELERY_IMPORTS=("node.models",)
CELERY_IMPORTS=("node.models","gargantext_web.celery")
#
#from celery import Celery
#
#app = Celery('gargantext_web')
#
#app.conf.update(
# CELERY_RESULT_BACKEND='djcelery.backends.database:DatabaseBackend',
#)
#
#
#app.conf.update(
# CELERY_RESULT_BACKEND='djcelery.backends.cache:CacheBackend',
#)
#
######################################################################
# Quick-start development settings - unsuitable for production # Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.6/howto/deployment/checklist/ # See https://docs.djangoproject.com/en/1.6/howto/deployment/checklist/
...@@ -48,8 +70,16 @@ TEMPLATE_DIRS = ( ...@@ -48,8 +70,16 @@ TEMPLATE_DIRS = (
#ALLOWED_HOSTS = ['*',] #ALLOWED_HOSTS = ['*',]
ALLOWED_HOSTS = ['localhost', 'master.polemic.be', 'beta.gargantext.org'] ALLOWED_HOSTS = ['localhost',
'gargantext.org',
'stable.gargantext.org',
'dev.gargantext.org',
'iscpif.gargantext.org',
'mines.gargantext.org',
'beta.gargantext.org',
'garg-dev.iscpif.fr',
'garg-stable.iscpif.fr',
]
# Application definition # Application definition
...@@ -82,6 +112,16 @@ MIDDLEWARE_CLASSES = ( ...@@ -82,6 +112,16 @@ MIDDLEWARE_CLASSES = (
'django.middleware.clickjacking.XFrameOptionsMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware',
) )
REST_SESSION_LOGIN = False
REST_FRAMEWORK = {
'DEFAULT_AUTHENTICATION_CLASSES': (
'rest_framework.authentication.TokenAuthentication',
'rest_framework.authentication.SessionAuthentication',
),
'DEFAULT_PERMISSION_CLASSES': (
'rest_framework.permissions.AllowAny',
),
}
WSGI_APPLICATION = 'wsgi.application' WSGI_APPLICATION = 'wsgi.application'
...@@ -93,7 +133,7 @@ DATABASES = { ...@@ -93,7 +133,7 @@ DATABASES = {
'default': { 'default': {
'ENGINE': 'django.db.backends.postgresql_psycopg2', 'ENGINE': 'django.db.backends.postgresql_psycopg2',
'NAME': 'gargandb', 'NAME': 'gargandb',
'USER': 'alexandre', 'USER': 'gargantua',
'PASSWORD': 'C8kdcUrAQy66U', 'PASSWORD': 'C8kdcUrAQy66U',
#'USER': 'gargantext', #'USER': 'gargantext',
#'PASSWORD': 'C8krdcURAQy99U', #'PASSWORD': 'C8krdcURAQy99U',
......
from celery import shared_task
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
@shared_task
def apply_workflow(corpus):
    """Celery task: parse the corpus resources, extract title ngrams, compute tf-idf."""
    ngram_fields = ['title']
    parse_resources(corpus)
    extract_ngrams(corpus, ngram_fields)
    compute_tfidf(corpus)
...@@ -33,7 +33,7 @@ urlpatterns = patterns('', ...@@ -33,7 +33,7 @@ urlpatterns = patterns('',
# Project Management # Project Management
url(r'^projects/$', views.projects), url(r'^projects/$', views.projects),
url(r'^project/(\d+)/$', views_optimized.project), url(r'^project/(\d+)/$', views_optimized.project),
url(r'^delete/(\d+)$', views.trash_node), # => api.node('id' = id, children = 'True', copies = False) url(r'^delete/(\d+)$', views.delete_node), # => api.node('id' = id, children = 'True', copies = False)
# Corpus management # Corpus management
url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus), url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus),
......
...@@ -46,6 +46,8 @@ from sqlalchemy import or_, func ...@@ -46,6 +46,8 @@ from sqlalchemy import or_, func
from gargantext_web import about from gargantext_web import about
def login_user(request): def login_user(request):
logout(request) logout(request)
username = password = '' username = password = ''
...@@ -199,7 +201,6 @@ def home_view(request): ...@@ -199,7 +201,6 @@ def home_view(request):
t = get_template('home.html') t = get_template('home.html')
user = request.user user = request.user
date = datetime.datetime.now() date = datetime.datetime.now()
html = t.render(Context({\ html = t.render(Context({\
'user': user,\ 'user': user,\
'date': date,\ 'date': date,\
...@@ -455,7 +456,21 @@ def empty_trash(): ...@@ -455,7 +456,21 @@ def empty_trash():
node.delete() node.delete()
def move_to_trash(node_id):
    """Retype the node with id ``node_id`` as 'Trash' and commit the change.

    Parameters:
        node_id: id of the node to move; callers pass it either as an int or
            as a string.

    Returns:
        The ``type_id`` the node had before the move, or ``None`` when the
        move failed (the error is printed, not raised).
    """
    try:
        node = session.query(Node).filter(Node.id == node_id).first()
        previous_type_id = node.type_id
        node.type_id = cache.NodeType['Trash'].id
        session.add(node)
        session.commit()
        return previous_type_id
    except Exception as error:
        # str() on both values: concatenating an int id or the exception
        # object directly raised TypeError here and hid the real error.
        print("can not move to trash Node" + str(node_id) + ":" + str(error))
        return None
def delete_node(request, node_id):
# do we have a valid user? # do we have a valid user?
user = request.user user = request.user
...@@ -466,52 +481,18 @@ def trash_node(request, node_id): ...@@ -466,52 +481,18 @@ def trash_node(request, node_id):
if node.user_id != user.id: if node.user_id != user.id:
return HttpResponseForbidden() return HttpResponseForbidden()
previous_type_id = node.type_id previous_type_id = move_to_trash(node_id)
node.type_id = cache.NodeType['Trash'].id
session.add(node) if previous_type_id == cache.NodeType['Corpus'].id:
session.commit() return HttpResponseRedirect('/project/' + str(node.parent_id))
else:
if previous_type_id == cache.NodeType['Project'].id:
return HttpResponseRedirect('/projects/') return HttpResponseRedirect('/projects/')
elif previous_type_id == cache.NodeType['Corpus'].id:
return HttpResponseRedirect('/project/' + str(session.query(Node.id).filter(Node.id==node.parent_id).first()[0]))
if settings.DEBUG == True: if settings.DEBUG == True:
empty_trash() empty_trash()
def delete_node(request, node_id):
#nodes = session.query(Node).filter(or_(Node.id == node_id, Node.parent_id == node_id)).all()
# try:
# resources = session.query(Node_Resource).filter(Node_Resource.node_id==node_id).all()
# if resources is not None:
# for resource in resources:
# session.delete(resource)
#
# except Exception as error:
# print(error)
#
# node = session.query(Node).filter(Node.id == node_id).first()
# if node is not None:
# session.delete(node)
# session.commit()
node = models.Node.objects.get(id=node_id)
with transaction.atomic():
try:
node.children.delete()
except Exception as error:
print(error)
node.delete()
if node.type_id == cache.NodeType['Project'].id:
return HttpResponseRedirect('/projects/')
elif node.type_id == cache.NodeType['Corpus'].id:
return HttpResponseRedirect('/project/' + node_id)
def delete_corpus(request, project_id, node_id): def delete_corpus(request, project_id, node_id):
# ORM Django # ORM Django
......
...@@ -7,6 +7,7 @@ from sqlalchemy.orm import aliased ...@@ -7,6 +7,7 @@ from sqlalchemy.orm import aliased
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
from time import sleep
from threading import Thread from threading import Thread
from node.admin import CustomForm from node.admin import CustomForm
...@@ -14,14 +15,14 @@ from gargantext_web.db import * ...@@ -14,14 +15,14 @@ from gargantext_web.db import *
from gargantext_web.settings import DEBUG, MEDIA_ROOT from gargantext_web.settings import DEBUG, MEDIA_ROOT
from gargantext_web.api import JsonHttpResponse from gargantext_web.api import JsonHttpResponse
import json import json
import re
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
def project(request, project_id): from gargantext_web.celery import apply_workflow
# SQLAlchemy session def project(request, project_id):
session = Session()
# do we have a valid project id? # do we have a valid project id?
try: try:
...@@ -51,7 +52,7 @@ def project(request, project_id): ...@@ -51,7 +52,7 @@ def project(request, project_id):
# ... sqlalchemy.func by Resource.type_id is the guilty # ... sqlalchemy.func by Resource.type_id is the guilty
# ISSUE L51 # ISSUE L51
corpus_query = (session corpus_query = (session
.query(Node.id, Node.name, func.count(ChildrenNode.id)) .query(Node.id, Node.name, func.count(ChildrenNode.id), Node.metadata['Processing'])
#.query(Node.id, Node.name, Resource.type_id, func.count(ChildrenNode.id)) #.query(Node.id, Node.name, Resource.type_id, func.count(ChildrenNode.id))
#.join(Node_Resource, Node_Resource.node_id == Node.id) #.join(Node_Resource, Node_Resource.node_id == Node.id)
#.join(Resource, Resource.id == Node_Resource.resource_id) #.join(Resource, Resource.id == Node_Resource.resource_id)
...@@ -66,8 +67,10 @@ def project(request, project_id): ...@@ -66,8 +67,10 @@ def project(request, project_id):
documents_count_by_resourcetype = defaultdict(int) documents_count_by_resourcetype = defaultdict(int)
corpora_count = 0 corpora_count = 0
corpusID_dict = {} corpusID_dict = {}
for corpus_id, corpus_name, document_count in corpus_query:
for corpus_id, corpus_name, document_count, processing in corpus_query:
#print(corpus_id, processing)
# Not optimized GOTO ISSUE L51 # Not optimized GOTO ISSUE L51
resource_type_id = (session.query(Resource.type_id) resource_type_id = (session.query(Resource.type_id)
.join(Node_Resource, Node_Resource.resource_id == Resource.id) .join(Node_Resource, Node_Resource.resource_id == Resource.id)
...@@ -82,9 +85,10 @@ def project(request, project_id): ...@@ -82,9 +85,10 @@ def project(request, project_id):
resourcetype = cache.ResourceType[resource_type_id] resourcetype = cache.ResourceType[resource_type_id]
resourcetype_name = resourcetype.name resourcetype_name = resourcetype.name
corpora_by_resourcetype[resourcetype_name].append({ corpora_by_resourcetype[resourcetype_name].append({
'id': corpus_id, 'id' : corpus_id,
'name': corpus_name, 'name' : corpus_name,
'count': document_count, 'count' : document_count,
'processing': processing,
}) })
documents_count_by_resourcetype[resourcetype_name] += document_count documents_count_by_resourcetype[resourcetype_name] += document_count
corpora_count += 1 corpora_count += 1
...@@ -93,7 +97,7 @@ def project(request, project_id): ...@@ -93,7 +97,7 @@ def project(request, project_id):
# do the donut # do the donut
total_documents_count = sum(documents_count_by_resourcetype.values()) total_documents_count = sum(documents_count_by_resourcetype.values())
donut = [ donut = [
{ 'source': key, { 'source': re.sub(' \(.*$', '', key),
'count': value, 'count': value,
'part' : round(value * 100 / total_documents_count) if total_documents_count else 0, 'part' : round(value * 100 / total_documents_count) if total_documents_count else 0,
} }
...@@ -112,20 +116,21 @@ def project(request, project_id): ...@@ -112,20 +116,21 @@ def project(request, project_id):
resourcetype = cache.ResourceType[form.cleaned_data['type']] resourcetype = cache.ResourceType[form.cleaned_data['type']]
# which default language shall be used? # which default language shall be used?
if resourcetype.name == "europress_french": if resourcetype.name == "Europress (French)":
language_id = cache.Language['fr'].id language_id = cache.Language['fr'].id
elif resourcetype.name == "europress_english": elif resourcetype.name == "Europress (English)":
language_id = cache.Language['en'].id language_id = cache.Language['en'].id
else: else:
language_id = None language_id = None
# corpus node instanciation as a Django model # corpus node instanciation as a Django model
corpus = Node( corpus = Node(
name = name, name = name,
user_id = request.user.id, user_id = request.user.id,
parent_id = project_id, parent_id = project_id,
type_id = cache.NodeType['Corpus'].id, type_id = cache.NodeType['Corpus'].id,
language_id = language_id, language_id = language_id,
metadata = {'Processing' : 1,}
) )
session.add(corpus) session.add(corpus)
session.commit() session.commit()
...@@ -142,25 +147,25 @@ def project(request, project_id): ...@@ -142,25 +147,25 @@ def project(request, project_id):
) )
# let's start the workflow # let's start the workflow
try: try:
def apply_workflow(corpus): if DEBUG is False:
parse_resources(corpus) apply_workflow.apply_async((corpus.id,),)
extract_ngrams(corpus, ['title'])
compute_tfidf(corpus)
if DEBUG:
apply_workflow(corpus)
else: else:
thread = Thread(target=apply_workflow, args=(corpus, ), daemon=True) #apply_workflow(corpus)
thread.start() thread = Thread(target=apply_workflow, args=(corpus.id, ), daemon=True)
thread.start()
except Exception as error: except Exception as error:
print('WORKFLOW ERROR') print('WORKFLOW ERROR')
print(error) print(error)
# redirect to the main project page # redirect to the main project page
# TODO need to wait before response (need corpus update)
sleep(1)
return HttpResponseRedirect('/project/' + str(project_id)) return HttpResponseRedirect('/project/' + str(project_id))
else: else:
print('ERROR: BAD FORM') print('ERROR: BAD FORM')
else: else:
form = CustomForm() form = CustomForm()
# HTML output # HTML output
return render(request, 'project.html', { return render(request, 'project.html', {
'form' : form, 'form' : form,
......
...@@ -39,10 +39,10 @@ In PostreSQL ...@@ -39,10 +39,10 @@ In PostreSQL
3) psql 3) psql
4) CREATE USER alexandre WITH PASSWORD 'C8kdcUrAQy66U'; 4) CREATE USER gargantua WITH PASSWORD 'C8kdcUrAQy66U';
(see gargantext_web/settings.py, DATABASES = { ... }) (see gargantext_web/settings.py, DATABASES = { ... })
5) CREATE DATABASE gargandb WITH OWNER alexandre; 5) CREATE DATABASE gargandb WITH OWNER gargantua;
6) Ctrl + D 6) Ctrl + D
...@@ -80,7 +80,7 @@ Last steps of configuration ...@@ -80,7 +80,7 @@ Last steps of configuration
Warning: for ln, path has to be absolute! Warning: for ln, path has to be absolute!
5) patch CTE: 5) patch CTE:
patch /srv/gargantext_env/lib/python3.4/site-packages/cte_tree/models.py /srv/gargantext/init/cte_tree.models.diff patch /srv/gargantext_env/lib/python3.4/site-packages/cte_tree/models.py /srv/gargantext/init/patches/cte_tree.models.diff
6) init nodetypes and main variables 6) init nodetypes and main variables
/srv/gargantext/manage.py shell < /srv/gargantext/init/init.py /srv/gargantext/manage.py shell < /srv/gargantext/init/init.py
......
# System packages needed by the project (Debian).
sudo apt-get install postgresql
sudo apt-get install postgresql-contrib
sudo apt-get install python-virtualenv
sudo apt-get install libpng12-dev
sudo apt-get install libpng-dev
sudo apt-cache search freetype
sudo apt-get install libfreetype6-dev
sudo apt-cache search python-dev
sudo apt-get install python-dev
sudo apt-get install libpq-dev
sudo apt-get install postgresql-contrib
sudo apt-get install libpq-dev
# Pulls in every build dependency of matplotlib (heavy-handed; TODO find a cleaner way)
sudo apt-get build-dep python-matplotlib
# Debian packages to install
# easy_install -U distribute (matplotlib)
# lxml
sudo apt-get install libffi-dev
sudo apt-get install libxml2-dev
sudo apt-get install libxslt1-dev
# ipython readline
sudo apt-get install libncurses5-dev
sudo apt-get install pandoc
# scipy:
sudo apt-get install gfortran
sudo apt-get install libopenblas-dev
sudo apt-get install liblapack-dev
# nlpserver
sudo apt-get install libgflags-dev
sudo aptitude install libgoogle-glog-dev
source /srv/gargantext_env/bin/activate
pip3 install git+https://github.com/mathieurodic/aldjemy.git
-- Convert node_node.metadata from hstore to JSON.
-- The NOT NULL constraint and the default are dropped first, the column type
-- is changed, then a JSON default and the NOT NULL constraint are put back.

-- 1) allow NULL
ALTER TABLE ONLY node_node
ALTER COLUMN metadata
DROP NOT NULL
;
-- 2) remove the existing default
ALTER TABLE ONLY node_node
ALTER COLUMN metadata
DROP DEFAULT
;
-- 3) change the column type, converting existing values with hstore_to_json()
ALTER TABLE ONLY node_node
ALTER COLUMN metadata
TYPE JSON
USING hstore_to_json(metadata)
;
-- 4) new default: an empty JSON object
ALTER TABLE ONLY node_node
ALTER COLUMN metadata
SET DEFAULT '{}'::json
;
-- 5) restore the NOT NULL constraint
ALTER TABLE ONLY node_node
ALTER COLUMN metadata
SET NOT NULL
;
...@@ -104,30 +104,15 @@ except Exception as error: ...@@ -104,30 +104,15 @@ except Exception as error:
# In[33]: # In[33]:
try: from parsing.parsers_config import parsers
typePubmed = ResourceType.objects.get(name='pubmed')
typeIsi = ResourceType.objects.get(name='isi')
typeRis = ResourceType.objects.get(name='ris')
typePresseFrench = ResourceType.objects.get(name='europress_french')
typePresseEnglish = ResourceType.objects.get(name='europress_english')
except Exception as error: ResourceType.objects.all().delete()
print(error)
for key in parsers.keys():
typePubmed = ResourceType(name='pubmed') try:
typePubmed.save() ResourceType.objects.get_or_create(name=key)
except Exception as error:
typeIsi = ResourceType(name='isi') print("Ressource Error: ", error)
typeIsi.save()
typeRis = ResourceType(name='ris')
typeRis.save()
typePresseFrench = ResourceType(name='europress_french')
typePresseFrench.save()
typePresseEnglish = ResourceType(name='europress_english')
typePresseEnglish.save()
# In[34]: # In[34]:
......
...@@ -4,6 +4,7 @@ psql -d gargandb -f init.sql ...@@ -4,6 +4,7 @@ psql -d gargandb -f init.sql
sleep 2 sleep 2
../manage.py syncdb ../manage.py syncdb
psql -d gargandb -f init2.sql psql -d gargandb -f init2.sql
......
ALTER TABLE ONLY node_node ALTER COLUMN date SET DEFAULT CURRENT_DATE;
-- ALTER TABLE ONLY node_node ALTER COLUMN metadata SET DEFAULT ''::hstore;
-- ALTER TABLE ONLY node_node ALTER COLUMN metadata TYPE JSONB;
-- ALTER TABLE ONLY node_node ALTER COLUMN metadata SET DEFAULT '{}'::JSONB;
...@@ -54,16 +54,13 @@ import pycountry ...@@ -54,16 +54,13 @@ import pycountry
Language.objects.all().delete() Language.objects.all().delete()
for language in pycountry.languages: for language in pycountry.languages:
if 'alpha2' in language.__dict__: if 'alpha2' in language.__dict__:
Language( models.Language(
iso2 = language.alpha2, iso2 = language.alpha2,
iso3 = language.bibliographic, iso3 = language.bibliographic,
fullname = language.name, fullname = language.name,
implemented = 1 if language.alpha2 in ['en', 'fr'] else 0, implemented = 1 if language.alpha2 in ['en', 'fr'] else 0,
).save() ).save()
english = Language.objects.get(iso2='en')
french = Language.objects.get(iso2='fr')
# Integration: users # Integration: users
...@@ -99,56 +96,10 @@ for node_type in node_types: ...@@ -99,56 +96,10 @@ for node_type in node_types:
print('Initialize resource...') print('Initialize resource...')
resources = [ from parsing.parsers_config import parsers
'pubmed', 'isi', 'ris', 'europress_french', 'europress_english']
for resource in resources:
models.ResourceType.objects.get_or_create(name=resource)
# TODO
# here some tests
# add a new project and some corpora to test it
# Integration: project
#
#print('Initialize project...')
#try:
# project = Node.objects.get(name='Bees project')
#except:
# project = Node(name='Bees project', type=typeProject, user=me)
# project.save()
#
# Integration: corpus
#print('Initialize corpus...')
#try:
# corpus_pubmed = Node.objects.get(name='PubMed corpus')
#except:
# corpus_pubmed = Node(parent=project, name='PubMed corpus', type=typeCorpus, user=me)
# corpus_pubmed.save()
#
#print('Initialize resource...')
#corpus_pubmed.add_resource(
# # file='./data_samples/pubmed.zip',
# #file='./data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
# file='/srv/gargantext_lib/data_samples/pubmed.xml',
# type=typePubmed,
# user=me
#)
#
#for resource in corpus_pubmed.get_resources():
# print('Resource #%d - %s - %s' % (resource.id, resource.digest, resource.file))
#
## print('Parse corpus #%d...' % (corpus_pubmed.id, ))
# corpus_pubmed.parse_resources(verbose=True)
# print('Extract corpus #%d...' % (corpus_pubmed.id, ))
# corpus_pubmed.children.all().extract_ngrams(['title',])
# print('Parsed corpus #%d.' % (corpus_pubmed.id, ))
for parser in parsers.keys():
models.ResourceType.objects.get_or_create(name=parser)
......
# See /usr/share/postfix/main.cf.dist for a commented, more complete version
# Debian specific: Specifying a file name will cause the first
# line of that file to be used as the name. The Debian default
# is /etc/mailname.
#myorigin = /etc/mailname
smtpd_banner = $myhostname ESMTP $mail_name (Debian)
biff = no
# appending .domain is the MUA's job.
append_dot_mydomain = no
# Uncomment the next line to generate "delayed mail" warnings
#delay_warning_time = 4h
readme_directory = no
# TLS parameters
smtpd_tls_cert_file=/etc/ssl/certs/ssl-cert-snakeoil.pem
smtpd_tls_key_file=/etc/ssl/private/ssl-cert-snakeoil.key
smtpd_use_tls=yes
smtpd_tls_session_cache_database = btree:${data_directory}/smtpd_scache
smtp_tls_session_cache_database = btree:${data_directory}/smtp_scache
# See /usr/share/doc/postfix/TLS_README.gz in the postfix-doc package for
# information on enabling SSL in the smtp client.
myhostname = garg-dev.iscpif.fr
alias_maps = hash:/etc/aliases
alias_database = hash:/etc/aliases
myorigin = /etc/mailname
mydestination = garg-dev.iscpif.fr, localhost.iscpif.fr, , localhost
relayhost = smtp.iscpif.fr
mynetworks = 127.0.0.0/8 [::ffff:127.0.0.0]/104 [::1]/128
mailbox_size_limit = 0
recipient_delimiter = +
inet_interfaces = all
#!/bin/bash
# Bootstrap script: installs sudo and postfix, upgrades the distribution,
# generates an SSH key and clones the project into /srv/gargantext.
apt-get install sudo
sudo apt-get install postfix
# copy from tina
sudo cp 0*cf /etc/postfix/main.cf
sudo postfix reload
# Switch the APT sources from wheezy to jessie, then upgrade.
sed -i 's/wheezy/jessie/g' /etc/apt/sources.list
sudo aptitude update
sudo aptitude dist-upgrade
# dpkg-reconfigure locales => add GB
ssh-keygen
# Mail the public key so it can be authorized on the git server.
cat ~/.ssh/id_rsa.pub | mail alexandre@delanoe.org -s "Key Server $(hostname)"
echo "Put ~/.ssh/id_rsa.pub on remote to enable git pull please and press enter"
read answer
sudo mkdir /srv/gargantext
cd /srv
chown gargantua:www-data gargantext
# git clone takes only the repository URL here; the stray "ssh orign"
# arguments made the command fail with "Too many arguments".
git clone ssh://gitolite@delanoe.org:1979/gargantext
#!/bin/dash
# Installs the system packages needed by the project.
# TODO do apt-get install --force-yes --force-yes
apt-get install --force-yes postgresql
apt-get install --force-yes postgresql-contrib
apt-get install --force-yes rabbitmq-server
apt-get install --force-yes tmux
apt-get install --force-yes uwsgi uwsgi-plugin-python3
apt-get install --force-yes python3.4-venv
#apt-get install --force-yes python-virtualenv
apt-get install --force-yes libpng12-dev
apt-get install --force-yes libpng-dev
apt-get install --force-yes libfreetype6-dev
apt-get install --force-yes python-dev
apt-get install --force-yes libpq-dev
apt-get install --force-yes libpq-dev
#apt-get build-dep python-matplotlib
#apt-get install --force-yes python-matplotlib
# Debian packages to install
# easy_install --force-yes -U distribute (matplotlib)
# lxml
apt-get install --force-yes libffi-dev
apt-get install --force-yes libxml2-dev
apt-get install --force-yes libxslt1-dev
# ipython readline
apt-get install --force-yes libncurses5-dev
apt-get install --force-yes pandoc
# scipy:
apt-get install --force-yes gfortran
apt-get install --force-yes libopenblas-dev
apt-get install --force-yes liblapack-dev
# nlpserver
apt-get install --force-yes libgflags-dev
aptitude install --force-yes libgoogle-glog-dev
# MElt
# soon
## SERVER Configuration
# server configuration
apt-get install --force-yes nginx
# UWSGI with pcre support
apt-get install --force-yes libpcre3 libpcre3-dev
apt-get install --force-yes python3-pip
# --force-yes is an apt-get option; pip rejects it ("no such option").
pip3 install uwsgi
#!/bin/dash
# Create the Gargantext Python virtual environment in /srv/gargantext_env,
# install the Python requirements into it and apply the cte_tree patch.

sudo mkdir /srv/gargantext_env
sudo chown -R gargantua:www-data /srv/gargantext_env

# "pyvenv3" is not a command shipped by python3.4-venv; calling the venv module
# of the interpreter directly creates the same environment.
python3.4 -m venv /srv/gargantext_env

# dash has no "source" builtin; the POSIX spelling is ".".
. /srv/gargantext_env/bin/activate

pip install --upgrade pip
pip install -r 3-requirements.txt
pip3 install git+https://github.com/mathieurodic/aldjemy.git

patch /srv/gargantext_env/lib/python3.4/site-packages/cte_tree/models.py /srv/gargantext/init/patches/cte_tree.models.diff
...@@ -5,14 +5,14 @@ MarkupSafe==0.23 ...@@ -5,14 +5,14 @@ MarkupSafe==0.23
Pillow==2.5.3 Pillow==2.5.3
Pygments==1.6 Pygments==1.6
RandomWords==0.1.12 RandomWords==0.1.12
SQLAlchemy==0.9.8 SQLAlchemy==0.9.9
South==1.0 South==1.0
aldjemy==0.3.10 aldjemy==0.3.10
amqp==1.4.6 amqp==1.4.6
anyjson==0.3.3 anyjson==0.3.3
bibtexparser==0.6.0 bibtexparser==0.6.0
billiard==3.3.0.18 billiard==3.3.0.19
celery==3.1.15 celery==3.1.17
certifi==14.05.14 certifi==14.05.14
cffi==0.8.6 cffi==0.8.6
chardet==2.3.0 chardet==2.3.0
...@@ -36,7 +36,7 @@ djangorestframework==3.0.0 ...@@ -36,7 +36,7 @@ djangorestframework==3.0.0
gensim==0.10.3 gensim==0.10.3
graphviz==0.4 graphviz==0.4
ipython==2.2.0 ipython==2.2.0
kombu==3.0.23 kombu==3.0.24
lxml==3.4.1 lxml==3.4.1
#matplotlib==1.4.0 #matplotlib==1.4.0
networkx==1.9 networkx==1.9
...@@ -52,7 +52,7 @@ pydot2==1.0.33 ...@@ -52,7 +52,7 @@ pydot2==1.0.33
pyparsing==2.0.2 pyparsing==2.0.2
python-dateutil==2.2 python-dateutil==2.2
python-igraph==0.7 python-igraph==0.7
pytz==2014.7 pytz==2015.2
pyzmq==14.3.1 pyzmq==14.3.1
readline==6.2.4.1 readline==6.2.4.1
redis==2.10.3 redis==2.10.3
......
#!/bin/bash
# Download the gargantext_lib archive, unpack it under /srv and update the
# JavaScript sources it contains from GitHub.

cd /tmp/ || exit 1
wget http://docs.delanoe.org/gargantext_lib.tar.bz2

cd /srv/ || exit 1
sudo mkdir gargantext_lib
sudo chown -R gargantua:www-data /srv/gargantext_lib

# NOTE(review): extraction happens in /srv, so the archive presumably contains
# a top-level gargantext_lib/ directory -- confirm.
tar xvjf /tmp/gargantext_lib.tar.bz2
sudo chown -R gargantua:www-data /srv/gargantext_lib

cd /srv/gargantext_lib/js || exit 1
# git pull takes "<repository> <refspec>"; the former
# "origin master <url>" form passed the URL as a refspec and failed.
git pull git@github.com:PkSM3/garg.git master
In PostgreSQL
-------------
1) Ensure postgres is started: sudo /etc/init.d/postgresql start
2) sudo su postgres
3) psql
4) CREATE USER gargantua WITH PASSWORD 'C8kdcUrAQy66U';
(see gargantext_web/settings.py, DATABASES = { ... })
5) CREATE DATABASE gargandb WITH OWNER gargantua;
6) Ctrl + D
7) psql gargandb
8) CREATE EXTENSION hstore;
9) Ctrl + D
# nginx site configuration for Gargantext: static and media files are served
# directly, everything else is forwarded to the uWSGI application.

# the upstream component nginx needs to connect to
upstream gargantext {
server unix:///tmp/gargantext.sock; # for a file socket
#server 127.0.0.1:8001; # alternative: a TCP port socket (currently disabled)
}
# configuration of the server
server {
# the port your site will be served on
listen 8002;
# the domain name it will serve for
server_name localhost; # substitute your machine's IP address or FQDN
charset utf-8;
# max upload size
client_max_body_size 75M; # adjust to taste
# Django media
location /media {
alias /var/www/gargantext/media; # your Django project's media files - amend as required
}
# Django static files
location /static {
alias /var/www/gargantext/static; # your Django project's static files - amend as required
}
# Finally, send all non-media requests to the Django server.
location / {
uwsgi_pass gargantext;
include uwsgi_params;
}
}
# django.ini file
# uWSGI configuration running the Gargantext Django project behind the
# /tmp/gargantext.sock unix socket.
[uwsgi]
# Django settings module exported to the application processes
env = DJANGO_SETTINGS_MODULE=gargantext_web.settings
#module = django.core.handlers.wsgi:WSGIHandler()
#touch-reload= /tmp/gargantext.reload
# the base directory
chdir = /srv/gargantext
# Django's wsgi file
#module = wsgi
wsgi-file = /srv/gargantext/wsgi.py
# the virtualenv
home = /srv/gargantext_env/
# master
master = true
# maximum number of processes
processes = 10
# the socket (use the full path to be safe)
socket = /tmp/gargantext.sock
# with appropriate permissions - *may* be needed
chmod-socket = 666
# remove the socket and pid file created by uWSGI on exit
vacuum = true
pidfile = /tmp/gargantext.pid
# respawn processes taking more than 120 seconds
harakiri = 120
# limit the project to 128 MB
#limit-as = 128
# respawn processes after serving 5000 requests
max-requests = 5000
# background the process & log
#daemonize = /var/log/uwsgi/gargantext.log
# run the processes as this user and group id
# NOTE(review): 1000 is presumably the gargantua account -- confirm on the host.
uid = 1000
gid = 1000
#!/bin/dash
#
# Deploy the nginx site and the uWSGI configuration for Gargantext, then
# restart both services.

echo "Copy nginx configuration in sites available"
# Install the file under the name the symlink below points to; it used to be
# copied as 4-NGINX_gargantext.conf, which left the link dangling.
sudo cp 4-NGINX_gargantext.conf /etc/nginx/sites-available/gargantext.conf

echo "Enable site"
# The directory is "sites-enabled" (was misspelled "sites-enable"). The link is
# created with sudo and an explicit destination instead of a cd into a
# root-owned directory; -f lets the script be run again.
sudo ln -sf ../sites-available/gargantext.conf /etc/nginx/sites-enabled/gargantext.conf
sudo service nginx restart

echo "Copy UWSGI configuration"
sudo cp 4-UWSGI_gargantext.ini /etc/uwsgi/
sudo service uwsgi restart
#!/bin/bash
# Switch the deployed checkout to the stable branch and publish the Django
# static files under /var/www/gargantext.

# Enter the checkout first: "git checkout" used to run in whatever directory
# the script was started from.
cd /srv/gargantext || exit 1
git checkout stable

source /srv/gargantext_env/bin/activate
./manage.py collectstatic
chown -R gargantua:www-data /var/www/gargantext
-- New node_node rows get the current date when no date is supplied.
ALTER TABLE ONLY node_node ALTER COLUMN date SET DEFAULT CURRENT_DATE ;
# Hand ownership of every table in the public schema of gargandb to gargantua.
public_tables=$(psql -qAt -c "select tablename from pg_tables where schemaname = 'public';" gargandb)
for table_name in $public_tables; do
    psql -c "alter table $table_name owner to gargantua" gargandb
done
-- Migrate node_node.metadata from hstore to jsonb.
-- New rows get the current date when no date is supplied.
ALTER TABLE ONLY node_node ALTER COLUMN date SET DEFAULT CURRENT_DATE ;
-- Lift the NOT NULL constraint and the old default before changing the type.
ALTER TABLE ONLY node_node ALTER COLUMN metadata DROP NOT NULL ;
ALTER TABLE ONLY node_node ALTER COLUMN metadata DROP DEFAULT ;
-- Convert the existing hstore values to jsonb.
ALTER TABLE ONLY node_node ALTER COLUMN metadata TYPE JSONB USING hstore_to_json(metadata)::jsonb ;
-- Restore a default (empty JSON object) and the NOT NULL constraint.
ALTER TABLE ONLY node_node ALTER COLUMN metadata SET DEFAULT '{}'::jsonb ;
ALTER TABLE ONLY node_node ALTER COLUMN metadata SET NOT NULL ;
-- NOTE(review): identical to the first statement of this script; redundant.
ALTER TABLE ONLY node_node ALTER COLUMN date SET DEFAULT CURRENT_DATE ;
...@@ -29,6 +29,7 @@ from celery import current_app ...@@ -29,6 +29,7 @@ from celery import current_app
import os import os
import subprocess import subprocess
from parsing.parsers_config import parsers
# Some usefull functions # Some usefull functions
# TODO: start the function name with an underscore (private) # TODO: start the function name with an underscore (private)
...@@ -194,15 +195,19 @@ class Node(CTENode): ...@@ -194,15 +195,19 @@ class Node(CTENode):
print("= = = = = = = = = = =\n") print("= = = = = = = = = = =\n")
for node_resource in self.node_resource.filter(parsed=False): for node_resource in self.node_resource.filter(parsed=False):
resource = node_resource.resource resource = node_resource.resource
parser = defaultdict(lambda:FileParser.FileParser, { parser = defaultdict(lambda:FileParser.FileParser, parsers
'istext' : ISText, # {
'pubmed' : PubmedFileParser, # 'istext' : ISText,
'isi' : IsiFileParser, # 'pubmed' : PubmedFileParser,
'ris' : RisFileParser, # 'isi' : IsiFileParser,
'europress' : EuropressFileParser, # 'ris' : RisFileParser,
'europress_french' : EuropressFileParser, # 'RIS (Jstor)' : JstorFileParser,
'europress_english' : EuropressFileParser, # 'europress' : EuropressFileParser,
})[resource.type.name]() # 'europress_french' : EuropressFileParser,
# 'europress_english' : EuropressFileParser,
# }
)[resource.type.name]()
metadata_list += parser.parse(str(resource.file)) metadata_list += parser.parse(str(resource.file))
type_id = NodeType.objects.get(name='Document').id type_id = NodeType.objects.get(name='Document').id
langages_cache = LanguagesCache() langages_cache = LanguagesCache()
......
from .RisFileParser import RisFileParser from .RisFileParser import RisFileParser
class IsiFileParser(RisFileParser): class IsiFileParser(RisFileParser):
_parameters = { _parameters = {
......
from .RisFileParser import RisFileParser
class JstorFileParser(RisFileParser):
    """RIS-based parser configured with the tag table used for Jstor exports.

    Only the ``_parameters`` table is defined here; everything else is
    inherited from ``RisFileParser``. Each two-letter tag (as bytes) maps to
    a description of the entry: ``"delimiter"`` for the ``ER`` tag, or
    ``"metadata"`` with the metadata key to fill and, for some tags, the
    separator associated with that key.
    """

    _parameters = {
        # record delimiter
        b"ER": {
            "type": "delimiter",
        },
        # bibliographic fields
        b"TI": {
            "type": "metadata",
            "key": "title",
            "separator": " ",
        },
        b"AU": {
            "type": "metadata",
            "key": "authors",
            "separator": ", ",
        },
        b"UR": {
            "type": "metadata",
            "key": "doi",
        },
        # publication date
        b"Y1": {
            "type": "metadata",
            "key": "publication_year",
        },
        b"PD": {
            "type": "metadata",
            "key": "publication_month",
        },
        # language and content
        b"LA": {
            "type": "metadata",
            "key": "language_iso2",
        },
        b"AB": {
            "type": "metadata",
            "key": "abstract",
            "separator": " ",
        },
        b"WC": {
            "type": "metadata",
            "key": "fields",
        },
    }
from .RisFileParser import RisFileParser from .RisFileParser import RisFileParser
from .IsiFileParser import IsiFileParser from .IsiFileParser import IsiFileParser
from .JstorFileParser import JstorFileParser
from .PubmedFileParser import PubmedFileParser from .PubmedFileParser import PubmedFileParser
from .EuropressFileParser import EuropressFileParser from .EuropressFileParser import EuropressFileParser
from .ISText import ISText from .ISText import ISText
...@@ -7,8 +7,7 @@ from math import log ...@@ -7,8 +7,7 @@ from math import log
from gargantext_web.db import * from gargantext_web.db import *
from .FileParsers import * from .parsers_config import parsers as _parsers
class DebugTime: class DebugTime:
...@@ -31,18 +30,12 @@ class DebugTime: ...@@ -31,18 +30,12 @@ class DebugTime:
# keep all the parsers in a cache # keep all the parsers in a cache
class Parsers(defaultdict): class Parsers(defaultdict):
_parsers = { def __init__(self):
'pubmed' : PubmedFileParser, self._parsers = _parsers
'istex' : ISText,
'isi' : IsiFileParser,
'ris' : RisFileParser,
'europress' : EuropressFileParser,
'europress_french' : EuropressFileParser,
'europress_english' : EuropressFileParser,
}
def __missing__(self, key): def __missing__(self, key):
if key not in self._parsers: #print(self._parsers.keys())
if key not in self._parsers.keys():
raise NotImplementedError('No such parser: "%s"' % (key)) raise NotImplementedError('No such parser: "%s"' % (key))
parser = self._parsers[key]() parser = self._parsers[key]()
self[key] = parser self[key] = parser
...@@ -238,11 +231,13 @@ def extract_ngrams(corpus, keys): ...@@ -238,11 +231,13 @@ def extract_ngrams(corpus, keys):
terms = ' '.join([token for token, tag in ngram]).lower() terms = ' '.join([token for token, tag in ngram]).lower()
# TODO BUG here # TODO BUG here
if n == 1: if n == 1:
tag_id = cache.Tag[ngram[0][1]].id #tag_id = cache.Tag[ngram[0][1]].id
#tag_id = 1 tag_id = 1
#print('tag_id', tag_id) #print('tag_id', tag_id)
elif n > 1: elif n > 1:
tag_id = cache.Tag['NN'].id tag_id = 1
#tag_id = cache.Tag[ngram[0][1]].id
#tag_id = cache.Tag['NN'].id
#tag_id = 14 #tag_id = 14
#print('tag_id_2', tag_id) #print('tag_id_2', tag_id)
node_ngram_list[node_id][terms] += 1 node_ngram_list[node_id][terms] += 1
......
from .FileParsers import *
# Maps a resource type name to the parser class handling that format.
# The keys are looked up by name elsewhere in the project (e.g. the
# "Pubmed (xml format)" resource type), so they must match those names exactly.
parsers = {
'Pubmed (xml format)' : PubmedFileParser,
'Web of Science (ISI format)' : IsiFileParser,
# Scopus and Zotero exports share the generic RIS parser.
'Scopus (RIS format)' : RisFileParser,
'Zotero (RIS format)' : RisFileParser,
'Jstor (RIS format)' : JstorFileParser,
#'Europress' : EuropressFileParser,
# Both Europress variants use the same parser class.
'Europress (French)' : EuropressFileParser,
'Europress (English)' : EuropressFileParser,
}
...@@ -34,13 +34,15 @@ from gargantext_web.api import JsonHttpResponse ...@@ -34,13 +34,15 @@ from gargantext_web.api import JsonHttpResponse
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
from gargantext_web.celery import apply_workflow
from time import sleep
def getGlobalStats(request ): def getGlobalStats(request ):
print(request.method) print(request.method)
alist = ["bar","foo"] alist = ["bar","foo"]
if request.method == "POST": if request.method == "POST":
N = 1000 N = 100
query = request.POST["query"] query = request.POST["query"]
print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" query =", query ) print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" query =", query )
print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" N =", N ) print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" N =", N )
...@@ -81,9 +83,6 @@ def getGlobalStatsISTEXT(request ): ...@@ -81,9 +83,6 @@ def getGlobalStatsISTEXT(request ):
def doTheQuery(request , project_id): def doTheQuery(request , project_id):
alist = ["hola","mundo"] alist = ["hola","mundo"]
# SQLAlchemy session
session = Session()
# do we have a valid project id? # do we have a valid project id?
try: try:
project_id = int(project_id) project_id = int(project_id)
...@@ -120,7 +119,7 @@ def doTheQuery(request , project_id): ...@@ -120,7 +119,7 @@ def doTheQuery(request , project_id):
urlreqs.append( instancia.medlineEfetchRAW( yearquery ) ) urlreqs.append( instancia.medlineEfetchRAW( yearquery ) )
alist = ["tudo fixe" , "tudo bem"] alist = ["tudo fixe" , "tudo bem"]
resourcetype = cache.ResourceType["pubmed"] resourcetype = cache.ResourceType["Pubmed (xml format)"]
# corpus node instanciation as a Django model # corpus node instanciation as a Django model
corpus = Node( corpus = Node(
...@@ -129,6 +128,7 @@ def doTheQuery(request , project_id): ...@@ -129,6 +128,7 @@ def doTheQuery(request , project_id):
parent_id = project_id, parent_id = project_id,
type_id = cache.NodeType['Corpus'].id, type_id = cache.NodeType['Corpus'].id,
language_id = None, language_id = None,
metadata = {'Processing' : 1,}
) )
session.add(corpus) session.add(corpus)
session.commit() session.commit()
...@@ -165,18 +165,15 @@ def doTheQuery(request , project_id): ...@@ -165,18 +165,15 @@ def doTheQuery(request , project_id):
if dwnldsOK == 0: return JsonHttpResponse(["fail"]) if dwnldsOK == 0: return JsonHttpResponse(["fail"])
try: try:
def apply_workflow(corpus): if not DEBUG:
parse_resources(corpus) apply_workflow.apply_async((corpus.id,),)
extract_ngrams(corpus, ['title'])
compute_tfidf(corpus)
if DEBUG:
apply_workflow(corpus)
else: else:
thread = threading.Thread(target=apply_workflow, args=(corpus, ), daemon=True) thread = threading.Thread(target=apply_workflow, args=(corpus.id, ), daemon=True)
thread.start() thread.start()
except Exception as error: except Exception as error:
print('WORKFLOW ERROR') print('WORKFLOW ERROR')
print(error) print(error)
sleep(1)
return HttpResponseRedirect('/project/' + str(project_id)) return HttpResponseRedirect('/project/' + str(project_id))
data = alist data = alist
......
...@@ -83,11 +83,12 @@ ...@@ -83,11 +83,12 @@
<li>{{ key }}</li> <li>{{ key }}</li>
<ul> <ul>
{% for corpus in corpora %} {% for corpus in corpora %}
<li> {% ifnotequal corpus.count 0 %} <li>
<a href="/project/{{project.id}}/corpus/{{corpus.id}}"> {{corpus.name}} </a> , {{ corpus.count }} Documents {% ifequal corpus.processing 1 %}
{% else %}
{{corpus.name}} : <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Processing, drink a cup of tea, and refresh the page :) {{corpus.name}} : <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Processing, drink a cup of tea, and refresh the page :)
{% endifnotequal %} {% else %}
<a href="/project/{{project.id}}/corpus/{{corpus.id}}"> {{corpus.name}} </a> , {{ corpus.count }} Documents
{% endifequal %}
<button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom" <button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom"
data-content=' data-content='
<ul> <ul>
...@@ -321,7 +322,7 @@ ...@@ -321,7 +322,7 @@
console.log("theType:") console.log("theType:")
console.log(theType) console.log(theType)
if(theType=="pubmed") { if(theType=="Pubmed (xml format)") {
$.ajax({ $.ajax({
// contentType: "application/json", // contentType: "application/json",
url: window.location.origin+"/tests/pubmedquery", url: window.location.origin+"/tests/pubmedquery",
...@@ -336,7 +337,7 @@ ...@@ -336,7 +337,7 @@
console.log("enabling "+"#"+value.id) console.log("enabling "+"#"+value.id)
$("#"+value.id).attr('onclick','getGlobalResults(this);'); $("#"+value.id).attr('onclick','getGlobalResults(this);');
// $("#submit_thing").prop('disabled' , false) // $("#submit_thing").prop('disabled' , false)
$("#submit_thing").html("Process a 1000 sample!") $("#submit_thing").html("Process a 100 sample!")
thequeries = data thequeries = data
var N=0,k=0; var N=0,k=0;
...@@ -425,7 +426,7 @@ ...@@ -425,7 +426,7 @@
$( "#id_name" ).on('input',function(e){ $( "#id_name" ).on('input',function(e){
console.log($(this).val()) console.log($(this).val())
if(theType=="pubmed") testPUBMED( $(this).val() ) if(theType=="Pubmed (xml format)") testPUBMED( $(this).val() )
}); });
} }
} }
...@@ -433,7 +434,7 @@ ...@@ -433,7 +434,7 @@
//CSS events for changing the Select element //CSS events for changing the Select element
function CustomForSelect( selected ) { function CustomForSelect( selected ) {
// show Radio-Inputs and trigger FileOrNotFile>@upload-file events // show Radio-Inputs and trigger FileOrNotFile>@upload-file events
if(selected=="pubmed" || selected=="istex") { if(selected=="Pubmed (xml format)" || selected=="istext") {
// if(selected=="pubmed") { // if(selected=="pubmed") {
console.log("show the button for: "+selected) console.log("show the button for: "+selected)
$("#pubmedcrawl").css("visibility", "visible"); $("#pubmedcrawl").css("visibility", "visible");
......
...@@ -19,18 +19,21 @@ ...@@ -19,18 +19,21 @@
{% if documents %} {% if documents %}
<div id="delAll" style="visibility: hidden;">
<button onclick="deleteDuplicates(theurl);">Delete Duplicates</button>
</div>
<ul> <ul>
{% for doc in documents %} {% for doc in documents %}
{% if doc.date %} {% if doc.date %}
<li><div id="doc_{{doc.id}}"> <b>{{ doc.date }}</b>: <a target="_blank" href="/nodeinfo/{{doc.id}}">{{ doc.name}}</a> , @ {{ doc.metadata.source}}</div></li> <li><div id="doc_{{doc.id}}"> <b>{{ doc.date }}</b>: <a target="_blank" href="/nodeinfo/{{doc.id}}">{{ doc.name}}</a> , @ {{ doc.metadata.source}}</div></li>
{% endif %} {% endif %}
{% endfor %} {% endfor %}
<div id="delAll" style="visibility: hidden;">
<center>
<button onclick="deleteDuplicates(theurl);">Delete all Duplicates in one click</button>
</center>
</div>
</ul> </ul>
<script> <script>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment