Commit 8aa028fe authored by Administrator's avatar Administrator

[MERGE] from unstable to testing.

parents 376b1304 26261382
import networkx as nx
from itertools import combinations
class Utils:
    """Helpers around a weighted, undirected networkx graph.

    The graph lives in ``self.G``. The list helpers (``unique``,
    ``intersect``, ``union``) are plain set operations and never touch it.
    """

    def __init__(self):
        self.G = nx.Graph()

    def unique(self, a):
        """ return the list with duplicate elements removed (order is not preserved) """
        return list(set(a))

    def intersect(self, a, b):
        """ return the intersection of two lists (order is not preserved) """
        return list(set(a) & set(b))

    def union(self, a, b):
        """ return the union of two lists (order is not preserved) """
        return list(set(a) | set(b))

    def addCompleteSubGraph(self, terms):
        """Add every term as a node and link every pair of terms.

        Each pair produced by ``combinations(terms, 2)`` gets an edge of
        weight 1.0; if the edge already exists its weight is increased
        by 1.0 instead.
        """
        # `terms` is walked twice (nodes, then pairs). Materialise it once so
        # that a one-shot iterator still produces its edges.
        terms = list(terms)
        G = self.G
        increment = 1.0

        # <addnode> #
        for term in terms:
            G.add_node(term)
        # </addnode> #

        # <addedge> #
        for n1, n2 in combinations(terms, 2):
            if G.has_edge(n1, n2):
                G[n1][n2]['weight'] += increment
            else:
                G.add_edge(n1, n2, weight=increment)
        # </addedge> #
\ No newline at end of file
This diff is collapsed.
......@@ -60,6 +60,7 @@ def tfidf(corpus, document, ngram):
.filter(NodeNgram.ngram_id == ngram.id)\
.count()
# print("\t\t\t","occs:",occurrences_of_ngram," || ngramsbydoc:",ngrams_by_document," || TF = occ/ngramsbydoc:",term_frequency," |||||| x:",xx," || y:",yy," || IDF = log(x/y):",log(xx/yy))
inverse_document_frequency= log(xx/yy)
# result = tf * idf
......
from django.http import HttpResponseNotFound, HttpResponse, Http404
from django.core.exceptions import PermissionDenied, SuspiciousOperation
from django.core.exceptions import ValidationError
from django.core.urlresolvers import reverse
from django.db.models import Avg, Max, Min, Count, Sum
# from node.models import Language, ResourceType, Resource
......@@ -10,8 +11,9 @@ from sqlalchemy import text, distinct
from sqlalchemy.sql import func
from sqlalchemy.orm import aliased
from gargantext_web.views import move_to_trash
from .db import *
from node import models
def DebugHttpResponse(data):
return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), ))
......@@ -45,10 +47,14 @@ _ngrams_order_columns = {
}
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from rest_framework.permissions import IsAuthenticated
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.exceptions import APIException as _APIException
class APIException(_APIException):
def __init__(self, message, code=500):
self.status_code = code
......@@ -82,7 +88,7 @@ class NodesChildrenNgrams(APIView):
def get(self, request, node_id):
# query ngrams
ParentNode = aliased(Node)
ngrams_query = (Ngram
ngrams_query = (session
.query(Ngram.terms, func.count().label('count'))
# .query(Ngram.id, Ngram.terms, func.count().label('count'))
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
......@@ -128,7 +134,7 @@ class NodesChildrenDuplicates(APIView):
raise APIException('Missing GET parameter: "keys"', 400)
keys = request.GET['keys'].split(',')
# metadata retrieval
metadata_query = (Metadata
metadata_query = (session
.query(Metadata)
.filter(Metadata.name.in_(keys))
)
......@@ -187,6 +193,7 @@ class NodesChildrenDuplicates(APIView):
# get the minimum ID for each of the nodes sharing the same metadata
kept_node_ids_query = self._fetch_duplicates(request, node_id, [func.min(Node.id).label('id')], 0)
kept_node_ids = [kept_node.id for kept_node in kept_node_ids_query]
# TODO with new orm
duplicate_nodes = models.Node.objects.filter( parent_id=node_id ).exclude(id__in=kept_node_ids)
# # delete the stuff
# delete_query = (session
......@@ -197,7 +204,7 @@ class NodesChildrenDuplicates(APIView):
count = len(duplicate_nodes)
for node in duplicate_nodes:
print("deleting node ",node.id)
node.delete()
move_to_trash(node.id)
# print(delete_query)
# # delete_query.delete(synchronize_session=True)
# session.flush()
......@@ -213,7 +220,7 @@ class NodesChildrenMetatadata(APIView):
# query metadata keys
ParentNode = aliased(Node)
metadata_query = (Metadata
metadata_query = (session
.query(Metadata)
.join(Node_Metadata, Node_Metadata.metadata_id == Metadata.id)
.join(Node, Node.id == Node_Metadata.node_id)
......@@ -233,7 +240,7 @@ class NodesChildrenMetatadata(APIView):
values_to = None
if metadata.type != 'text':
value_column = getattr(Node_Metadata, 'value_' + metadata.type)
node_metadata_query = (Node_Metadata
node_metadata_query = (session
.query(value_column)
.join(Node, Node.id == Node_Metadata.node_id)
.filter(Node.parent_id == node_id)
......@@ -381,9 +388,9 @@ class NodesChildrenQueries(APIView):
for field_name in fields_names:
split_field_name = field_name.split('.')
if split_field_name[0] == 'metadata':
metadata = Metadata.query(Metadata).filter(Metadata.name == split_field_name[1]).first()
metadata = session.query(Metadata).filter(Metadata.name == split_field_name[1]).first()
if metadata is None:
metadata_query = Metadata.query(Metadata.name).order_by(Metadata.name)
metadata_query = session.query(Metadata.name).order_by(Metadata.name)
metadata_names = [metadata.name for metadata in metadata_query.all()]
raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (field[0], '", "'.join(metadata_names), field[1]), 400)
# check or create Node_Metadata alias; join if necessary
......@@ -422,7 +429,7 @@ class NodesChildrenQueries(APIView):
)
# starting the query!
document_type_id = NodeType.query(NodeType.id).filter(NodeType.name == 'Document').scalar()
document_type_id = cache.NodeType['Document'].id ##session.query(NodeType.id).filter(NodeType.name == 'Document').scalar()
query = (session
.query(*fields_list)
.select_from(Node)
......@@ -451,9 +458,9 @@ class NodesChildrenQueries(APIView):
#
if field[0] == 'metadata':
# which metadata?
metadata = Metadata.query(Metadata).filter(Metadata.name == field[1]).first()
metadata = session.query(Metadata).filter(Metadata.name == field[1]).first()
if metadata is None:
metadata_query = Metadata.query(Metadata.name).order_by(Metadata.name)
metadata_query = session.query(Metadata.name).order_by(Metadata.name)
metadata_names = [metadata.name for metadata in metadata_query.all()]
raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (field[0], '", "'.join(metadata_names), field[1]), 400)
# check or create Node_Metadata alias; join if necessary
......@@ -475,7 +482,7 @@ class NodesChildrenQueries(APIView):
))
elif field[0] == 'ngrams':
query = query.filter(
Node.id.in_(Node_Metadata
Node.id.in_(session
.query(Node_Ngram.node_id)
.filter(Node_Ngram.ngram_id == Ngram.id)
.filter(operator(
......@@ -549,11 +556,13 @@ class NodesChildrenQueries(APIView):
class NodesList(APIView):
authentication_classes = (SessionAuthentication, BasicAuthentication)
def get(self, request):
query = (Node
print("user id : " + str(request.user))
query = (session
.query(Node.id, Node.name, NodeType.name.label('type'))
.filter(Node.user_id == request.session._session_cache['_auth_user_id'])
.filter(Node.user_id == int(request.user.id))
.join(NodeType)
)
if 'type' in request.GET:
......@@ -576,8 +585,11 @@ class Nodes(APIView):
return JsonHttpResponse({
'id': node.id,
'name': node.name,
'parent_id': node.parent_id,
'type': cache.NodeType[node.type_id].name,
# 'type': node.type__name,
'metadata': dict(node.metadata),
#'metadata': dict(node.metadata),
'metadata': node.metadata,
})
# deleting node by id
......@@ -585,13 +597,19 @@ class Nodes(APIView):
# it should take the subnodes into account as well,
# for better constistency...
def delete(self, request, node_id):
node = models.Node.objects.filter(id = node_id)
msgres = ""
user = request.user
node = session.query(Node).filter(Node.id == node_id).first()
msgres = str()
try:
node.delete()
msgres = node_id+" deleted!"
except:
msgres ="error deleting: "+node_id
move_to_trash(node_id)
msgres = node_id+" moved to Trash"
except Exception as error:
msgres ="error deleting : " + node_id + str(error)
return JsonHttpResponse({
'deleted': msgres,
......@@ -605,9 +623,9 @@ class CorpusController:
corpus_id = int(corpus_id)
except:
raise ValidationError('Corpora are identified by an integer.', 400)
corpusQuery = Node.objects.filter(id = corpus_id)
corpusQuery = session.query(Node).filter(Node.id == corpus_id).first()
# print(str(corpusQuery))
# raise Http404("C'est toujours ça de pris.")
# raise Http404("404 error.")
if not corpusQuery:
raise Http404("No such corpus: %d" % (corpus_id, ))
corpus = corpusQuery.first()
......@@ -626,7 +644,7 @@ class CorpusController:
# build query
ParentNode = aliased(Node)
query = (Ngram
query = (session
.query(Ngram.terms, func.count('*'))
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
.join(Node, Node.id == Node_Ngram.node_id)
......
from node import models
from gargantext_web import settings
from node import models
__all__ = ['literalquery', 'session', 'cache', 'Session', 'bulk_insert', 'engine', 'get_cursor']
# initialize sqlalchemy
from sqlalchemy.orm import Session, mapper
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import create_engine, MetaData, Table, Column, ForeignKey
from sqlalchemy.types import Integer, String, DateTime
from sqlalchemy.dialects.postgresql import JSON
__all__ = ['literalquery', 'session', 'cache']
engine = create_engine('postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}/{NAME}'.format(
**settings.DATABASES['default']
))
Base = automap_base()
Base.prepare(engine, reflect=True)
# model representation
def model_repr(modelname):
    """Build a ``__repr__`` function for the model called *modelname*.

    The returned function renders an object as
    ``<modelname key=value, key=value>`` from the entries of its
    ``__dict__``. Attributes whose name starts with an underscore are left
    out, and any value whose ``repr()`` is longer than 64 characters is
    shortened to its first and last 30 characters joined by ``'....'``.
    """
    def _repr(obj):
        fields = []
        for key, value in obj.__dict__.items():
            # startswith() also copes with an empty attribute name,
            # where key[0] would raise IndexError
            if key.startswith('_'):
                continue
            value = repr(value)
            if len(value) > 64:
                value = value[:30] + '....' + value[-30:]
            fields.append(' ' + key + '=' + value)
        return '<' + modelname + ','.join(fields) + '>'
    return _repr
# map the Django models found in node.models to SQLAlchemy models
for model_name, model in models.__dict__.items():
if hasattr(model, 'sa'):
globals()[model_name] = model.sa
__all__.append(model_name)
if hasattr(model, '_meta') :
table_name = model._meta.db_table
if hasattr(Base.classes, table_name):
sqla_model = getattr(Base.classes, table_name)
setattr(sqla_model, '__repr__', model_repr(model_name))
globals()[model_name] = sqla_model
__all__.append(model_name)
NodeNgram = Node_Ngram
NodeResource = Node_Resource
# debugging tool, to translate SQLAlchemy queries to string
......@@ -61,16 +103,17 @@ def literalquery(statement, dialect=None):
# SQLAlchemy session management
def get_sessionmaker():
from django.db import connections
from sqlalchemy.orm import sessionmaker
def get_engine():
from sqlalchemy import create_engine
alias = 'default'
connection = connections[alias]
url = 'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}/{NAME}'.format(
**settings.DATABASES['default']
)
engine = create_engine(url, use_native_hstore=True)
return create_engine(url, use_native_hstore=True)
engine = get_engine()
def get_sessionmaker():
from sqlalchemy.orm import sessionmaker
return sessionmaker(bind=engine)
Session = get_sessionmaker()
......@@ -84,7 +127,7 @@ from sqlalchemy import or_
class ModelCache(dict):
def __init__(self, model, preload=False):
self._model = model.sa
self._model = globals()[model.__name__]
self._columns_names = [column.name for column in model._meta.fields if column.unique]
self._columns = [getattr(self._model, column_name) for column_name in self._columns_names]
self._columns_validators = []
......@@ -92,20 +135,16 @@ class ModelCache(dict):
self.preload()
def __missing__(self, key):
for column in self._columns:
conditions = []
try:
formatted_key = column.type.python_type(key)
conditions.append(column == key)
except ValueError:
pass
if formatted_key in self:
self[key] = self[formatted_key]
else:
element = session.query(self._model).filter(or_(*conditions)).first()
if element is None:
raise KeyError
self[key] = element
#print(key)
conditions = [
(column == str(key))
for column in self._columns
if column.type.python_type == str or key.__class__ == column.type.python_type
]
element = session.query(self._model).filter(or_(*conditions)).first()
if element is None:
raise KeyError
self[key] = element
return element
def preload(self):
......@@ -115,7 +154,7 @@ class ModelCache(dict):
key = getattr(element, column_name)
self[key] = element
class Cache:
class Cache():
def __getattr__(self, key):
try:
......@@ -127,3 +166,50 @@ class Cache:
return modelcache
cache = Cache()
# Insert many elements at once
import psycopg2
def get_cursor():
    """Open a new psycopg2 connection described by the 'default' database settings.

    Returns a ``(connection, cursor)`` pair; the cursor is created from the
    returned connection.
    """
    params = settings.DATABASES['default']
    connection = psycopg2.connect(
        database=params['NAME'],
        user=params['USER'],
        password=params['PASSWORD'],
        host=params['HOST'],
    )
    return connection, connection.cursor()
class bulk_insert:
    """Insert many rows at once through ``cursor.copy_from``.

    The instance itself is the file-like object handed to ``copy_from``:
    every ``read()`` / ``readline()`` call returns the next row of *data*
    as one tab-separated line, and ``''`` once the rows are exhausted.

    table  -- table name, or an object exposing ``__table__.name``
    keys   -- column names, in the same order as the values of each row
    data   -- iterable of rows (each row an iterable of len(keys) values)
    cursor -- cursor to use; when omitted a connection is opened with
              get_cursor(), committed, and closed again
    """

    def __init__(self, table, keys, data, cursor=None):
        # prepare the iterator
        self.iter = iter(data)
        # template
        self.template = '%s' + (len(keys) - 1) * '\t%s' + '\n'
        # prepare the cursor
        owns_connection = cursor is None
        if owns_connection:
            db, cursor = get_cursor()
        # insert data
        if not isinstance(table, str):
            table = table.__table__.name
        try:
            cursor.copy_from(self, table, columns=keys)
            # commit if necessary
            if owns_connection:
                db.commit()
        finally:
            # a connection opened here is not visible to the caller, so it
            # has to be released here as well
            if owns_connection:
                cursor.close()
                db.close()

    @staticmethod
    def _escape(value):
        """Render one value for the COPY text format."""
        if value is None:
            # \N is the NULL marker copy_from expects by default
            return '\\N'
        return (str(value)
            # backslashes first, so the escapes added below stay intact
            .replace('\\', '\\\\')
            .replace('\r', '')
            .replace('\n', '\\n')
            .replace('\t', '\\t')
        )

    def read(self, size=None):
        """Return the next row as one line; *size* is accepted but ignored."""
        try:
            row = next(self.iter)
        except StopIteration:
            return ''
        return self.template % tuple(self._escape(x) for x in row)

    readline = read
import random
import random_words
from math import pi
def paragraph_lorem(size_target=450):
    '''
    Function that returns paragraph with false latin language.
    size_target is the minimum length of the paragraph, in characters:
    sentences are appended until the joined text reaches it.
    '''
    lorem = random_words.LoremIpsum()
    sentences_list = lorem.get_sentences_list(sentences=5)
    # Start from the seed sentences so that a size_target <= 0, which never
    # enters the loop, returns them instead of failing on an unbound name.
    paragraph = ' '.join(sentences_list)
    paragraph_size = 0

    while paragraph_size < size_target:
        sentences_list.append(lorem.get_sentence())
        paragraph = ' '.join(sentences_list)
        paragraph_size = len(paragraph)

    return paragraph
def paragraph_gargantua(size_target=500, docs_dir='/srv/gargantext/static/docs/gargantua_book'):
    '''
    Function that returns paragraph with chapter titles of Gargantua.
    One random line is taken from each file gargantua_chapter_1.txt to
    gargantua_chapter_5.txt found in docs_dir, in that order.
    size_target is the length, in characters, at which the paragraph
    stops growing (at most five lines are ever used).
    '''
    paragraph = list()
    paragraph_size = 0
    chapter_number = 1

    while paragraph_size < size_target and chapter_number < 6:
        chapter_path = '%s/gargantua_chapter_%d.txt' % (docs_dir, chapter_number)
        # the context manager closes the file even if the chapter is empty
        # and random.choice() raises
        with open(chapter_path, 'r') as chapitre:
            paragraph.append(random.choice(chapitre.readlines()).strip())
        paragraph_size = len(' '.join(paragraph))
        chapter_number += 1

    return ' '.join(paragraph)
def random_letter(mot, size_min=5):
    '''
    Shuffle the inner letters of a word.
    Words of at most size_min characters are returned unchanged. For a
    longer word, round(len(mot) / pi) characters are kept at each end and
    the letters in between are put in a random order.
    '''
    if len(mot) <= size_min:
        return mot

    edge = round(len(mot) / pi)
    head, tail = mot[:edge], mot[-edge:]
    middle = list(mot[edge:-edge])
    random.shuffle(middle)
    return ''.join([head] + middle + [tail])
tutoriel = """Il paraît que l'ordre des lettres dans un mot n'a pas d'importance. La première et la dernière lettre doivent être à la bonne place. Le reste peut être dans un désordre total et on peut toujours lire sans problème. On ne lit donc pas chaque lettre en elle-même, mais le mot comme un tout. Un changement de référentiel et nous transposons ce résultat au texte lui-même: l'ordre des mots est faiblement important comparé au contexte du texte qui, lui, est compté"""
def paragraph_tutoreil(tutoriel=tutoriel):
    '''
    Return the text with every space-separated word passed through
    random_letter(), followed by a fixed closing phrase.
    '''
    scrambled_words = map(random_letter, tutoriel.split(" "))
    return ' '.join(scrambled_words) + ": comptexter avec Gargantext."
......@@ -63,12 +63,11 @@ INSTALLED_APPS = (
'django.contrib.messages',
'django.contrib.staticfiles',
'django_extensions',
'south',
'django_pg',
'cte_tree',
'node',
'ngram',
'scrap_pubmed',
'django_hstore',
'djcelery',
'aldjemy',
'rest_framework',
......@@ -83,6 +82,16 @@ MIDDLEWARE_CLASSES = (
'django.middleware.clickjacking.XFrameOptionsMiddleware',
)
REST_SESSION_LOGIN = False
REST_FRAMEWORK = {
'DEFAULT_AUTHENTICATION_CLASSES': (
'rest_framework.authentication.TokenAuthentication',
'rest_framework.authentication.SessionAuthentication',
),
'DEFAULT_PERMISSION_CLASSES': (
'rest_framework.permissions.AllowAny',
),
}
WSGI_APPLICATION = 'wsgi.application'
......
......@@ -3,7 +3,7 @@ from django.conf.urls import patterns, include, url
from django.contrib import admin
from django.contrib.auth.views import login
from gargantext_web import views
from gargantext_web import views, views_optimized
import gargantext_web.api
import scrap_pubmed.views as pubmedscrapper
......@@ -20,22 +20,23 @@ urlpatterns = patterns('',
url(r'^auth/$', views.login_user),
url(r'^auth/logout/$', views.logout_user),
# Dynamic CSS
url(r'^img/logo.svg$', views.logo),
url(r'^css/bootstrap.css$', views.css),
# User Home view
url(r'^$', views.home),
url(r'^$', views.home_view),
url(r'^about/', views.get_about),
url(r'^maintenance/', views.get_maintenance),
# Project Management
url(r'^projects/$', views.projects),
url(r'^project/(\d+)/delete/$', views.delete_project),
url(r'^project/(\d+)/$', views.project),
url(r'^project/(\d+)/$', views_optimized.project),
url(r'^delete/(\d+)$', views.delete_node), # => api.node('id' = id, children = 'True', copies = False)
# Corpus management
url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus),
url(r'^project/(\d+)/corpus/(\d+)/delete/$', views.delete_corpus),
url(r'^project/(\d+)/corpus/(\d+)/corpus.csv$', views.corpus_csv),
url(r'^project/(\d+)/corpus/(tests_mvc_listdocuments+)/corpus.tests_mvc_listdocuments$', views.corpus_csv),
......@@ -47,16 +48,19 @@ urlpatterns = patterns('',
url(r'^project/(\d+)/corpus/(\d+)/matrix$', views.matrix),
# Data management
url(r'^chart/corpus/(\d+)/data.csv$', views.send_csv),
url(r'^corpus/(\d+)/node_link.json$', views.node_link),
url(r'^corpus/(\d+)/adjacency.json$', views.adjacency),
url(r'^api/tfidf/(\d+)/(\w+)$', views.tfidf),
url(r'^chart/corpus/(\d+)/data.csv$', views.send_csv), # => api.node.children('type' : 'data', 'format' : 'csv')
url(r'^corpus/(\d+)/node_link.json$', views.node_link), # => api.analysis('type': 'node_link', 'format' : 'json')
url(r'^corpus/(\d+)/adjacency.json$', views.adjacency), # => api.analysis('type': 'adjacency', 'format' : 'json')
url(r'^api/tfidf/(\d+)/(\w+)$', views_optimized.tfidf),
# url(r'^api/tfidf/(\d+)/(\w+)$', views.tfidf),
url(r'^api/tfidf2/(\d+)/(\w+)$', views.tfidf2),
# Data management
url(r'^api$', gargantext_web.api.Root),
#url(r'^api$', gargantext_web.api.Root), # = ?
url(r'^api/nodes$', gargantext_web.api.NodesList.as_view()),
url(r'^api/nodes/(\d+)$', gargantext_web.api.Nodes.as_view()),
url(r'^api/nodes/(\d+)/children/ngrams$', gargantext_web.api.NodesChildrenNgrams.as_view()),
url(r'^api/nodes/(\d+)/children/ngrams$', gargantext_web.api.NodesChildrenNgrams.as_view()), # => repeated children ?
url(r'^api/nodes/(\d+)/children/metadata$', gargantext_web.api.NodesChildrenMetatadata.as_view()),
url(r'^api/nodes/(\d+)/children/queries$', gargantext_web.api.NodesChildrenQueries.as_view()),
url(r'^api/nodes/(\d+)/children/duplicates$', gargantext_web.api.NodesChildrenDuplicates.as_view()),
......@@ -66,12 +70,13 @@ urlpatterns = patterns('',
url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
url(r'^ngrams$', views.ngrams),
url(r'^nodeinfo/(\d+)$', views.nodeinfo),
# Provisory tests
url(r'^ngrams$', views.ngrams), # to be removed
url(r'^nodeinfo/(\d+)$', views.nodeinfo), # to be removed ?
url(r'^tests/mvc$', views.tests_mvc),
url(r'^tests/mvc-listdocuments$', views.tests_mvc_listdocuments),
url(r'^tests/istextquery$', pubmedscrapper.getGlobalStatsISTEXT),
url(r'^tests/istextquery$', pubmedscrapper.getGlobalStatsISTEXT), # api/query?type=istext ?
url(r'^tests/pubmedquery$', pubmedscrapper.getGlobalStats),
url(r'^tests/project/(\d+)/pubmedquery/go$', pubmedscrapper.doTheQuery),
url(r'^tests/project/(\d+)/ISTEXquery/go$', pubmedscrapper.testISTEX)
......@@ -90,3 +95,15 @@ if settings.DEBUG:
}),
)
if settings.MAINTENANCE:
urlpatterns = patterns('',
url(r'^img/logo.svg$', views.logo),
url(r'^css/bootstrap.css$', views.css),
url(r'^$', views.home_view),
url(r'^about/', views.get_about),
url(r'^.*', views.get_maintenance),
)
This diff is collapsed.
from django.shortcuts import redirect
from django.shortcuts import render
from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseForbidden
from sqlalchemy import func, and_, or_
from sqlalchemy.orm import aliased
from collections import defaultdict
from datetime import datetime
from threading import Thread
from node.admin import CustomForm
from gargantext_web.db import *
from gargantext_web.settings import DEBUG, MEDIA_ROOT
from gargantext_web.api import JsonHttpResponse
import json
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
def project(request, project_id):
    """Render the page of one project and handle the corpus upload form.

    GET lists the corpora of the project grouped by resource type, with
    their document counts. A valid POST creates a corpus node, stores the
    uploaded file, runs the parsing workflow and redirects back to the
    project page.

    Raises Http404 when project_id is not an integer or matches no project
    node. Redirects unauthenticated users to the login page and returns
    HttpResponseForbidden when the project belongs to another user.
    """
    # SQLAlchemy session
    session = Session()

    # do we have a valid project id?
    try:
        project_id = int(project_id)
    except ValueError:
        raise Http404()

    # do we have a valid project?
    project = (session
        .query(Node)
        .filter(Node.id == project_id)
        .filter(Node.type_id == cache.NodeType['Project'].id)
    ).first()
    if project is None:
        raise Http404()

    # do we have a valid user?
    user = request.user
    if not user.is_authenticated():
        return redirect('/login/?next=%s' % request.path)
    if project.user_id != user.id:
        return HttpResponseForbidden()

    # Let's find out about the children nodes of the project
    ChildrenNode = aliased(Node)
    # This query is giving you the wrong number of docs from the pubmedquerier (x 5)
    # ... sqlalchemy.func by Resource.type_id is the guilty
    # ISSUE L51
    corpus_query = (session
        .query(Node.id, Node.name, func.count(ChildrenNode.id))
        #.query(Node.id, Node.name, Resource.type_id, func.count(ChildrenNode.id))
        #.join(Node_Resource, Node_Resource.node_id == Node.id)
        #.join(Resource, Resource.id == Node_Resource.resource_id)
        .filter(Node.parent_id == project.id)
        .filter(Node.type_id == cache.NodeType['Corpus'].id)
        .filter(and_(ChildrenNode.parent_id == Node.id, ChildrenNode.type_id == cache.NodeType['Document'].id))
        .group_by(Node.id, Node.name)
        .order_by(Node.name)
        .all()
    )
    corpora_by_resourcetype = defaultdict(list)
    documents_count_by_resourcetype = defaultdict(int)
    corpora_count = 0
    seen_corpus_ids = set()
    for corpus_id, corpus_name, document_count in corpus_query:
        # Not optimized GOTO ISSUE L51
        resource_row = (session
            .query(Resource.type_id)
            .join(Node_Resource, Node_Resource.resource_id == Resource.id)
            .join(Node, Node.id == Node_Resource.node_id)
            .filter(Node.id == corpus_id)
            .first()
        )
        # first() returns None for a corpus without any resource; indexing it
        # directly would raise before the '(no resource)' branch is reached
        resource_type_id = None if resource_row is None else resource_row[0]

        if corpus_id not in seen_corpus_ids:
            if resource_type_id is None:
                resourcetype_name = '(no resource)'
            else:
                resourcetype = cache.ResourceType[resource_type_id]
                resourcetype_name = resourcetype.name
            corpora_by_resourcetype[resourcetype_name].append({
                'id': corpus_id,
                'name': corpus_name,
                'count': document_count,
            })
            documents_count_by_resourcetype[resourcetype_name] += document_count
            corpora_count += 1
            seen_corpus_ids.add(corpus_id)

    # do the donut
    total_documents_count = sum(documents_count_by_resourcetype.values())
    donut = [
        {
            'source': key,
            'count': value,
            'part': round(value * 100 / total_documents_count) if total_documents_count else 0,
        }
        for key, value in documents_count_by_resourcetype.items()
    ]

    # deal with the form
    if request.method == 'POST':
        # form validation
        form = CustomForm(request.POST, request.FILES)
        if form.is_valid():
            # extract information from the form
            name = form.cleaned_data['name']
            thefile = form.cleaned_data['file']
            resourcetype = cache.ResourceType[form.cleaned_data['type']]
            # which default language shall be used?
            if resourcetype.name == "europress_french":
                language_id = cache.Language['fr'].id
            elif resourcetype.name == "europress_english":
                language_id = cache.Language['en'].id
            else:
                language_id = None
            # corpus node instantiation
            corpus = Node(
                name = name,
                user_id = request.user.id,
                parent_id = project_id,
                type_id = cache.NodeType['Corpus'].id,
                language_id = language_id,
            )
            session.add(corpus)
            session.commit()
            # save the uploaded file
            # NOTE(review): `_name` is a private attribute of the uploaded
            # file; the public `name` is presumably equivalent -- confirm.
            filepath = '%s/corpora/%s/%s' % (MEDIA_ROOT, request.user.username, thefile._name)
            # the context manager closes the file even if the write fails
            with open(filepath, 'wb') as f:
                f.write(thefile.read())
            # add the uploaded resource to the corpus
            add_resource(corpus,
                user_id = request.user.id,
                type_id = resourcetype.id,
                file = filepath,
            )
            # let's start the workflow
            try:
                def apply_workflow(corpus):
                    parse_resources(corpus)
                    extract_ngrams(corpus, ['title'])
                    compute_tfidf(corpus)
                if DEBUG:
                    apply_workflow(corpus)
                else:
                    thread = Thread(target=apply_workflow, args=(corpus, ), daemon=True)
                    thread.start()
            except Exception as error:
                print('WORKFLOW ERROR')
                print(error)
            # redirect to the main project page
            return HttpResponseRedirect('/project/' + str(project_id))
        else:
            print('ERROR: BAD FORM')
    else:
        form = CustomForm()

    # HTML output
    return render(request, 'project.html', {
        'form': form,
        'user': user,
        'date': datetime.now(),
        'project': project,
        'donut': donut,
        'list_corpora': dict(corpora_by_resourcetype),
        'whitelists': '',
        'blacklists': '',
        'cooclists': '',
        'number': corpora_count,
    })
def tfidf(request, corpus_id, ngram_ids):
    """Takes IDs of corpus and ngram and returns list of relevant documents in json format
    according to TFIDF score (order is decreasing).

    ngram_ids is a string of integer ids separated by the letter 'a'.
    At most 6 documents are returned. Raises Http404 when ngram_ids
    contains anything that is not an integer.
    """
    limit = 6
    nodes_list = []
    # filter input
    try:
        ngram_ids = [int(i) for i in ngram_ids.split('a')]
    except ValueError:
        # same handling as project() applies to a malformed project id
        raise Http404()
    # request data
    nodes_query = (session
        .query(Node, func.sum(NodeNodeNgram.score))
        .join(NodeNodeNgram, NodeNodeNgram.nodey_id == Node.id)
        .filter(NodeNodeNgram.nodex_id == corpus_id)
        .filter(NodeNodeNgram.ngram_id.in_(ngram_ids))
        .group_by(Node)
        .order_by(func.sum(NodeNodeNgram.score).desc())
        .limit(limit)
    )
    # convert query result to a list of dicts
    for node, score in nodes_query:
        node_dict = {
            'id': node.id,
            'score': score,
        }
        for key in ('title', 'publication_date', 'journal', 'authors', 'fields'):
            if key in node.metadata:
                node_dict[key] = node.metadata[key]
        nodes_list.append(node_dict)

    # NOTE(review): `data` is already a JSON string here; confirm that
    # JsonHttpResponse does not serialise it a second time.
    data = json.dumps(nodes_list)
    return JsonHttpResponse(data)
No preview for this file type
-- Give node_node.date a default value: the current date.
ALTER TABLE ONLY node_node ALTER COLUMN date SET DEFAULT CURRENT_DATE ;
-- Convert node_node.metadata to JSONB.
-- The NOT NULL constraint and the default are dropped first, the existing
-- values are converted with hstore_to_json(), and then an empty JSON object
-- default and the NOT NULL constraint are put back.
ALTER TABLE ONLY node_node ALTER COLUMN metadata DROP NOT NULL ;
ALTER TABLE ONLY node_node ALTER COLUMN metadata DROP DEFAULT ;
ALTER TABLE ONLY node_node ALTER COLUMN metadata TYPE JSONB USING hstore_to_json(metadata)::jsonb ;
ALTER TABLE ONLY node_node ALTER COLUMN metadata SET DEFAULT '{}'::jsonb ;
ALTER TABLE ONLY node_node ALTER COLUMN metadata SET NOT NULL ;
......@@ -7,7 +7,7 @@
#NodeType.objects.all().delete()
from node.models import Node, NodeType, Project, Corpus, Document, Ngram, Node_Ngram, User, Language, ResourceType
from node.models import *
import pycountry
......@@ -31,14 +31,8 @@ except:
me = User(username='pksm3')
me.save()
try:
typeProject = NodeType.objects.get(name='Root')
except Exception as error:
print(error)
typeProject = NodeType(name='Root')
typeProject.save()
for node_type in ['Trash', 'Root', ]:
NodeType.objects.get_or_create(name=node_type)
try:
typeProject = NodeType.objects.get(name='Project')
......@@ -141,13 +135,7 @@ except Exception as error:
#Node.objects.all().delete()
# In[9]:
try:
project = Node.objects.get(name='Bees project')
except:
project = Node(name='Bees project', type=typeProject, user=me)
project.save()
try:
stem = Node.objects.get(name='Stem')
......@@ -158,3 +146,17 @@ except:
from gargantext_web.db import *
# Instantiante table NgramTag:
f = open("part_of_speech_labels.txt", 'r')
for line in f.readlines():
name, description = line.strip().split('\t')
_tag = Tag(name=name, description=description)
session.add(_tag)
session.commit()
f.close()
......@@ -4,6 +4,11 @@ psql -d gargandb -f init.sql
sleep 2
../manage.py syncdb
psql -d gargandb -f init2.sql
sleep 2
../manage.py shell < init.py
#../manage.py shell < init.py
../manage.py shell < init_gargantext.py
#psql -d gargandb -f hstore2jsonb.sql
# Without this, we couldn't use the Django environment
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
# We're gonna use all the models!
# Django models
from node import models
# SQLA models
from gargantext_web.db import *
# Reset: all data
#
#tables_to_empty = [
# Node,
# Node_Metadata,
# Metadata,
# NodeType,
# ResourceType,
# Resource,
#]
#for table in tables_to_empty:
# print('Empty table "%s"...' % (table._meta.db_table, ))
# table.objects.all().delete()
# Integration: metadata types
print('Initialize metadata...')
metadata = {
    'publication_date': 'datetime',
    'authors': 'string',
    'language_fullname': 'string',
    'abstract': 'text',
    'title': 'string',
    'source': 'string',
    'volume': 'string',
    'text': 'text',
    'page': 'string',
    'doi': 'string',
    'journal': 'string',
}
# get_or_create() keeps the script re-runnable, like the node type and
# resource type sections of this script; the loop variable no longer
# shadows the builtin `type`.
for name, metadata_type in metadata.items():
    models.Metadata.objects.get_or_create(name=name, type=metadata_type)
# Integration: languages
print('Initialize languages...')
import pycountry
# `from gargantext_web.db import *` rebinds the bare name `Language` to the
# SQLAlchemy class, which has neither `.objects` nor `.save()`; the Django
# model has to be reached through `models`.
models.Language.objects.all().delete()
for language in pycountry.languages:
    if 'alpha2' in language.__dict__:
        models.Language(
            iso2 = language.alpha2,
            iso3 = language.bibliographic,
            fullname = language.name,
            implemented = 1 if language.alpha2 in ['en', 'fr'] else 0,
        ).save()
english = models.Language.objects.get(iso2='en')
french = models.Language.objects.get(iso2='fr')
# Integration: node types
# (done before the users section: the root nodes created there look their
# type up by name, so the types must already exist)
print('Initialize node types...')
node_types = [
    'Root', 'Trash',
    'Project', 'Corpus', 'Document',
    'Stem', 'Lem', 'Tfidf',
    'Synonym',
    'MiamList', 'StopList',
    'Cooccurrence', 'WhiteList', 'BlackList'
]
for node_type in node_types:
    models.NodeType.objects.get_or_create(name=node_type)

# Integration: users
print('Initialize users...')
# get_or_create() returns an (object, created) pair, not the object alone
me, _created = models.User.objects.get_or_create(username='alexandre')
gargantua, _created = models.User.objects.get_or_create(username='gargantua')

node_root = Node(user_id=gargantua.id, type_id=cache.NodeType['Root'].id, name='Root')
session.add(node_root)
# flush so that node_root.id is assigned before it is used as a parent id
session.flush()
node_stem = Node(user_id=gargantua.id, type_id=cache.NodeType['Stem'].id, name='Stem', parent_id=node_root.id)
node_lem = Node(user_id=gargantua.id, type_id=cache.NodeType['Lem'].id, name='Lem', parent_id=node_root.id)
session.add(node_stem)
session.add(node_lem)
session.commit()
# Integration: resource types
# One ResourceType row per supported source format, created only if missing.
print('Initialize resource...')
resources = [
    'pubmed',
    'isi',
    'ris',
    'europress_french',
    'europress_english',
]
for resource in resources:
    models.ResourceType.objects.get_or_create(name=resource)
# TODO
# here some tests
# add a new project and some corpora to test it
# Integration: project
#
#print('Initialize project...')
#try:
# project = Node.objects.get(name='Bees project')
#except:
# project = Node(name='Bees project', type=typeProject, user=me)
# project.save()
#
# Integration: corpus
#print('Initialize corpus...')
#try:
# corpus_pubmed = Node.objects.get(name='PubMed corpus')
#except:
# corpus_pubmed = Node(parent=project, name='PubMed corpus', type=typeCorpus, user=me)
# corpus_pubmed.save()
#
#print('Initialize resource...')
#corpus_pubmed.add_resource(
# # file='./data_samples/pubmed.zip',
# #file='./data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
# file='/srv/gargantext_lib/data_samples/pubmed.xml',
# type=typePubmed,
# user=me
#)
#
#for resource in corpus_pubmed.get_resources():
# print('Resource #%d - %s - %s' % (resource.id, resource.digest, resource.file))
#
## print('Parse corpus #%d...' % (corpus_pubmed.id, ))
# corpus_pubmed.parse_resources(verbose=True)
# print('Extract corpus #%d...' % (corpus_pubmed.id, ))
# corpus_pubmed.children.all().extract_ngrams(['title',])
# print('Parsed corpus #%d.' % (corpus_pubmed.id, ))
# Instantiate table NgramTag:
# every non-empty line of the labels file is "<tag name>\t<description>".
with open("part_of_speech_labels.txt", 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            # a blank (e.g. trailing) line would break the unpacking below
            continue
        # split on the first tab only, so a description may contain tabs
        name, description = line.split('\t', 1)
        session.add(Tag(name=name, description=description))
# one commit for the whole batch
session.commit()
exit()
CC Coordinating conjunction
CD Cardinal number
DT Determiner
EX Existential there
FW Foreign word
IN Preposition or subordinating conjunction
JJ Adjective
JJR Adjective, comparative
JJS Adjective, superlative
LS List item marker
MD Modal
NN Noun, singular or mass
NNS Noun, plural
NNP Proper noun, singular
NNPS Proper noun, plural
PDT Predeterminer
POS Possessive ending
PRP Personal pronoun
PRP$ Possessive pronoun
RB Adverb
RBR Adverb, comparative
RBS Adverb, superlative
RP Particle
SYM Symbol
TO to
UH Interjection
VB Verb, base form
VBD Verb, past tense
VBG Verb, gerund or present participle
VBN Verb, past participle
VBP Verb, non-3rd person singular present
VBZ Verb, 3rd person singular present
WDT Wh-determiner
WP Wh-pronoun
WP$ Possessive wh-pronoun
WRB Wh-adverb
NGRA Ngram
Cython==0.20.2
Django==1.6.6
Django==1.6.11
Jinja2==2.7.3
MarkupSafe==0.23
Pillow==2.5.3
Pygments==1.6
RandomWords==0.1.12
SQLAlchemy==0.9.8
South==1.0
aldjemy==0.3.10
amqp==1.4.6
anyjson==0.3.3
bibtexparser==0.6.0
billiard==3.3.0.18
celery==3.1.15
certifi==14.05.14
......@@ -23,17 +25,22 @@ django-cte-trees==0.9.2
django-extensions==1.4.0
django-grappelli==2.5.3
django-hstore==1.3.1
django-maintenance==0.1
django-mptt==0.6.1
django-nested-inlines==0.1
django-pgfields==1.4.4
django-pgjson==0.2.2
django-pgjsonb==0.0.10
django-treebeard==2.0
djangorestframework==3.0.0
gensim==0.10.3
graphviz==0.4
ipython==2.2.0
kombu==3.0.23
lxml==3.3.6
matplotlib==1.4.0
lxml==3.4.1
#matplotlib==1.4.0
networkx==1.9
nltk==3.0a4
#nltk==3.0a4
nose==1.3.4
numpy==1.8.2
pandas==0.14.1
......@@ -44,13 +51,16 @@ pycparser==2.10
pydot2==1.0.33
pyparsing==2.0.2
python-dateutil==2.2
python-igraph==0.7
pytz==2014.7
pyzmq==14.3.1
readline==6.2.4.1
redis==2.10.3
scikit-learn==0.15.1
scipy==0.14.0
simplerandom==0.12.1
six==1.7.3
sympy==0.7.5
tornado==4.0.1
uWSGI==2.0.7
ujson==1.33
# Without this, we couldn't use the Django environment
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
# We're gonna use all the models!
from node.models import User, NodeType, Node
user = User.objects.get(username='contro2015.lait')

# Reset: delete every node of type 'Cooccurrence' owned by this user.
try:
    typeDoc = NodeType.objects.get(name='Cooccurrence')
except NodeType.DoesNotExist as error:
    # Without the node type there is nothing to delete; report it and
    # fall through to exit() instead of failing on an unbound typeDoc.
    print(error)
else:
    Node.objects.filter(user=user, type=typeDoc).all().delete()
exit()
# Without this, we couldn't use the Django environment
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
# We're gonna use all the models!
from node.models import *
# Reset: all data
tables_to_empty = [
Node,
Node_Metadata,
Metadata,
NodeType,
ResourceType,
Resource,
]
for table in tables_to_empty:
print('Empty table "%s"...' % (table._meta.db_table, ))
table.objects.all().delete()
# Integration: metadata types
print('Initialize metadata...')
metadata = {
'publication_date': 'datetime',
'authors': 'string',
'language_fullname': 'string',
'abstract': 'text',
'title': 'string',
'source': 'string',
'volume': 'string',
'text': 'text',
'page': 'string',
'doi': 'string',
'journal': 'string',
}
for name, type in metadata.items():
Metadata(name=name, type=type).save()
# Integration: languages
print('Initialize languages...')
import pycountry
Language.objects.all().delete()
for language in pycountry.languages:
if 'alpha2' in language.__dict__:
Language(
iso2 = language.alpha2,
iso3 = language.bibliographic,
fullname = language.name,
implemented = 1 if language.alpha2 in ['en', 'fr'] else 0,
).save()
english = Language.objects.get(iso2='en')
french = Language.objects.get(iso2='fr')
# Integration: users
print('Initialize users...')
# get_or_create() returns an (object, created) tuple; only the object is kept.
me, _created = User.objects.get_or_create(username='alexandre')

# Integration: node types
print('Initialize node types...')
node_type_names = [
    'Root', 'Project', 'Corpus', 'Document', 'Stem', 'Tfidf',
    'WhiteList', 'BlackList', 'Synonyme', 'Cooccurrence',
]
node_types_by_name = {}
for node_type_name in node_type_names:
    node_types_by_name[node_type_name], _created = \
        NodeType.objects.get_or_create(name=node_type_name)

# Names used by the rest of the script.
typeProject = node_types_by_name['Project']
typeCorpus = node_types_by_name['Corpus']
typeStem = node_types_by_name['Stem']
typeTfidf = node_types_by_name['Tfidf']
# typeDoc keeps the value of its last assignment in the previous version.
typeDoc = node_types_by_name['Cooccurrence']

# Integration: resource types
print('Initialize resource...')
# Each resource type is fetched or created individually, so one missing
# entry no longer triggers re-creation of the ones that already exist.
resource_type_names = [
    'pubmed', 'isi', 'ris', 'europress_french', 'europress_english',
]
resource_types_by_name = {}
for resource_type_name in resource_type_names:
    resource_types_by_name[resource_type_name], _created = \
        ResourceType.objects.get_or_create(name=resource_type_name)

typePubmed = resource_types_by_name['pubmed']
typeIsi = resource_types_by_name['isi']
typeRis = resource_types_by_name['ris']
typePresseFr = resource_types_by_name['europress_french']
typePresseEn = resource_types_by_name['europress_english']
# Integration Node Stem
try:
stem = Node.objects.get(name='Stem')
except:
stem = Node(name='Stem', type=typeStem, user=me)
stem.save()
# Integration: project
print('Initialize project...')
try:
project = Node.objects.get(name='Bees project')
except:
project = Node(name='Bees project', type=typeProject, user=me)
project.save()
# Integration: corpus
print('Initialize corpus...')
try:
corpus_pubmed = Node.objects.get(name='PubMed corpus')
except:
corpus_pubmed = Node(parent=project, name='PubMed corpus', type=typeCorpus, user=me)
corpus_pubmed.save()
print('Initialize resource...')
corpus_pubmed.add_resource(
# file='./data_samples/pubmed.zip',
#file='./data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
file='/srv/gargantext_lib/data_samples/pubmed.xml',
type=typePubmed,
user=me
)
for resource in corpus_pubmed.get_resources():
print('Resource #%d - %s - %s' % (resource.id, resource.digest, resource.file))
# print('Parse corpus #%d...' % (corpus_pubmed.id, ))
# corpus_pubmed.parse_resources(verbose=True)
# print('Extract corpus #%d...' % (corpus_pubmed.id, ))
# corpus_pubmed.children.all().extract_ngrams(['title',])
# print('Parsed corpus #%d.' % (corpus_pubmed.id, ))
exit()
......@@ -117,23 +117,16 @@ class CustomForm(forms.Form):
"""
def clean_file(self):
    """Validate the uploaded file and give it a generated name.

    Returns the value of the 'file' entry of ``cleaned_data``. When a file
    is present its size is checked against a 1 GB limit and its name is
    replaced by the current microsecond count rendered as a string.

    Raises ``forms.ValidationError`` when the file is larger than 1 GB.
    """
    file_ = self.cleaned_data.get('file')
    if file_ is None:
        # nothing was uploaded: there is nothing to measure or rename
        return file_
    # File size: validate before touching the file, so a rejected upload
    # is left unmodified.
    if file_.size > 1024 ** 3:
        raise forms.ValidationError(_('File too heavy! (>1GB).'))
    from datetime import datetime
    file_.name = str(datetime.now().microsecond)
    return file_
class CorpusForm(ModelForm):
#parent = ModelChoiceField(EmptyQuerySet)
def __init__(self, *args, **kwargs):
......@@ -155,14 +148,14 @@ class CorpusAdmin(NodeAdmin):
######################################################################
class DocumentForm(ModelForm):
parent = ModelChoiceField(Node.objects.filter(user_id=1, type_id=3))
class DocumentAdmin(NodeAdmin):
_parent_nodetype_name = 'Corpus'
_nodetype_name = 'Document'
form = DocumentForm
#class DocumentForm(ModelForm):
# parent = ModelChoiceField(Node.objects.filter(user_id=1, type_id=3))
#class DocumentAdmin(NodeAdmin):
# _parent_nodetype_name = 'Corpus'
# _nodetype_name = 'Document'
# form = DocumentForm
#
class LanguageAdmin(admin.ModelAdmin):
def get_queryset(self, request):
......@@ -178,7 +171,7 @@ admin.site.register(Language, LanguageAdmin)
admin.site.register(NodeType)
admin.site.register(Project, ProjectAdmin)
admin.site.register(Corpus, CorpusAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(Document)#, DocumentAdmin)
admin.site.register(Node_Resource)
......
This diff is collapsed.
......@@ -41,18 +41,47 @@ class EuropressFileParser(FileParser):
html = etree.fromstring(contents, html_parser)
try:
format_europresse = 50
html_articles = html.xpath('/html/body/table/tbody')
if len(html_articles) < 1:
html_articles = html.xpath('/html/body/table')
if len(html_articles) < 1:
format_europresse = 1
html_articles = html.xpath('//div[@id="docContain"]')
except Exception as error:
print(error)
if format_europresse == 50:
name_xpath = "./tr/td/span[@class = 'DocPublicationName']"
header_xpath = "//span[@class = 'DocHeader']"
title_xpath = "string(./tr/td/span[@class = 'TitreArticleVisu'])"
text_xpath = "./tr/td/descendant-or-self::*[not(self::span[@class='DocHeader'])]/text()"
elif format_europresse == 1:
name_xpath = "//span[@class = 'DocPublicationName']"
header_xpath = "//span[@class = 'DocHeader']"
title_xpath = "string(//div[@class = 'titreArticleVisu'])"
text_xpath = "./descendant::*[\
not(\
self::div[@class='Doc-SourceText'] \
or self::span[@class='DocHeader'] \
or self::span[@class='DocPublicationName'] \
or self::span[@id='docNameVisu'] \
or self::span[@class='DocHeader'] \
or self::div[@class='titreArticleVisu'] \
or self::span[@id='docNameContType'] \
or descendant-or-self::span[@id='ucPubliC_lblCertificatIssuedTo'] \
or descendant-or-self::span[@id='ucPubliC_lblEndDate'] \
or self::td[@class='txtCertificat'] \
)]/text()"
doi_xpath = "//span[@id='ucPubliC_lblNodoc']/text()"
except:
return []
except Exception as error:
print(error)
# initialize the list of metadata
metadata_list = []
# parse all the articles, one by one
try:
for html_article in html_articles:
......@@ -60,19 +89,20 @@ class EuropressFileParser(FileParser):
metadata = {}
if len(html_article):
for name in html_article.xpath("./tr/td/span[@class = 'DocPublicationName']"):
for name in html_article.xpath(name_xpath):
if name.text is not None:
format_journal = re.compile('(.*), (.*)', re.UNICODE)
test_journal = format_journal.match(name.text)
if test_journal is not None:
metadata['source'] = test_journal.group(1)
metadata['journal'] = test_journal.group(1)
metadata['volume'] = test_journal.group(2)
else:
metadata['source'] = name.text.encode(codif)
metadata['journal'] = name.text.encode(codif)
for header in html_article.xpath("./tr/td/span[@class = 'DocHeader']"):
for header in html_article.xpath(header_xpath):
try:
text = header.text
#print("header", text)
except Exception as error:
print(error)
......@@ -138,8 +168,8 @@ class EuropressFileParser(FileParser):
if test_page is not None:
metadata['page'] = test_page.group(1).encode(codif)
metadata['title'] = html_article.xpath("string(./tr/td/span[@class = 'TitreArticleVisu'])").encode(codif)
metadata['text'] = html_article.xpath("./tr/td/descendant-or-self::*[not(self::span[@class='DocHeader'])]/text()")
metadata['title'] = html_article.xpath(title_xpath).encode(codif)
metadata['abstract'] = html_article.xpath(text_xpath)
line = 0
br_tag = 10
......@@ -185,32 +215,36 @@ class EuropressFileParser(FileParser):
metadata['publication_year'] = metadata['publication_date'].strftime('%Y')
metadata['publication_month'] = metadata['publication_date'].strftime('%m')
metadata['publication_day'] = metadata['publication_date'].strftime('%d')
metadata['publication_date'] = ""
metadata.pop('publication_date')
if len(metadata['abstract'])>0 and format_europresse == 50:
metadata['doi'] = str(metadata['abstract'][-9])
metadata['abstract'].pop()
# Here add separator for paragraphs
metadata['abstract'] = str(' '.join(metadata['abstract']))
metadata['abstract'] = str(re.sub('Tous droits réservés.*$', '', metadata['abstract']))
elif format_europresse == 1:
metadata['doi'] = ' '.join(html_article.xpath(doi_xpath))
metadata['abstract'] = metadata['abstract'][:-9]
# Here add separator for paragraphs
metadata['abstract'] = str(' '.join(metadata['abstract']))
else:
metadata['doi'] = "not found"
metadata['length_words'] = len(metadata['abstract'].split(' '))
metadata['length_letters'] = len(metadata['abstract'])
if len(metadata['text'])>0:
metadata['doi'] = str(metadata['text'][-9])
metadata['text'].pop()
metadata['text'] = str(' '.join(metadata['text']))
metadata['text'] = str(re.sub('Tous droits réservés.*$', '', metadata['text']))
else: metadata['doi'] = "not found"
metadata['bdd'] = u'europresse'
metadata['url'] = u''
#metadata_str = {}
for key, value in metadata.items():
metadata[key] = value.decode() if isinstance(value, bytes) else value
metadata_list.append(metadata)
yield metadata
count += 1
file.close()
except Exception as error:
print(error)
pass
# from pprint import pprint
# pprint(metadata_list)
# return []
return metadata_list
......@@ -103,15 +103,21 @@ class FileParser:
zipArchive = zipfile.ZipFile(file)
for filename in zipArchive.namelist():
try:
metadata_list += self.parse(zipArchive.open(filename, "r"))
f = zipArchive.open(filename, 'r')
metadata_list += self.parse(f)
f.close()
except Exception as error:
print(error)
# ...otherwise, let's parse it directly!
else:
try:
metadata_list += self._parse(file)
for metadata in self._parse(file):
metadata_list.append(self.format_metadata(metadata))
if hasattr(file, 'close'):
file.close()
except Exception as error:
print(error)
# return the list of formatted metadata
return map(self.format_metadata, metadata_list)
return metadata_list
......@@ -25,6 +25,7 @@ class PubmedFileParser(FileParser):
metadata_path = {
"journal" : 'MedlineCitation/Article/Journal/Title',
"title" : 'MedlineCitation/Article/ArticleTitle',
"abstract" : 'MedlineCitation/Article/Abstract/AbstractText',
"title2" : 'MedlineCitation/Article/VernacularTitle',
"language_iso3" : 'MedlineCitation/Article/Language',
"doi" : 'PubmedData/ArticleIdList/ArticleId[@type=doi]',
......@@ -101,7 +102,6 @@ class PubmedFileParser(FileParser):
if "realdate_day_" in metadata: metadata.pop("realdate_day_")
if "title2" in metadata: metadata.pop("title2")
# print(metadata)
metadata_list.append(metadata)
# return the list of metadata
return metadata_list
......@@ -17,42 +17,34 @@ class RisFileParser(FileParser):
}
def _parse(self, file):
    """Yield one metadata dict per record found in ``file``.

    ``file`` is iterated line by line and every line is handled as bytes:
    its first two bytes are the parameter key, and the value is taken from
    the fourth byte up to (excluding) the last one. A line whose key is two
    spaces continues the value list of the previous key. Keys are
    translated through ``self._parameters``: a "metadata" entry stores the
    joined values under its "key", a "delimiter" entry closes the record.

    A closed record that carries no language information gets
    ``language_iso2 = 'en'``. Whatever is still pending when the input ends
    is emitted as well.
    """
    metadata = {}
    last_key = None
    last_values = []

    def flush():
        """Fold the pending key into metadata; return a closed record or None."""
        nonlocal metadata
        if last_key not in self._parameters:
            return None
        # translate the parameter key
        parameter = self._parameters[last_key]
        if parameter["type"] == "metadata":
            separator = parameter["separator"] if "separator" in parameter else ""
            metadata[parameter["key"]] = separator.join(last_values)
        elif parameter["type"] == "delimiter":
            if 'language_fullname' not in metadata:
                if 'language_iso3' not in metadata:
                    if 'language_iso2' not in metadata:
                        metadata['language_iso2'] = 'en'
            record, metadata = metadata, {}
            return record
        return None

    # browse every line of the file
    for line in file:
        if len(line) > 2:
            # extract the parameter key
            parameter_key = line[:2]
            # the key is two bytes long, so a continuation line is
            # recognised by comparing with two spaces
            if parameter_key != b'  ' and parameter_key != last_key:
                record = flush()
                if record is not None:
                    yield record
                last_key = parameter_key
                last_values = []
            try:
                last_values.append(line[3:-1].decode())
            except UnicodeDecodeError as error:
                # best effort: report the undecodable value and go on
                print(error)

    # the key read last has not been folded in yet
    record = flush()
    if record is not None:
        yield record
    # if a metadata object is left in memory, yield it as well
    if metadata:
        yield metadata
from ..Taggers import Tagger
from ..Taggers import TurboTagger
import nltk
......@@ -13,12 +13,13 @@ class NgramsExtractor:
self.start()
self._label = "NP"
self._rule = self._label + ": " + rule
self._grammar = nltk.RegexpParser(self._rule)
def __del__(self):
self.stop()
def start(self):
self.tagger = Tagger()
self.tagger = TurboTagger()
def stop(self):
pass
......@@ -29,19 +30,8 @@ class NgramsExtractor:
"""
def extract_ngrams(self, contents):
    """Yield the leaves of every subtree labelled ``self._label``.

    ``contents`` is tagged with ``self.tagger.tag_text``; the tagged
    tokens are parsed with ``self._grammar`` and each subtree of the parse
    whose label equals ``self._label`` contributes its leaves.
    Nothing is yielded when the tagger returns no tokens.
    """
    tagged_ngrams = self.tagger.tag_text(contents)
    if not len(tagged_ngrams):
        # nothing was tagged, so there is nothing to parse
        return
    grammar_parsed = self._grammar.parse(tagged_ngrams)
    for subtree in grammar_parsed.subtrees():
        if subtree.label() == self._label:
            yield subtree.leaves()
from .FrenchNgramsExtractor import FrenchNgramsExtractor
from .TurboNgramsExtractor import TurboNgramsExtractor as EnglishNgramsExtractor
# from parsing.NgramsExtractors.EnglishNgramsExtractor import EnglishNgramsExtractor
# from .EnglishNgramsExtractor import EnglishNgramsExtractor
from .NgramsExtractor import NgramsExtractor
......@@ -58,9 +58,11 @@ class Tagger:
if single:
self.tagging_end()
return []
"""Send a text to be tagged.
"""
# Not used right now
def tag_text(self, text):
tokens_tags = []
self.tagging_start()
......@@ -69,4 +71,3 @@ class Tagger:
tokens_tags += self.tag_tokens(tokens, False)
self.tagging_end()
return tokens_tags
......@@ -9,15 +9,24 @@ from .settings import implemented_methods
class NLPClient:
def __init__(self):
    # No connection is opened here: _connect() creates the socket when a
    # request is sent. Opening one here and then resetting the attribute
    # would leave that socket unclosed.
    self._socket = None
    # Every public attribute whose upper-cased name is not listed in
    # implemented_methods is replaced by self._notimplemented.
    for method_name in dir(self):
        if method_name[0] != '_':
            if method_name.upper() not in implemented_methods:
                setattr(self, method_name, self._notimplemented)
def __del__(self):
    # _disconnect() closes the socket only when one is open; closing
    # self._socket directly fails while it is None.
    self._disconnect()
def _connect(self):
    """Drop any current connection, then open a new one to the server."""
    self._disconnect()
    connection = socket.socket(*server_type_client)
    # stored before connecting, so the attribute holds the socket even
    # when connect() raises
    self._socket = connection
    connection.connect((server_host, server_port))
def _disconnect(self):
    """Close the current socket, if there is one, and forget it."""
    current = self._socket
    if current is None:
        return
    current.close()
    self._socket = None
def _notimplemented(self, *args, **kwargs):
raise NotImplementedError(
......@@ -51,7 +60,7 @@ class NLPClient:
data += language + '\n'
data += re.sub(r'\n+', '\n', text)
data += '\n\n'
self.__init__()
self._connect()
self._socket.sendall(data.encode())
sentence = []
if keys is None:
......@@ -73,7 +82,6 @@ class NLPClient:
continue
values = line.split('\t')
sentence.append(dict(zip(keys, line.split('\t'))))
self.__del__()
def tokenize(self, text, language='english', asdict=False):
keys = ('token', ) if asdict else None
......
......@@ -4,7 +4,7 @@ import socketserver
# Server parameters
server_host = 'localhost'
server_port = 1234
server_port = 7777
server_type_server = socketserver.TCPServer
server_type_client = socket.AF_INET, socket.SOCK_STREAM
server_timeout = 2.0
......
This diff is collapsed.
......@@ -40,7 +40,7 @@ class MedlineFetcher:
"Get number of results for query 'query' in variable 'count'"
"Get also 'queryKey' and 'webEnv', which are used by function 'medlineEfetch'"
print(query)
# print(query)
origQuery = query
query = query.replace(' ', '%20')
......@@ -79,7 +79,7 @@ class MedlineFetcher:
queryNoSpace = query.replace(' ', '') # No space in directory and file names, avoids stupid errors
print ("LOG::TIME: ",'medlineEfetchRAW :Query "' , query , '"\t:\t' , count , ' results')
# print ("LOG::TIME: ",'medlineEfetchRAW :Query "' , query , '"\t:\t' , count , ' results')
retstart = 0
eFetch = '%s/efetch.fcgi?email=youremail@example.org&rettype=%s&retmode=xml&retstart=%s&retmax=%s&db=%s&query_key=%s&WebEnv=%s' %(self.pubMedEutilsURL, self.reportType, retstart, retmax, self.pubMedDB, queryKey, webEnv)
......@@ -94,7 +94,7 @@ class MedlineFetcher:
def downloadFile(self, item):
url = item[0]
filename = item[1]
print("\tin test_downloadFile:")
# print("\tin test_downloadFile:")
# print(url,filename)
data = urlopen(url)
f = codecs.open(filename, "w" ,encoding='utf-8')
......@@ -110,7 +110,7 @@ class MedlineFetcher:
def test_downloadFile(self, item):
url = item[0]
filename = item[1]
print("\tin downloadFile:")
# print("\tin downloadFile:")
data = urlopen(url)
return data
......@@ -119,7 +119,7 @@ class MedlineFetcher:
# time.sleep(1) # pretend to do some lengthy work.
returnvalue = self.medlineEsearch(item)
with self.lock:
print(threading.current_thread().name, item)
# print(threading.current_thread().name, item)
return returnvalue
# The worker thread pulls an item from the queue and processes it
......@@ -160,13 +160,13 @@ class MedlineFetcher:
N = 0
print ("MedlineFetcher::serialFetcher :")
# print ("MedlineFetcher::serialFetcher :")
thequeries = []
globalresults = []
for i in range(yearsNumber):
year = str(2015 - i)
print ('YEAR ' + year)
print ('---------\n')
# print ('YEAR ' + year)
# print ('---------\n')
pubmedquery = str(year) + '[dp] '+query
self.q.put( pubmedquery ) #put task in the queue
......@@ -196,5 +196,6 @@ class MedlineFetcher:
retmax_forthisyear = int(round(globalLimit*proportion))
query["retmax"] = retmax_forthisyear
if query["retmax"]==0: query["retmax"]+=1
print(query["string"],"\t[",k,">",query["retmax"],"]")
return thequeries
from django.shortcuts import redirect
from django.shortcuts import render
from django.http import Http404, HttpResponse, HttpResponseRedirect
from django.template.loader import get_template
from django.template import Context
from django.contrib.auth.models import User, Group
from scrap_pubmed.MedlineFetcherDavid2015 import MedlineFetcher
from gargantext_web.api import JsonHttpResponse
from urllib.request import urlopen, urlretrieve
import json
from gargantext_web.settings import MEDIA_ROOT
# from datetime import datetime
import time
import datetime
......@@ -21,9 +16,23 @@ import threading
from django.core.files import File
from gargantext_web.settings import DEBUG
from node.models import Language, ResourceType, Resource, \
Node, NodeType, Node_Resource, Project, Corpus, \
Ngram, Node_Ngram, NodeNgramNgram, NodeNodeNgram
from django.shortcuts import redirect
from django.shortcuts import render
from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseForbidden
from sqlalchemy import func
from sqlalchemy.orm import aliased
from collections import defaultdict
import threading
from node.admin import CustomForm
from gargantext_web.db import *
from gargantext_web.settings import DEBUG, MEDIA_ROOT
from gargantext_web.api import JsonHttpResponse
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
def getGlobalStats(request ):
......@@ -31,7 +40,7 @@ def getGlobalStats(request ):
alist = ["bar","foo"]
if request.method == "POST":
N = 100
N = 1000
query = request.POST["query"]
print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" query =", query )
print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" N =", N )
......@@ -72,9 +81,34 @@ def getGlobalStatsISTEXT(request ):
def doTheQuery(request , project_id):
alist = ["hola","mundo"]
if request.method == "POST":
# SQLAlchemy session
session = Session()
# do we have a valid project id?
try:
project_id = int(project_id)
except ValueError:
raise Http404()
# do we have a valid project?
project = (session
.query(Node)
.filter(Node.id == project_id)
.filter(Node.type_id == cache.NodeType['Project'].id)
).first()
if project is None:
raise Http404()
# do we have a valid user?
user = request.user
if not user.is_authenticated():
return redirect('/login/?next=%s' % request.path)
if project.user_id != user.id:
return HttpResponseForbidden()
if request.method == "POST":
query = request.POST["query"]
name = request.POST["string"]
......@@ -86,30 +120,26 @@ def doTheQuery(request , project_id):
urlreqs.append( instancia.medlineEfetchRAW( yearquery ) )
alist = ["tudo fixe" , "tudo bem"]
"""
urlreqs: List of urls to query.
- Then, to each url in urlreqs you do:
eFetchResult = urlopen(url)
eFetchResult.read() # this will output the XML... normally you write this to a XML-file.
"""
thefile = "how we do this here?"
resource_type = ResourceType.objects.get(name="pubmed" )
parent = Node.objects.get(id=project_id)
node_type = NodeType.objects.get(name='Corpus')
type_id = NodeType.objects.get(name='Document').id
user_id = User.objects.get( username=request.user ).id
resourcetype = cache.ResourceType["pubmed"]
# corpus node instanciation as a Django model
corpus = Node(
user=request.user,
parent=parent,
type=node_type,
name=name,
name = name,
user_id = request.user.id,
parent_id = project_id,
type_id = cache.NodeType['Corpus'].id,
language_id = None,
)
session.add(corpus)
session.commit()
# """
# urlreqs: List of urls to query.
# - Then, to each url in urlreqs you do:
# eFetchResult = urlopen(url)
# eFetchResult.read() # this will output the XML... normally you write this to a XML-file.
# """
corpus.save()
tasks = MedlineFetcher()
for i in range(8):
......@@ -124,24 +154,30 @@ def doTheQuery(request , project_id):
dwnldsOK = 0
for filename in tasks.firstResults:
if filename!=False:
corpus.add_resource( user=request.user, type=resource_type, file=filename )
# add the uploaded resource to the corpus
add_resource(corpus,
user_id = request.user.id,
type_id = resourcetype.id,
file = filename,
)
dwnldsOK+=1
if dwnldsOK == 0: return JsonHttpResponse(["fail"])
# do the WorkFlow
try:
if DEBUG is True:
corpus.workflow()
# corpus.workflow__MOV()
def apply_workflow(corpus):
parse_resources(corpus)
extract_ngrams(corpus, ['title'])
compute_tfidf(corpus)
if DEBUG:
apply_workflow(corpus)
else:
corpus.workflow.apply_async((), countdown=3)
return JsonHttpResponse(["workflow","finished"])
thread = threading.Thread(target=apply_workflow, args=(corpus, ), daemon=True)
thread.start()
except Exception as error:
print('WORKFLOW ERROR')
print(error)
return JsonHttpResponse(["workflow","finished","outside the try-except"])
return HttpResponseRedirect('/project/' + str(project_id))
data = alist
return JsonHttpResponse(data)
......@@ -164,59 +200,59 @@ def testISTEX(request , project_id):
print(query_string , query , N)
urlreqs = []
pagesize = 50
tasks = MedlineFetcher()
chunks = list(tasks.chunks(range(N), pagesize))
for k in chunks:
if (k[0]+pagesize)>N: pagesize = N-k[0]
urlreqs.append("http://api.istex.fr/document/?q="+query_string+"&output=*&"+"from="+str(k[0])+"&size="+str(pagesize))
print(urlreqs)
urlreqs = ["http://localhost/374255" , "http://localhost/374278" ]
print(urlreqs)
resource_type = ResourceType.objects.get(name="istext" )
parent = Node.objects.get(id=project_id)
node_type = NodeType.objects.get(name='Corpus')
type_id = NodeType.objects.get(name='Document').id
user_id = User.objects.get( username=request.user ).id
corpus = Node(
user=request.user,
parent=parent,
type=node_type,
name=query,
)
corpus.save()
# configuring your queue with the event
for i in range(8):
t = threading.Thread(target=tasks.worker2) #thing to do
t.daemon = True # thread dies when main thread (only non-daemon thread) exits.
t.start()
for url in urlreqs:
filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.now().microsecond))
tasks.q.put( [url , filename]) #put a task in th queue
tasks.q.join() # wait until everything is finished
for filename in tasks.firstResults:
corpus.add_resource( user=request.user, type=resource_type, file=filename )
corpus.save()
print("DEBUG:",DEBUG)
# do the WorkFlow
try:
if DEBUG is True:
corpus.workflow()
else:
corpus.workflow.apply_async((), countdown=3)
return JsonHttpResponse(["workflow","finished"])
except Exception as error:
print(error)
# urlreqs = []
# pagesize = 50
# tasks = MedlineFetcher()
# chunks = list(tasks.chunks(range(N), pagesize))
# for k in chunks:
# if (k[0]+pagesize)>N: pagesize = N-k[0]
# urlreqs.append("http://api.istex.fr/document/?q="+query_string+"&output=*&"+"from="+str(k[0])+"&size="+str(pagesize))
# print(urlreqs)
# urlreqs = ["http://localhost/374255" , "http://localhost/374278" ]
# print(urlreqs)
# resource_type = ResourceType.objects.get(name="istext" )
# parent = Node.objects.get(id=project_id)
# node_type = NodeType.objects.get(name='Corpus')
# type_id = NodeType.objects.get(name='Document').id
# user_id = User.objects.get( username=request.user ).id
# corpus = Node(
# user=request.user,
# parent=parent,
# type=node_type,
# name=query,
# )
# corpus.save()
# # configuring your queue with the event
# for i in range(8):
# t = threading.Thread(target=tasks.worker2) #thing to do
# t.daemon = True # thread dies when main thread (only non-daemon thread) exits.
# t.start()
# for url in urlreqs:
# filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.now().microsecond))
# tasks.q.put( [url , filename]) #put a task in th queue
# tasks.q.join() # wait until everything is finished
# for filename in tasks.firstResults:
# corpus.add_resource( user=request.user, type=resource_type, file=filename )
# corpus.save()
# print("DEBUG:",DEBUG)
# # do the WorkFlow
# try:
# if DEBUG is True:
# corpus.workflow()
# else:
# corpus.workflow.apply_async((), countdown=3)
# return JsonHttpResponse(["workflow","finished"])
# except Exception as error:
# print(error)
data = [query_string,query,N]
return JsonHttpResponse(data)
......
Project Gutenberg's Gargantua and Pantagruel, Complete., by Francois Rabelais
This eBook is for the use of anyone anywhere at no cost and with
almost no restrictions whatsoever. You may copy it, give it away or
re-use it under the terms of the Project Gutenberg License included
with this eBook or online at www.gutenberg.net
Title: Gargantua and Pantagruel, Complete.
Five Books Of The Lives, Heroic Deeds And Sayings Of Gargantua And
His Son Pantagruel
Author: Francois Rabelais
Release Date: August 8, 2004 [EBook #1200]
Language: English
*** START OF THIS PROJECT GUTENBERG EBOOK GARGANTUA AND PANTAGRUEL, ***
Produced by Sue Asscher and David Widger
MASTER FRANCIS RABELAIS
FIVE BOOKS OF THE LIVES, HEROIC DEEDS AND SAYINGS OF
GARGANTUA AND HIS SON PANTAGRUEL
Translated into English by
Sir Thomas Urquhart of Cromarty
and
Peter Antony Motteux
The text of the first Two Books of Rabelais has been reprinted from the
first edition (1653) of Urquhart's translation. Footnotes initialled 'M.'
are drawn from the Maitland Club edition (1838); other footnotes are by the
translator. Urquhart's translation of Book III. appeared posthumously in
1693, with a new edition of Books I. and II., under Motteux's editorship.
Motteux's rendering of Books IV. and V. followed in 1708. Occasionally (as
the footnotes indicate) passages omitted by Motteux have been restored from
the 1738 copy edited by Ozell.
Chapter 1.I. Of the Genealogy and Antiquity of Gargantua.
Chapter 1.II. The Antidoted Fanfreluches: or, a Galimatia of extravagant Conceits found in an ancient Monument.
Chapter 1.III. How Gargantua was carried eleven months in his mother's belly.
Chapter 1.IV. How Gargamelle, being great with Gargantua, did eat a huge deal of tripes.
Chapter 1.IX. The colours and liveries of Gargantua.
Chapter 1.L. Gargantua's speech to the vanquished.
Chapter 1.LI. How the victorious Gargantuists were recompensed after the battle.
Chapter 1.LII. How Gargantua caused to be built for the Monk the Abbey of Theleme.
Chapter 1.LIII. How the abbey of the Thelemites was built and endowed.
Chapter 1.LIV. The inscription set upon the great gate of Theleme.
Chapter 1.LV. What manner of dwelling the Thelemites had.
Chapter 1.LVI. How the men and women of the religious order of Theleme were apparelled.
Chapter 1.LVII. How the Thelemites were governed, and of their manner of living.
Chapter 1.LVIII. A prophetical Riddle.
Chapter 1.V. The Discourse of the Drinkers.
Chapter 1.VI. How Gargantua was born in a strange manner.
Chapter 1.VII. After what manner Gargantua had his name given him, and how he tippled, bibbed, and curried the can.
Chapter 1.VIII. How they apparelled Gargantua.
Chapter 1.X. Of that which is signified by the colours white and blue.
Chapter 1.XI. Of the youthful age of Gargantua.
Chapter 1.XII. Of Gargantua's wooden horses.
Chapter 1.XIII. How Gargantua's wonderful understanding became known to his father Grangousier, by the invention of a torchecul or wipebreech.
Chapter 1.XIV. How Gargantua was taught Latin by a Sophister.
Chapter 1.XIX. The oration of Master Janotus de Bragmardo for recovery of the bells.
Chapter 1.XL. Why monks are the outcasts of the world; and wherefore some have bigger noses than others.
Chapter 1.XLI. How the Monk made Gargantua sleep, and of his hours and breviaries.
Chapter 1.XLII. How the Monk encouraged his fellow-champions, and how he hanged upon a tree.
Chapter 1.XLIII. How the scouts and fore-party of Picrochole were met with by Gargantua, and how the Monk slew Captain Drawforth, and then was taken prisoner by his enemies.
Chapter 1.XLIV. How the Monk rid himself of his keepers, and how Picrochole's forlorn hope was defeated.
Chapter 1.XLIX. How Picrochole in his flight fell into great misfortunes, and what Gargantua did after the battle.
Chapter 1.XLV. How the Monk carried along with him the Pilgrims, and of the good words that Grangousier gave them.
Chapter 1.XLVI. How Grangousier did very kindly entertain Touchfaucet his prisoner.
Chapter 1.XLVII. How Grangousier sent for his legions, and how Touchfaucet slew Rashcalf, and was afterwards executed by the command of Picrochole.
Chapter 1.XLVIII. How Gargantua set upon Picrochole within the rock Clermond, and utterly defeated the army of the said Picrochole.
Chapter 1.XV. How Gargantua was put under other schoolmasters.
Chapter 1.XVI. How Gargantua was sent to Paris, and of the huge great mare that he rode on; how she destroyed the oxflies of the Beauce.
Chapter 1.XVII. How Gargantua paid his welcome to the Parisians, and how he took away the great bells of Our Lady's Church.
Chapter 1.XVIII. How Janotus de Bragmardo was sent to Gargantua to recover the great bells.
Chapter 1.XX. How the Sophister carried away his cloth, and how he had a suit in law against the other masters.
Chapter 1.XXI. The study of Gargantua, according to the discipline of his schoolmasters the Sophisters.
Chapter 1.XXII. The games of Gargantua.
Chapter 1.XXIII. How Gargantua was instructed by Ponocrates, and in such sort disciplinated, that he lost not one hour of the day.
Chapter 1.XXIV. How Gargantua spent his time in rainy weather.
Chapter 1.XXIX. The tenour of the letter which Grangousier wrote to his son Gargantua.
Chapter 1.XXV. How there was great strife and debate raised betwixt the cake-bakers of Lerne, and those of Gargantua's country, whereupon were waged great wars.
Chapter 1.XXVI. How the inhabitants of Lerne, by the commandment of Picrochole their king, assaulted the shepherds of Gargantua unexpectedly and on a sudden.
Chapter 1.XXVII. How a monk of Seville saved the close of the abbey from being ransacked by the enemy.
Chapter 1.XXVIII. How Picrochole stormed and took by assault the rock Clermond, and of Grangousier's unwillingness and aversion from the undertaking of war.
Chapter 1.XXX. How Ulric Gallet was sent unto Picrochole.
Chapter 1.XXXI. The speech made by Gallet to Picrochole.
Chapter 1.XXXII. How Grangousier, to buy peace, caused the cakes to be restored.
Chapter 1.XXXIII. How some statesmen of Picrochole, by hairbrained counsel, put him in extreme danger.
Chapter 1.XXXIV. How Gargantua left the city of Paris to succour his country, and how Gymnast encountered with the enemy.
Chapter 1.XXXIX. How the Monk was feasted by Gargantua, and of the jovial discourse they had at supper.
Chapter 1.XXXV. How Gymnast very souply and cunningly killed Captain Tripet and others of Picrochole's men.
Chapter 1.XXXVI. How Gargantua demolished the castle at the ford of Vede, and how they passed the ford.
Chapter 1.XXXVII. How Gargantua, in combing his head, made the great cannon-balls fall out of his hair.
Chapter 1.XXXVIII. How Gargantua did eat up six pilgrims in a salad.
Chapter 2.I. Of the original and antiquity of the great Pantagruel.
Chapter 2.II. Of the nativity of the most dread and redoubted Pantagruel.
Chapter 2.III. Of the grief wherewith Gargantua was moved at the decease of his wife Badebec.
Chapter 2.IV. Of the infancy of Pantagruel.
Chapter 2.IX. How Pantagruel found Panurge, whom he loved all his lifetime.
Chapter 2.V. Of the acts of the noble Pantagruel in his youthful age.
Chapter 2.VI. How Pantagruel met with a Limousin, who too affectedly did counterfeit the French language.
Chapter 2.VII. How Pantagruel came to Paris, and of the choice books of the Library of St. Victor.
Chapter 2.VIII. How Pantagruel, being at Paris, received letters from his father Gargantua, and the copy of them.
Chapter 2.X. How Pantagruel judged so equitably of a controversy, which was wonderfully obscure and difficult, that, by reason of his just decree therein, he was reputed to have a most admirable judgment.
Chapter 2.XI. How the Lords of Kissbreech and Suckfist did plead before Pantagruel without an attorney.
Chapter 2.XII. How the Lord of Suckfist pleaded before Pantagruel.
Chapter 2.XIII. How Pantagruel gave judgment upon the difference of the two lords.
Chapter 2.XIV. How Panurge related the manner how he escaped out of the hands of the Turks.
Chapter 2.XIX. How Panurge put to a nonplus the Englishman that argued by signs.
Chapter 2.XV. How Panurge showed a very new way to build the walls of Paris.
Chapter 2.XVI. Of the qualities and conditions of Panurge.
Chapter 2.XVII. How Panurge gained the pardons, and married the old women, and of the suit in law which he had at Paris.
Chapter 2.XVIII. How a great scholar of England would have argued against Pantagruel, and was overcome by Panurge.
Chapter 2.XX. How Thaumast relateth the virtues and knowledge of Panurge.
Chapter 2.XXI. How Panurge was in love with a lady of Paris.
Chapter 2.XXII. How Panurge served a Parisian lady a trick that pleased her not very well.
Chapter 2.XXIII. How Pantagruel departed from Paris, hearing news that the Dipsodes had invaded the land of the Amaurots; and the cause wherefore the leagues are so short in France.
Chapter 2.XXIV. A letter which a messenger brought to Pantagruel from a lady of Paris, together with the exposition of a posy written in a gold ring.
Chapter 2.XXIX. How Pantagruel discomfited the three hundred giants armed.
Chapter 2.XXV. How Panurge, Carpalin, Eusthenes, and Epistemon, the gentlemen attendants of Pantagruel, vanquished and discomfited six hundred and threescore horsemen very cunningly.
Chapter 2.XXVI. How Pantagruel and his company were weary in eating still salt meats; and how Carpalin went a-hunting to have some venison.
Chapter 2.XXVII. How Pantagruel set up one trophy in memorial of their valour, and Panurge another in remembrance of the hares. How Pantagruel likewise with his farts begat little men, and with his fisgs little women; and how Panurge broke a great staff over two glasses.
Chapter 2.XXVIII. How Pantagruel got the victory very strangely over the Dipsodes and the Giants.
Chapter 2.XXX. How Epistemon, who had his head cut off, was finely healed by Panurge, and of the news which he brought from the devils, and of the damned people in hell.
Chapter 2.XXXI. How Pantagruel entered into the city of the Amaurots, and how Panurge married King Anarchus to an old lantern-carrying hag, and made him a crier of green sauce.
Chapter 2.XXXII. How Pantagruel with his tongue covered a whole army, and what the author saw in his mouth.
Chapter 2.XXXIII. How Pantagruel became sick, and the manner how he was recovered.
Chapter 2.XXXIV. The conclusion of this present book, and the excuse of the author.
Chapter 3.I. How Pantagruel transported a colony of Utopians into Dipsody.
Chapter 3.II. How Panurge was made Laird of Salmigondin in Dipsody, and did waste his revenue before it came in.
Chapter 3.III. How Panurge praiseth the debtors and borrowers.
Chapter 3.IV. Panurge continueth his discourse in the praise of borrowers and lenders.
Chapter 3.IX. How Panurge asketh counsel of Pantagruel whether he should marry, yea, or no.
Chapter 3.L. How the famous Pantagruelion ought to be prepared and wrought.
Chapter 3.LI. Why it is called Pantagruelion, and of the admirable virtues.
Chapter 3.LII. How a certain kind of Pantagruelion is of that nature that the fire is not able to consume it.
Chapter 3.V. How Pantagruel altogether abhorreth the debtors and borrowers.
Chapter 3.VI. Why new married men were privileged from going to the wars.
Chapter 3.VII. How Panurge had a flea in his ear, and forbore to wear any longer his magnificent codpiece.
Chapter 3.VIII. Why the codpiece is held to be the chief piece of armour amongst warriors.
Chapter 3.X. How Pantagruel representeth unto Panurge the difficulty of giving advice in the matter of marriage; and to that purpose mentioneth somewhat of the Homeric and Virgilian lotteries.
Chapter 3.XI. How Pantagruel showeth the trial of one's fortune by the throwing of dice to be unlawful.
Chapter 3.XII. How Pantagruel doth explore by the Virgilian lottery what fortune Panurge shall have in his marriage.
Chapter 3.XIII. How Pantagruel adviseth Panurge to try the future good or bad luck of his marriage by dreams.
Chapter 3.XIV. Panurge's dream, with the interpretation thereof.
Chapter 3.XIX. How Pantagruel praiseth the counsel of dumb men.
Chapter 3.XL. How Bridlegoose giveth reasons why he looked upon those law-actions which he decided by the chance of the dice.
Chapter 3.XLI. How Bridlegoose relateth the history of the reconcilers of parties at variance in matters of law.
Chapter 3.XLII. How suits at law are bred at first, and how they come afterwards to their perfect growth.
Chapter 3.XLIII. How Pantagruel excuseth Bridlegoose in the matter of sentencing actions at law by the chance of the dice.
Chapter 3.XLIV. How Pantagruel relateth a strange history of the perplexity of human judgment.
Chapter 3.XLIX. How Pantagruel did put himself in a readiness to go to sea; and of the herb named Pantagruelion.
Chapter 3.XLV. How Panurge taketh advice of Triboulet.
Chapter 3.XLVI. How Pantagruel and Panurge diversely interpret the words of Triboulet.
Chapter 3.XLVII. How Pantagruel and Panurge resolved to make a visit to the Oracle of the Holy Bottle.
Chapter 3.XLVIII. How Gargantua showeth that the children ought not to marry without the special knowledge and advice of their fathers and mothers.
Chapter 3.XV. Panurge's excuse and exposition of the monastic mystery concerning powdered beef.
Chapter 3.XVI. How Pantagruel adviseth Panurge to consult with the Sibyl of Panzoust.
Chapter 3.XVII. How Panurge spoke to the Sibyl of Panzoust.
Chapter 3.XVIII. How Pantagruel and Panurge did diversely expound the verses of the Sibyl of Panzoust.
Chapter 3.XX. How Goatsnose by signs maketh answer to Panurge.
Chapter 3.XXI. How Panurge consulteth with an old French poet, named Raminagrobis.
Chapter 3.XXII. How Panurge patrocinates and defendeth the Order of the Begging Friars.
Chapter 3.XXIII. How Panurge maketh the motion of a return to Raminagrobis.
Chapter 3.XXIV. How Panurge consulteth with Epistemon.
Chapter 3.XXIX. How Pantagruel convocated together a theologian, physician, lawyer, and philosopher, for extricating Panurge out of the perplexity wherein he was.
Chapter 3.XXV. How Panurge consulteth with Herr Trippa.
Chapter 3.XXVI. How Panurge consulteth with Friar John of the Funnels.
Chapter 3.XXVII. How Friar John merrily and sportingly counselleth Panurge.
Chapter 3.XXVIII. How Friar John comforteth Panurge in the doubtful matter of cuckoldry.
Chapter 3.XXX. How the theologue, Hippothadee, giveth counsel to Panurge in the matter and business of his nuptial enterprise.
Chapter 3.XXXI. How the physician Rondibilis counselleth Panurge.
Chapter 3.XXXII. How Rondibilis declareth cuckoldry to be naturally one of the appendances of marriage.
Chapter 3.XXXIII. Rondibilis the physician's cure of cuckoldry.
Chapter 3.XXXIV. How women ordinarily have the greatest longing after things prohibited.
Chapter 3.XXXIX. How Pantagruel was present at the trial of Judge Bridlegoose, who decided causes and controversies in law by the chance and fortune of the dice.
Chapter 3.XXXV. How the philosopher Trouillogan handleth the difficulty of marriage.
Chapter 3.XXXVI. A continuation of the answer of the Ephectic and Pyrrhonian philosopher Trouillogan.
Chapter 3.XXXVII. How Pantagruel persuaded Panurge to take counsel of a fool.
Chapter 3.XXXVIII. How Triboulet is set forth and blazed by Pantagruel and Panurge.
Chapter 4.I. How Pantagruel went to sea to visit the oracle of Bacbuc, alias the Holy Bottle.
Chapter 4.II. How Pantagruel bought many rarities in the island of Medamothy.
Chapter 4.III. How Pantagruel received a letter from his father Gargantua, and of the strange way to have speedy news from far distant places.
Chapter 4.IV. How Pantagruel writ to his father Gargantua, and sent him several curiosities.
Chapter 4.IX. How Pantagruel arrived at the island of Ennasin, and of the strange ways of being akin in that country.
Chapter 4.L. How Homenas showed us the archetype, or representation of a pope.
Chapter 4.LI. Table-talk in praise of the decretals.
Chapter 4.LII. A continuation of the miracles caused by the decretals.
Chapter 4.LIII. How, by the virtue of the decretals, gold is subtilely drawn out of France to Rome.
Chapter 4.LIV. How Homenas gave Pantagruel some bon-Christian pears.
Chapter 4.LIX. Of the ridiculous statue Manduce; and how and what the Gastrolaters sacrifice to their ventripotent god.
Chapter 4.LV. How Pantagruel, being at sea, heard various unfrozen words.
Chapter 4.LVI. How among the frozen words Pantagruel found some odd ones.
Chapter 4.LVII. How Pantagruel went ashore at the dwelling of Gaster, the first master of arts in the world.
Chapter 4.LVIII. How, at the court of the master of ingenuity, Pantagruel detested the Engastrimythes and the Gastrolaters.
Chapter 4.LX. What the Gastrolaters sacrificed to their god on interlarded fish-days.
Chapter 4.LXI. How Gaster invented means to get and preserve corn.
Chapter 4.LXII. How Gaster invented an art to avoid being hurt or touched by cannon-balls.
Chapter 4.LXIII. How Pantagruel fell asleep near the island of Chaneph, and of the problems proposed to be solved when he waked.
Chapter 4.LXIV. How Pantagruel gave no answer to the problems.
Chapter 4.LXV. How Pantagruel passed the time with his servants.
Chapter 4.LXVI. How, by Pantagruel's order, the Muses were saluted near the isle of Ganabim.
Chapter 4.LXVII. How Panurge berayed himself for fear; and of the huge cat Rodilardus, which he took for a puny devil.
Chapter 4.V. How Pantagruel met a ship with passengers returning from Lantern-land.
Chapter 4.VI. How, the fray being over, Panurge cheapened one of Dingdong's sheep.
Chapter 4.VII. Which if you read you'll find how Panurge bargained with Dingdong.
Chapter 4.VIII. How Panurge caused Dingdong and his sheep to be drowned in the sea.
Chapter 4.X. How Pantagruel went ashore at the island of Chely, where he saw King St. Panigon.
Chapter 4.XI. Why monks love to be in kitchens.
Chapter 4.XII. How Pantagruel passed by the land of Pettifogging, and of the strange way of living among the Catchpoles.
Chapter 4.XIII. How, like Master Francis Villon, the Lord of Basche commended his servants.
Chapter 4.XIV. A further account of catchpoles who were drubbed at Basche's house.
Chapter 4.XIX. What countenances Panurge and Friar John kept during the storm.
Chapter 4.XL. How Friar John fitted up the sow; and of the valiant cooks that went into it.
Chapter 4.XLI. How Pantagruel broke the Chitterlings at the knees.
Chapter 4.XLII. How Pantagruel held a treaty with Niphleseth, Queen of the Chitterlings.
Chapter 4.XLIII. How Pantagruel went into the island of Ruach.
Chapter 4.XLIV. How small rain lays a high wind.
Chapter 4.XLIX. How Homenas, Bishop of Papimany, showed us the Uranopet decretals.
Chapter 4.XLV. How Pantagruel went ashore in the island of Pope-Figland.
Chapter 4.XLVI. How a junior devil was fooled by a husbandman of Pope-Figland.
Chapter 4.XLVII. How the devil was deceived by an old woman of Pope-Figland.
Chapter 4.XLVIII. How Pantagruel went ashore at the island of Papimany.
Chapter 4.XV. How the ancient custom at nuptials is renewed by the catchpole.
Chapter 4.XVI. How Friar John made trial of the nature of the catchpoles.
Chapter 4.XVII. How Pantagruel came to the islands of Tohu and Bohu; and of the strange death of Wide-nostrils, the swallower of windmills.
Chapter 4.XVIII. How Pantagruel met with a great storm at sea.
Chapter 4.XX. How the pilots were forsaking their ships in the greatest stress of weather.
Chapter 4.XXI. A continuation of the storm, with a short discourse on the subject of making testaments at sea.
Chapter 4.XXII. An end of the storm.
Chapter 4.XXIII. How Panurge played the good fellow when the storm was over.
Chapter 4.XXIV. How Panurge was said to have been afraid without reason during the storm.
Chapter 4.XXIX. How Pantagruel sailed by the Sneaking Island, where Shrovetide reigned.
Chapter 4.XXV. How, after the storm, Pantagruel went on shore in the islands of the Macreons.
Chapter 4.XXVI. How the good Macrobius gave us an account of the mansion and decease of the heroes.
Chapter 4.XXVII. Pantagruel's discourse of the decease of heroic souls; and of the dreadful prodigies that happened before the death of the late Lord de Langey.
Chapter 4.XXVIII. How Pantagruel related a very sad story of the death of the heroes.
Chapter 4.XXX. How Shrovetide is anatomized and described by Xenomanes.
Chapter 4.XXXI. Shrovetide's outward parts anatomized.
Chapter 4.XXXII. A continuation of Shrovetide's countenance.
Chapter 4.XXXIII. How Pantagruel discovered a monstrous physeter, or whirlpool, near the Wild Island.
Chapter 4.XXXIV. How the monstrous physeter was slain by Pantagruel.
Chapter 4.XXXIX. How Friar John joined with the cooks to fight the Chitterlings.
Chapter 4.XXXV. How Pantagruel went on shore in the Wild Island, the ancient abode of the Chitterlings.
Chapter 4.XXXVI. How the wild Chitterlings laid an ambuscado for Pantagruel.
Chapter 4.XXXVII. How Pantagruel sent for Colonel Maul-chitterling and Colonel Cut-pudding; with a discourse well worth your hearing about the names of places and persons.
Chapter 4.XXXVIII. How Chitterlings are not to be slighted by men.
Chapter 5.I. How Pantagruel arrived at the Ringing Island, and of the noise that we heard.
Chapter 5.II. How the Ringing Island had been inhabited by the Siticines, who were become birds.
Chapter 5.III. How there is but one pope-hawk in the Ringing Island.
Chapter 5.IV. How the birds of the Ringing Island were all passengers.
Chapter 5.IX. How we arrived at the island of Tools.
Chapter 5.V. Of the dumb Knight-hawks of the Ringing Island.
Chapter 5.VI. How the birds are crammed in the Ringing Island.
Chapter 5.VII. How Panurge related to Master Aedituus the fable of the horse and the ass.
Chapter 5.VIII. How with much ado we got a sight of the pope-hawk.
Chapter 5.X. How Pantagruel arrived at the island of Sharping.
Chapter 5.XI. How we passed through the wicket inhabited by Gripe-men-all, Archduke of the Furred Law-cats.
Chapter 5.XII. How Gripe-men-all propounded a riddle to us.
Chapter 5.XIII. How Panurge solved Gripe-men-all's riddle.
Chapter 5.XIV. How the Furred Law-cats live on corruption.
Chapter 5.XIX. How we arrived at the queendom of Whims or Entelechy.
Chapter 5.XL. How the battle in which the good Bacchus overthrew the Indians was represented in mosaic work.
Chapter 5.XLI. How the temple was illuminated with a wonderful lamp.
Chapter 5.XLII. How the Priestess Bacbuc showed us a fantastic fountain in the temple, and how the fountain-water had the taste of wine, according to the imagination of those who drank of it.
Chapter 5.XLIII. How the Priestess Bacbuc equipped Panurge in order to have the word of the Bottle.
Chapter 5.XLIV. How Bacbuc, the high-priestess, brought Panurge before the Holy Bottle.
Chapter 5.XLV. How Bacbuc explained the word of the Goddess-Bottle.
Chapter 5.XLVI. How Panurge and the rest rhymed with poetic fury.
Chapter 5.XLVII. How we took our leave of Bacbuc, and left the Oracle of the Holy Bottle.
Chapter 5.XV. How Friar John talks of rooting out the Furred Law-cats.
Chapter 5.XVI. How Pantagruel came to the island of the Apedefers, or Ignoramuses, with long claws and crooked paws, and of terrible adventures and monsters there.
Chapter 5.XVII. How we went forwards, and how Panurge had like to have been killed.
Chapter 5.XVIII. How our ships were stranded, and we were relieved by some people that were subject to Queen Whims (qui tenoient de la Quinte).
Chapter 5.XX. How the Quintessence cured the sick with a song.
Chapter 5.XXI. How the Queen passed her time after dinner.
Chapter 5.XXII. How Queen Whims' officers were employed; and how the said lady retained us among her abstractors.
Chapter 5.XXIII. How the Queen was served at dinner, and of her way of eating.
Chapter 5.XXIV. How there was a ball in the manner of a tournament, at which Queen Whims was present.
Chapter 5.XXIX. How Epistemon disliked the institution of Lent.
Chapter 5.XXV. How the thirty-two persons at the ball fought.
Chapter 5.XXVI. How we came to the island of Odes, where the ways go up and down.
Chapter 5.XXVII. How we came to the island of Sandals; and of the order of Semiquaver Friars.
Chapter 5.XXVIII. How Panurge asked a Semiquaver Friar many questions, and was only answered in monosyllables.
Chapter 5.XXX. How we came to the land of Satin.
Chapter 5.XXXI. How in the land of Satin we saw Hearsay, who kept a school of vouching.
Chapter 5.XXXII. How we came in sight of Lantern-land.
Chapter 5.XXXIII. How we landed at the port of the Lychnobii, and came to Lantern-land.
Chapter 5.XXXIV. How we arrived at the Oracle of the Bottle.
Chapter 5.XXXIX. How we saw Bacchus's army drawn up in battalia in mosaic work.
Chapter 5.XXXV. How we went underground to come to the Temple of the Holy Bottle, and how Chinon is the oldest city in the world.
Chapter 5.XXXVI. How we went down the tetradic steps, and of Panurge's fear.
Chapter 5.XXXVII. How the temple gates in a wonderful manner opened of themselves.
Chapter 5.XXXVIII. Of the temple's admirable pavement.
......@@ -42,12 +42,12 @@
</p>
{% endif %}
<!-- <a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.pk }}/">Add file</a> -->
<a class="btn btn-primary btn-lg" role="button" href="/project/{{project.pk}}/corpus/{{ corpus.pk }}/corpus.csv">Save as</a>
<a class="btn btn-primary btn-lg" role="button" href="/project/{{project.pk}}/corpus/{{ corpus.pk }}/delete">Delete</a></p>
<!-- <a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.id }}/">Add file</a> -->
<a class="btn btn-primary btn-lg" role="button" href="/project/{{project.id}}/corpus/{{ corpus.id }}/corpus.csv">Save as</a>
<a class="btn btn-primary btn-lg" role="button" href="/delete/{{ corpus.id }}">Delete</a></p>
{% if number == 0 %}
<a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.pk }}/">Add documents</a></p>
<a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.id }}/">Add documents</a></p>
{% endif %}
</div>
......
......@@ -260,8 +260,29 @@
</div>
<div id="topPapers"></div>
<!--
<div id="tab-container-top" class='tab-container'>
<ul class='etabs'>
<li id="tabmed" class='tab active'><a href="#tabs3">Medline Pubs</a></li>
<li id="tabgps" class='tab'><a href="#tabs4">+</a></li>
</ul>
<div class='panel-container'>
<div id="tabs3">
<div id="topPapers"></div>
</div>
<div id="tabs4">
<div id="topProposals"></div>
</div>
</div>
</div>
-->
<div id="information"></div>
</div>
......
......@@ -19,18 +19,16 @@
<div class="col-md-4 content">
<h1>Gargantext</h1>
<p>A web platform to explore text-mining</p>
<a class="btn btn-primary btn-lg" href="/projects">Test Gargantext</a>
<a class="btn btn-primary btn-lg" href="/projects" title="Click and test by yourself">Test Gargantext</a>
</div>
<div class="col-md-3 content">
</div>
<div class="col-md-5 content">
<!--
<h3>Project Manager:</h3>
<h4><a href="http://alexandre.delanoe.org" target="blank">Alexandre Delanoë</a></h4>
<h3>Scientific board:</h3>
<h4><a href="http://chavalarias.com" target="blank">David Chavalarias</a> and <a href="http://alexandre.delanoe.org" target="blank">Alexandre Delanoë</a></h4>
<h3><a href="/about/#collapseTeam" target="blank">Thanks to all the team</a></h3>
-->
<div class="col-md-2 content"></div>
<div class="col-md-2 content"></div>
<div class="col-md-2 content">
<p class="right">
<div style="border:15px">
<img src="{% static "img/logo.png"%}" title="Logo designed by anoe" style="100px; height:150px; border:3px solid white">
</div>
</p>
</div>
</div>
</div>
......@@ -39,7 +37,7 @@
<div class="row">
<div class="content">
<center>
<img src="{% static "img/logo.png"%}" alt="Logo Gargantext" style="100px; height:150px">
<img src="{% static "img/Gargantextuel-212x300.jpg"%}" title="Gargantextuel drawn by Cecile Meadel" style="border:2px solid black">
<!--
<h2>Introduction Video</h2>
......@@ -63,57 +61,23 @@
<div class="row">
<div class="col-md-4 content">
<h3><a href="#">Historic</a></h3>
<p>
Chapter 1.VI. -- How Gargantua was born in a strange manner.
Chapter 2.XXIII. -- How Pantagruel departed from Paris, hearing
news that the Dipsodes had invaded the land of the Amaurots; and
the cause wherefore the leagues are so short in France. Chapter
3.XLVI. -- How Pantagruel and Panurge diversely interpret the
words of Triboulet. Chapter 4.LV. -- How Pantagruel, being at sea,
heard various unfrozen words. Chapter 5.IX. -- How we arrived at
the island of Tools.
</p>
<h3><a href="#" title="Random sentences in Gargantua's Books chapters, historically true">Historic</a></h3>
<p> {{ paragraph_gargantua }}</p>
</div>
<div class="col-md-4 content">
<h3><a href="#">Presentation</a></h3>
<p>
Lorem ipsum dolor sit amet, consectetur adipiscing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris
nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in
reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla
pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
culpa qui officia deserunt mollit anim id est laborum.
<h3><a href="#" title="Randomized words, semantically and syntactically false." >Presentation</a></h3>
<p> {{ paragraph_lorem }}
</p>
</div>
<div class="col-md-4 content">
<h3><a href="#">Tutoreil</a></h3>
<h3><a href="#" title="Randomized letters, true or false ?">Tutoreil</a></h3>
<p>
{{ paragraph_tutoreil }}
<!-- Why not French ? -->
<!-- find Cambridge source which inspired this -->
Il praaît que l'odrre des ltetres dnas un mot n'a pas
d'iprnorotncae. La pmeirère et la drenèire letrte diovent
êrte à la bnnoe pclae. Le rsete peut êrte dnas un dsérorde
ttoal et on puet tujoruos lrie snas poribême. On ne lit
donc pas chuaqe ltetre en elle-mmêe, mias le mot cmome un
tuot. Un chnagmnet de réfretniel et nuos tarnsposns ce
rselutat au txete lui-mmêe: l'odrre des mtos est faiblement
imoprtnat copmraé au cnotxete du txete qui, lui, est copmté:
comptexter avec Gargantext.
</p>
</div>
......
......@@ -17,16 +17,16 @@
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" style="line-height:15px; height:10px; padding: 10px 10px;" href="/"><img src="/img/logo.svg"></a>
<a class="navbar-brand" style="line-height:15px; height:10px; padding: 10px 10px;" href="/"><img src="/img/logo.svg" title="Back to home."></a>
</div>
<div class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<!-- <li><a href="/admin/">Admin/</a></li> -->
<li><a href="/about/">About</a>
<li><a href="/about/" title="More information about the project, its sponsors and its authors.">About</a>
</li>
{% if user.is_authenticated %}
<li><a href="/projects/">Projects</a></li>
<li><a href="/projects/" title="All your projects are here.">Projects</a></li>
{% endif %}
{% if project %}
<li><a href="/project/{{project.id}}">{{project.name}}</a></li>
......@@ -40,14 +40,14 @@
<ul class="nav pull-right">
<li class="dropdown">
<a href="#" role="button" class="dropdown-toggle" data-toggle="dropdown"><i class="icon-user"></i> {{ user }}<i class="caret"></i>
<a href="#" role="button" class="dropdown-toggle" data-toggle="dropdown" title="That is your login"><i class="icon-user"></i> {{ user }}<i class="caret"></i>
</a>
<ul class="dropdown-menu">
<li><a tabindex="-1" href="http://www.iscpif.fr/tiki-index.php?page=gargantext_feedback" target="blank" >Report Feedback</a></li>
<li><a tabindex="-1" href="http://www.iscpif.fr/tiki-index.php?page=gargantext_feedback" title="Send us a message (bug, thanks, congrats...)">Report Feedback</a></li>
<li class="divider"></li>
{% if user.is_authenticated %}
<li><a tabindex="-1" href="/auth/logout">Logout</a></li>
<li><a tabindex="-1" href="/auth/logout" title="Click here to logout especially on public devices">Logout</a></li>
{% else %}
<li><a tabindex="-1" href="/auth/">Login</a></li>
{% endif %}
......@@ -66,8 +66,8 @@
<hr>
<footer>
<p>Gargantext, version 1.0.6, <a href="http://www.cnrs.fr" target="blank">Copyrights CNRS {{ date.year }}</a>,
<a href="http://www.gnu.org/licenses/agpl-3.0.html" target="blank">Licence aGPLV3</a>.</p>
<p>Gargantext, version 1.0.6, <a href="http://www.cnrs.fr" target="blank" title="Institution that enables this project.">Copyrights CNRS {{ date.year }}</a>,
<a href="http://www.gnu.org/licenses/agpl-3.0.html" target="blank" title="Legal instructions of the project.">Licence aGPLV3</a>.</p>
</footer>
......
......@@ -84,19 +84,16 @@
<ul>
{% for corpus in corpora %}
<li> {% ifnotequal corpus.count 0 %}
<a href="/project/{{project.id}}/corpus/{{corpus.id}}">
{{corpus.name}}
</a>
, {{ corpus.count }} Documents
{% else %}
{{corpus.name}} : <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Processing, drink a cup of tea, and refresh the page :)
{% endifnotequal %}
<a href="/project/{{project.id}}/corpus/{{corpus.id}}"> {{corpus.name}} </a> , {{ corpus.count }} Documents
{% else %}
{{corpus.name}} : <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Processing, drink a cup of tea, and refresh the page :)
{% endifnotequal %}
<button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom"
data-content='
<ul>
<li> Rename </li>
<li> Add new documents </li>
<li><a href="/project/{{ project.id }}/corpus/{{ corpus.id}}/delete">Delete</a></li>
<li><a href="/delete/{{corpus.id}}">Delete</a></li>
</ul>
'>Manage</button>
</li>
......@@ -330,7 +327,7 @@
console.log("enabling "+"#"+value.id)
$("#"+value.id).attr('onclick','getGlobalResults(this);');
// $("#submit_thing").prop('disabled' , false)
$("#submit_thing").html("Process a 100 sample!")
$("#submit_thing").html("Process a 1000 sample!")
thequeries = data
var N=0,k=0;
......@@ -427,8 +424,8 @@
//CSS events for changing the Select element
function CustomForSelect( selected ) {
// show Radio-Inputs and trigger FileOrNotFile>@upload-file events
//if(selected=="pubmed" || selected=="istext") {
if(selected=="pubmed") {
if(selected=="pubmed" || selected=="istext") {
// if(selected=="pubmed") {
console.log("show the button for: "+selected)
$("#pubmedcrawl").css("visibility", "visible");
$("#pubmedcrawl").show();
......
......@@ -44,7 +44,7 @@
<ul>
<li> Rename </li>
<li> Add new corpus </li>
<li><a href="/project/{{ project.id }}/delete">Delete</a></li>
<li><a href="/delete/{{ project.id }}">Delete</a></li>
</ul>
'>Manage</button>
......
......@@ -19,18 +19,21 @@
{% if documents %}
<div id="delAll" style="visibility: hidden;">
<button onclick="deleteDuplicates(theurl);">Delete Duplicates</button>
</div>
<ul>
{% for doc in documents %}
{% if doc.date %}
<li><div id="doc_{{doc.id}}"> <b>{{ doc.date }}</b>: <a target="_blank" href="/nodeinfo/{{doc.id}}">{{ doc.name}}</a> , @ {{ doc.metadata.source}}</div></li>
{% endif %}
{% endfor %}
<div id="delAll" style="visibility: hidden;">
<center>
<button onclick="deleteDuplicates(theurl);">Delete all Duplicates in one click</button>
</center>
</div>
</ul>
<script>
......
# Stand-alone script: creates a corpus node under an existing project,
# attaches a PubMed sample file to it, then calls the parsing, ngram
# extraction and TF-IDF helpers on that corpus.
# Without this, we couldn't use the Django environment
# (the environment variables must be set BEFORE the project imports below).
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
# database tools
# NOTE(review): `session`, `User`, `Node` and `cache` are not defined in this
# script; presumably they come from the star imports below -- confirm.
from node import models
from gargantext_web.db import *
from parsing.corpustools import *
# Owner of the new corpus: the first user returned by the query.
user = session.query(User).first()
# Parent project, looked up by its name 'A'.
# NOTE(review): if `session` is a SQLAlchemy session, .first() returns None
# when nothing matches and `project.id` below would raise -- confirm that a
# project named 'A' always exists where this script is run.
project = session.query(Node).filter(Node.name == 'A').first()
# Build the corpus node as a child of the project, owned by `user`.
corpus = Node(
parent_id = project.id,
name = 'Test 456',
type_id = cache.NodeType['Corpus'].id,
user_id = user.id,
)
# Add the node to the session and commit before attaching resources to it.
session.add(corpus)
session.commit()
# Attach the sample PubMed XML export (absolute path on the host) to the
# corpus, tagged with the 'pubmed' resource type.
add_resource(corpus,
# file = './data_samples/pubmed_result.xml',
file = '/srv/gargantext_lib/data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
type_id = cache.ResourceType['pubmed'].id,
)
# Processing steps, in order. Their exact effects are defined in helpers not
# visible here (presumably parsing.corpustools -- TODO confirm).
parse_resources(corpus)
# Only the 'title' field is passed for ngram extraction.
extract_ngrams(corpus, ('title', ))
# print(corpus)
# corpus = session.query(Node).filter(Node.id == 72771).first()
# corpus = session.query(Node).filter(Node.id == 73017).first()
compute_tfidf(corpus)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment