Commit e7ac6426 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge remote-tracking branch 'gargantext.org/simon-unstable-notebook' into unstable-merge

parents dae0243d 30c1dbdc
from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint, Index
from sqlalchemy.orm import relationship
from sqlalchemy.orm import relationship, validates
from sqlalchemy.types import TypeDecorator, \
Integer, Float, Boolean, DateTime, String, Text
from sqlalchemy.dialects.postgresql import JSONB, DOUBLE_PRECISION as Double
......@@ -7,6 +7,7 @@ from sqlalchemy.ext.mutable import MutableDict, MutableList
from sqlalchemy.ext.declarative import declarative_base
__all__ = ["Column", "ForeignKey", "UniqueConstraint", "relationship",
"validates", "ValidatorMixin",
"Integer", "Float", "Boolean", "DateTime", "String", "Text",
"TypeDecorator",
"JSONB", "Double",
......@@ -18,6 +19,25 @@ __all__ = ["Column", "ForeignKey", "UniqueConstraint", "relationship",
# all tables handled by Alembic migration scripts.
Base = declarative_base()
# To be used by tables already handled by Django ORM, such as User model. We
# separate them in order to keep those out of Alembic sight.
DjangoBase = declarative_base()
class ValidatorMixin(object):
def enforce_length(self, key, value):
"""Truncate a string according to its column length
Usage example:
.. code-block:: python
@validates('some_column')
def validate_some_column(self, key, value):
self.enforce_length(key, value)
"""
max_len = getattr(self.__class__, key).prop.columns[0].type.length
if value and len(value) > max_len:
return value[:max_len]
return value
......@@ -9,7 +9,7 @@ from datetime import datetime
from .base import Base, Column, ForeignKey, relationship, TypeDecorator, Index, \
Integer, Float, String, DateTime, JSONB, \
MutableList, MutableDict
MutableList, MutableDict, validates, ValidatorMixin
from .users import User
__all__ = ['Node', 'NodeNode', 'CorpusNode']
......@@ -26,7 +26,7 @@ class NodeType(TypeDecorator):
return NODETYPES[typeindex]
class Node(Base):
class Node(ValidatorMixin, Base):
"""This model can fit many purposes:
myFirstCorpus = session.query(CorpusNode).first()
......@@ -112,6 +112,10 @@ class Node(Base):
'user_id={0.user_id}, parent_id={0.parent_id}, ' \
'name={0.name!r}, date={0.date})>'.format(self)
@validates('name')
def validate_name(self, key, value):
return self.enforce_length(key, value)
@property
def ngrams(self):
"""Pseudo-attribute allowing to retrieve a node's ngrams.
......
......@@ -73,7 +73,8 @@ from rest_framework.views import APIView
from gargantext.util.json import json_encoder
def JsonHttpResponse(data, status=200):
return HttpResponse(
content = json_encoder.encode(data),
content = data.encode('utf-8') if isinstance(data, str) else \
json_encoder.encode(data),
content_type = 'application/json; charset=utf-8',
status = status
)
......
#!/usr/bin/env python
"""
Gargantext Software Copyright (c) 2016-2017 CNRS ISC-PIF -
http://iscpif.fr
......@@ -6,45 +7,29 @@ http://gitlab.iscpif.fr/humanities/gargantext/blob/stable/LICENSE )
- In France : a CECILL variant affero compliant
- GNU aGPLV3 for all other countries
"""
#!/usr/bin/env python
import sys
import os
import django
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'gargantext.settings')
django.setup()
# Django settings
dirname = os.path.dirname(os.path.realpath(__file__))
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext.settings")
from gargantext.constants import QUERY_SIZE_N_MAX, get_resource, get_resource_by_name
from gargantext.models import ProjectNode, DocumentNode, UserNode, User
from gargantext.util.db import session, get_engine
from collections import Counter
import importlib
from django.http import Http404
# initialize Django application
from django.core.wsgi import get_wsgi_application
application = get_wsgi_application()
from gargantext.util.toolchain.main import parse_extract_indexhyperdata
from gargantext.util.db import *
from gargantext.models import Node
from gargantext.util.toolchain.main import parse_extract_indexhyperdata
from nltk.tokenize import wordpunct_tokenize
class NotebookError(Exception):
pass
from gargantext.models import *
from nltk.tokenize import word_tokenize
import nltk as nltk
from statistics import mean
from math import log
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
import datetime
from collections import Counter
from langdetect import detect as detect_lang
def documents(corpus_id):
return (session.query(Node).filter( Node.parent_id==corpus_id
, Node.typename=="DOCUMENT"
)
# .order_by(Node.hyperdata['publication_date'])
.all()
)
return (session.query(DocumentNode).filter_by(parent_id=corpus_id)
#.order_by(Node.hyperdata['publication_date'])
.all())
#import seaborn as sns
......@@ -56,11 +41,14 @@ def chart(docs, field):
frame1 = pd.DataFrame(year_publis, columns=['Date', 'DateValue'], index=frame0.Date)
return frame1
from gargantext.util.crawlers.HAL import HalCrawler
def scan_hal(request):
hal = HalCrawler()
return hal.scan_results(request)
def scan_gargantext(corpus_id, lang, request):
connection = get_engine().connect()
# TODO add some sugar the request (ideally request should be the same for hal and garg)
......@@ -77,47 +65,117 @@ def myProject_fromUrl(url):
myProject :: String -> Project
"""
project_id = url.split("/")[4]
project = session.query(Node).filter(Node.id == project_id).first()
project = session.query(ProjectNode).get(project_id)
return project
def newCorpus(project, resourceName=11, name="Machine learning", query="LSTM"):
print("Corpus \"%s\" in project \"%s\" created" % (name, project.name))
def newCorpus(project, source, name=None, query=None):
error = False
corpus = project.add_child(name="Corpus name", typename='CORPUS')
corpus.hyperdata["resources"] = [{"extracted" : "true", "type" : 11}]
corpus.hyperdata["statuses"] = [{"action" : "notebook", "complete" : "true"}]
# [TODO] Add informations needed to get buttons on the Project view.
session.add(corpus)
session.commit()
if name is None:
name = query
hal = HalCrawler()
max_result = hal.scan_results(query)
paging = 100
for page in range(0, max_result, paging):
print("%s documents downloaded / %s." % (str( paging * (page +1)), str(max_result) ))
docs = (hal._get(query, fromPage=page, count=paging)
.get("response", {})
.get("docs", [])
)
from gargantext.util.parsers.HAL import HalParser
# [TODO] fix boilerplate for docs here
new_docs = HalParser(docs)._parse(docs)
for doc in new_docs:
new_doc = (corpus.add_child( name = doc["title"][:255]
, typename = 'DOCUMENT')
)
new_doc["hyperdata"] = doc
session.add(new_doc)
session.commit()
if not isinstance(project, ProjectNode):
error = "a valid project"
if not isinstance(source, int) and not isinstance(source, str):
error = "a valid source identifier: id or name"
elif not isinstance(query, str):
error = "a valid query"
elif not isinstance(name, str):
error = "a valid name"
print("Extracting the ngrams")
parse_extract_indexhyperdata(corpus)
if error:
raise NotebookError("Please provide %s." % error)
print("Corpus is ready to explore:")
print("http://imt.gargantext.org/projects/%s/corpora/%s/" % (project.id, corpus.id))
resource = get_resource(source) if isinstance(source, int) else \
get_resource_by_name(source)
return corpus
moissonneur_name = get_moissonneur_name(resource) if resource else \
source.lower()
try:
moissonneur = get_moissonneur(moissonneur_name)
except ImportError:
raise NotebookError("Invalid source identifier: %r" % source)
return run_moissonneur(moissonneur, project, name, query)
def get_moissonneur_name(ident):
""" Return moissonneur module name from RESOURCETYPE or crawler name """
# Does it quacks like a RESOURCETYPE ?
if hasattr(ident, 'get'):
ident = ident.get('crawler')
# Extract name from crawler class name, otherwise assume ident is already
# a moissonneur name.
if isinstance(ident, str) and ident.endswith('Crawler'):
return ident[:-len('Crawler')].lower()
def get_moissonneur(name):
""" Return moissonneur module from its name """
if not isinstance(name, str) or not name.islower():
raise NotebookError("Invalid moissonneur name: %r" % name)
module = importlib.import_module('moissonneurs.%s' % name)
module.name = name
return module
def run_moissonneur(moissonneur, project, name, query):
""" Run moissonneur and return resulting corpus """
# XXX Uber-kludge with gory details. Spaghetti rulezzzzz!
class Dummy(object):
pass
request = Dummy()
request.method = 'POST'
request.path = 'nowhere'
request.META = {}
# XXX 'string' only have effect on moissonneurs.pubmed; its value is added
# when processing request client-side, take a deep breath and see
# templates/projects/project.html for more details.
request.POST = {'string': name,
'query': query,
'N': QUERY_SIZE_N_MAX}
request.user = Dummy()
request.user.id = project.user_id
request.user.is_authenticated = lambda: True
if moissonneur.name == 'istex':
# Replace ALL spaces by plus signs
request.POST['query'] = '+'.join(filter(None, query.split(' ')))
try:
import json
r = moissonneur.query(request)
raw_json = r.content.decode('utf-8')
data = json.loads(raw_json)
if moissonneur.name == 'pubmed':
count = sum(x['count'] for x in data)
request.POST['query'] = raw_json
elif moissonneur.name == 'istex':
count = data.get('total', 0)
else:
count = data.get('results_nb', 0)
if count > 0:
corpus = moissonneur.save(request, project.id, return_corpus=True)
else:
return None
except (ValueError, Http404) as e:
raise e
# Sometimes strange things happens...
if corpus.name != name:
corpus.name = name
session.commit()
return corpus
......@@ -30,7 +30,7 @@ def query( request):
#ids = crawlerbot.get_ids(query)
return JsonHttpResponse({"results_nb":crawlerbot.results_nb})
def save(request, project_id):
def save(request, project_id, return_corpus=False):
'''save'''
if request.method == "POST":
......@@ -101,6 +101,9 @@ def save(request, project_id):
session.rollback()
# --------------------------------------------
if return_corpus:
return corpus
return render(
template_name = 'pages/projects/wait.html',
request = request,
......
......@@ -33,7 +33,7 @@ def query( request):
print(results)
return JsonHttpResponse({"results_nb":crawlerbot.results_nb})
def save(request, project_id):
def save(request, project_id, return_corpus=False):
'''save'''
if request.method == "POST":
......@@ -103,6 +103,9 @@ def save(request, project_id):
session.rollback()
# --------------------------------------------
if return_corpus:
return corpus
return render(
template_name = 'pages/projects/wait.html',
request = request,
......
......@@ -29,7 +29,7 @@ def query( request):
#ids = crawlerbot.get_ids(query)
return JsonHttpResponse({"results_nb":crawlerbot.results_nb})
def save(request, project_id):
def save(request, project_id, return_corpus=False):
'''save'''
if request.method == "POST":
......@@ -100,6 +100,9 @@ def save(request, project_id):
session.rollback()
# --------------------------------------------
if return_corpus:
return corpus
return render(
template_name = 'pages/projects/wait.html',
request = request,
......
......@@ -52,7 +52,7 @@ def query( request ):
def save(request , project_id):
def save(request , project_id, return_corpus=False):
print("testISTEX:")
print(request.method)
alist = ["bar","foo"]
......@@ -171,6 +171,9 @@ def save(request , project_id):
session.rollback()
# --------------------------------------------
if return_corpus:
return corpus
return render(
template_name = 'pages/projects/wait.html',
request = request,
......
......@@ -33,7 +33,7 @@ def query( request):
print(results)
return JsonHttpResponse({"results_nb":crawlerbot.results_nb})
def save(request, project_id):
def save(request, project_id, return_corpus=False):
'''save'''
if request.method == "POST":
......@@ -104,6 +104,9 @@ def save(request, project_id):
session.rollback()
# --------------------------------------------
if return_corpus:
return corpus
return render(
template_name = 'pages/projects/wait.html',
request = request,
......
......@@ -69,7 +69,7 @@ def query( request ):
return JsonHttpResponse(data)
def save( request , project_id ) :
def save( request , project_id, return_corpus=False ) :
# implicit global session
# do we have a valid project id?
try:
......@@ -164,6 +164,10 @@ def save( request , project_id ) :
session.rollback()
# --------------------------------------------
sleep(1)
if return_corpus:
return corpus
return HttpResponseRedirect('/projects/' + str(project_id))
data = alist
......
......@@ -2,10 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"metadata": {},
"source": [
"# Advanced Gargantext Tutorial (Python)"
]
......@@ -13,9 +10,7 @@
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
......@@ -25,11 +20,7 @@
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [],
"source": [
"# First import the library Gargantext Notebook\n",
......@@ -41,17 +32,19 @@
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Corpus \"Machine learning\" in project \"Tests\" created\n",
"100 documents downloaded / 105.\n",
"105\n",
"LSTM 1000\n",
"Downloading page 0 to 100 results\n",
"Downloading page 100 to 100 results\n",
"CORPUS #17058\n",
"PARSING\n",
"Loading available PARSERS:\n",
"\t- EuropresseParser\n",
"\t- RISParser\n",
......@@ -64,85 +57,64 @@
"\t- CernParser\n",
"\t- MultivacParser\n",
"\t- HalParser\n",
"\t- IsidoreParser\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Exception ignored in: <bound method Parser.__del__ of <gargantext.util.parsers.HAL.HalParser object at 0x7f8d1d70ad30>>\n",
"Traceback (most recent call last):\n",
" File \"/srv/gargantext/gargantext/util/parsers/_Parser.py\", line 24, in __del__\n",
" self._file.close()\n",
"AttributeError: 'list' object has no attribute 'close'\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10100 documents downloaded / 105.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Exception ignored in: <bound method Parser.__del__ of <gargantext.util.parsers.HAL.HalParser object at 0x7f8d1d6f1e10>>\n",
"Traceback (most recent call last):\n",
" File \"/srv/gargantext/gargantext/util/parsers/_Parser.py\", line 24, in __del__\n",
" self._file.close()\n",
"AttributeError: 'list' object has no attribute 'close'\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Extracting the ngrams\n",
"CORPUS #300990\n",
"CORPUS #300990: parsed 105\n"
]
},
{
"ename": "KeyError",
"evalue": "'languages'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-3-c08deaa02bf2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mproject\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmyProject_fromUrl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"http://imt.gargantext.org/projects/300535\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mcorpus\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnewCorpus\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mproject\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"Machine learning\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mquery\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"LSTM\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/srv/gargantext/gargantext_notebook.py\u001b[0m in \u001b[0;36mnewCorpus\u001b[0;34m(project, resourceName, name, query)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Extracting the ngrams\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 117\u001b[0;31m \u001b[0mparse_extract_indexhyperdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcorpus\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 118\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Corpus is ready to explore:\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/alexandre/local/logiciels/python/env/3.5_20170123/lib/python3.5/site-packages/celery/local.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *a, **kw)\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 188\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_current_object\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 189\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 190\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/alexandre/local/logiciels/python/env/3.5_20170123/lib/python3.5/site-packages/celery/app/task.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 426\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__self__\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 427\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__self__\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 428\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 429\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 430\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpop_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/srv/gargantext/gargantext/util/toolchain/main.py\u001b[0m in \u001b[0;36mparse_extract_indexhyperdata\u001b[0;34m(corpus)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0mdocs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcorpus\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchildren\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"DOCUMENT\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'CORPUS #%d: parsed %d'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mcorpus\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mid\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdocs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0mextract_ngrams\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcorpus\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;31m# Preparing Databse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/srv/gargantext/gargantext/util/toolchain/ngrams_extraction.py\u001b[0m in \u001b[0;36mextract_ngrams\u001b[0;34m(corpus, keys, do_subngrams)\u001b[0m\n\u001b[1;32m 151\u001b[0m \u001b[0mcorpus\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Ngrams'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[0mcorpus\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave_hyperdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 153\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 154\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 155\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/srv/gargantext/gargantext/util/toolchain/ngrams_extraction.py\u001b[0m in \u001b[0;36mextract_ngrams\u001b[0;34m(corpus, keys, do_subngrams)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[0;31m#load available taggers for default langage of plateform\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0;31m#print(LANGUAGES.keys())\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 56\u001b[0;31m tagger_bots = {lang: load_tagger(lang) for lang in corpus.hyperdata[\"languages\"] \\\n\u001b[0m\u001b[1;32m 57\u001b[0m if lang != \"__unknown__\"}\n\u001b[1;32m 58\u001b[0m \u001b[0mtagger_bots\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"__unknown__\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_tagger\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"en\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'languages'"
"\t- IsidoreParser\n",
"0 docs skipped\n",
"105 parsed\n",
"#MAIN language of the CORPUS __unknown__\n",
"CORPUS #17058: parsed 105\n",
"INTEGRATE\n",
"INTEGRATE\n",
"CORPUS #17058: extracted ngrams\n",
"CORPUS #17058: indexed hyperdata\n",
"CORPUS #17058: [2017-08-11_11:21:18] new favorites node #17164\n",
"CORPUS #17058: [2017-08-11_11:21:18] starting ngram lists computation\n",
"CORPUS #17058: [2017-08-11_11:21:18] new stoplist node #17165\n",
"# STEMMERS LOADED {'__unknown__': <nltk.stem.snowball.SnowballStemmer object at 0x7fb811588e48>}\n",
"#SUPPORTED STEMMERS LANGS []\n",
"CORPUS #17058: [2017-08-11_11:21:20] new grouplist node #17166\n",
"CORPUS #17058: [2017-08-11_11:21:20] new occs node #17167\n",
"compute_ti_ranking\n",
"2017-08-11_11:21:20 : Starting Query tf_nd_query\n",
"2017-08-11_11:21:21 : End Query tf_nd_quer\n",
"2017-08-11_11:21:21 : tfidfsum\n",
"CORPUS #17058: [2017-08-11_11:21:21] new ti ranking node #17168\n",
"MAINLIST: keeping 2908 ngrams out of 3878\n",
"CORPUS #17058: [2017-08-11_11:21:21] new mainlist node #17169\n",
"Compute TFIDF local\n",
"CORPUS #17058: [2017-08-11_11:21:22] new localtfidf node #17170\n",
"COOCS: NEW matrix shape [220x807]\n",
"CORPUS #17058: [2017-08-11_11:21:23] computed mainlist coocs for specif rank\n",
"SPECIFICITY: computing on 154 ngrams\n",
"CORPUS #17058: [2017-08-11_11:21:23] new spec-clusion node #17171\n",
"CORPUS #17058: [2017-08-11_11:21:23] new gen-clusion node #17172\n",
"MAPLIST quotas: {'topgen': {'multigrams': 168, 'monograms': 42}, 'topspec': {'multigrams': 112, 'monograms': 28}}\n",
"MAPLIST: top_spec_monograms = 28\n",
"MAPLIST: top_spec_multigrams = 41\n",
"MAPLIST: top_gen_monograms = 42\n",
"MAPLIST: top_gen_multigrams = 0\n",
"MAPLIST: kept 111 ngrams in total \n",
"CORPUS #17058: [2017-08-11_11:21:23] new maplist node #17173\n",
"CORPUS #17058: [2017-08-11_11:21:23] FINISHED ngram lists computation\n"
]
}
],
"source": [
"project = myProject_fromUrl(\"http://imt.gargantext.org/projects/300535\")\n",
"corpus = newCorpus(project, name=\"Machine learning\", query=\"LSTM\")"
"#project = myProject_fromUrl(\"http://imt.gargantext.org/projects/300535\")\n",
"project = myProject_fromUrl(\"http://localhost:8000/projects/2\")\n",
"corpus = newCorpus(project, source=\"hal\", name=\"Machine learning\", query=\"LSTM\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"session.query(Node.hyperdata[\"\"])"
......@@ -151,27 +123,21 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"\n",
......@@ -181,9 +147,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"for doc in new_docs:\n",
......@@ -218,11 +182,7 @@
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [
{
"name": "stdout",
......@@ -244,11 +204,7 @@
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [],
"source": [
"# To get all the documents:\n",
......@@ -258,11 +214,7 @@
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [
{
"data": {
......@@ -284,11 +236,7 @@
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [
{
"data": {
......@@ -309,11 +257,7 @@
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [
{
"data": {
......@@ -334,11 +278,7 @@
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [
{
"data": {
......@@ -360,9 +300,7 @@
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": [
......@@ -373,11 +311,7 @@
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [
{
"data": {
......@@ -406,10 +340,7 @@
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"metadata": {},
"source": [
"## Title\n",
"\n",
......@@ -422,19 +353,14 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"metadata": {},
"source": [
"# Lang Cleaning tools"
]
......@@ -442,11 +368,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [],
"source": [
"detect_lang(\"Ceci est une phrase en français.\")"
......@@ -455,11 +377,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [],
"source": [
"detect_lang(\"This is an english sentence.\")"
......@@ -468,11 +386,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [],
"source": [
"chart(docs, \"language_iso2\").plot.bar()"
......@@ -481,11 +395,7 @@
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [
{
"data": {
......@@ -516,9 +426,7 @@
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": [
......@@ -534,11 +442,7 @@
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [
{
"data": {
......@@ -558,11 +462,7 @@
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [
{
"data": {
......@@ -606,9 +506,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": [
......@@ -617,10 +515,7 @@
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"metadata": {},
"source": [
"# Measures IMT Tools"
]
......@@ -628,11 +523,7 @@
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [
{
"ename": "ConnectionError",
......@@ -680,11 +571,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [],
"source": [
"# Request syntax\n",
......@@ -701,11 +588,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [],
"source": [
"# Forces / Faiblesses de l'IMT\n",
......@@ -720,9 +603,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": [
......@@ -734,11 +615,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"metadata": {},
"outputs": [],
"source": [
"# Then chart it to see your strenght and weakness!\n",
......@@ -749,9 +626,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": []
......@@ -760,9 +635,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": []
......@@ -771,9 +644,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": []
......@@ -782,9 +653,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": []
......@@ -793,9 +662,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": []
......@@ -804,19 +671,14 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"metadata": {},
"source": [
"# Graph generation"
]
......@@ -825,9 +687,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": [
......@@ -838,9 +698,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": [
......@@ -849,10 +707,7 @@
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"metadata": {},
"source": [
"# List Management"
]
......@@ -861,9 +716,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": [
......@@ -874,9 +727,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": [
......@@ -900,7 +751,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3rc1"
"version": "3.5.3"
}
},
"nbformat": 4,
......
......@@ -532,7 +532,7 @@
$("#submit_thing").html("Process a {{ query_size }} sample!")
thequeries = data
var N=0,k=0;
var N=0;
for(var i in thequeries) N += thequeries[i].count
if( N>0) {
......@@ -571,12 +571,11 @@
$("#submit_thing").html("Process a {{ query_size }} sample!")
thequeries = data
var N=data.length,k=0;
// for(var i in thequeries) N += thequeries[i].count
if( N>1) {
var total = JSON.parse(data).total
console.log("N: "+total)
$("#theresults").html("<i> <b>"+pubmedquery+"</b>: "+total+" publications.</i><br>")
var N = data.total;
if (N > 0) {
console.log("N: "+N)
$("#theresults").html("<i> <b>"+pubmedquery+"</b>: "+N+" publications.</i><br>")
$('#submit_thing').prop('disabled', false);
} else {
$("#theresults").html("<i> <b>"+data[0]+"</b></i><br>")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment