Remove notebook

591915ef · sim · 50872e57 · 50872e57 · 50872e57 · 50872e57
Commit 591915ef authored Oct 31, 2017 by sim
11 changed files
--- a/gargantext_notebook.py
+++ b/gargantext_notebook.py
-install/notebook/gargantext_notebook.py
\ No newline at end of file
--- a/install/notebook.install
+++ b/install/notebook.install
-#!/bin/bash
-# Prepare the host for the notebook container: create the `notebooks` user,
-# remove stale containers, then build the garg-notebook image from ./notebook.
-
-sudo adduser --disabled-password --gecos "" notebooks
-
-# Remove every container whose `docker ps -a` line matches "sh".
-# xargs -r skips `docker rm` when nothing matches, instead of invoking it
-# with an empty argument list (which is an error).
-sudo docker ps -a | grep sh | awk '{print $1}' | xargs -r sudo docker rm
-sudo docker build -t garg-notebook:latest ./notebook
--- a/install/notebook.run
+++ b/install/notebook.run
-#!/bin/bash
-# Start the Jupyter notebook server inside a garg-notebook container.
-
-        #-v /srv/gargandata:/srv/gargandata \
-        #-v /srv/gargantext_lib:/srv/gargantext_lib \
-
-# Remove a previous notebook container, if any. xargs -r avoids calling
-# `docker rm` with no arguments when nothing matches.
-sudo docker ps -a | grep notebook | grep sh | awk '{print $1}' | xargs -r sudo docker rm
-
-#HOSTIP=$(ip route show 0.0.0.0/0 | awk '{print $3}')
-#--add-host=localhost:${HOSTIP} \
-
-
-# With --net=host the container shares the host network stack, so Jupyter's
-# port 8899 is reachable directly. Docker ignores -p/--publish in that mode,
-# hence no port mapping is passed here.
-sudo docker run \
-        --name=garg-notebook \
-        --net=host \
-        --env POSTGRES_HOST=localhost \
-        -v /srv/gargantext:/srv/gargantext \
-        -it garg-notebook:latest \
-        /bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /home/notebooks && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser'"
-#        #&& jupyter nbextension enable --py widgetsnbextension --sys-prefix 
-        #/bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /srv/gargantext/ && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser --notebook-dir=/home/notebooks/'"
-
-
--- a/install/notebook/Dockerfile
+++ b/install/notebook/Dockerfile
-###########################################################
-# Gargantext notebook image
-###########################################################
-# Build an image starting from the debian:stretch image.
-# The application source is not copied in by this Dockerfile.
-FROM debian:stretch
-# LABEL replaces the deprecated MAINTAINER instruction
-LABEL maintainer="ISCPIF <gargantext@iscpif.fr>"
-
-USER root
-
-
-### Update and install base dependencies
-# Index refresh, install and list cleanup share one layer, so the downloaded
-# package lists are never stored in the image.
-RUN apt-get update &&         \
-    apt-get install -y        \
-        apt-utils             \
-        aptitude              \
-        build-essential       \
-        ca-certificates       \
-        curl                  \
-        g++                   \
-        gcc                   \
-        git                   \
-        locales               \
-        make                  \
-        sudo                  \
-        vim                   \
-        wget                  \
-    && rm -rf /var/lib/apt/lists/*
-#    postgresql-9.6 postgresql-client-9.6 postgresql-contrib-9.6 \
-#    postgresql-server-dev-9.6 libpq-dev libxml2                  \
-#    postgresql-9.6 postgresql-client-9.6 postgresql-contrib-9.6
-
-# Install Stack
-
-### Configure timezone and locale
-RUN echo "Europe/Paris" > /etc/timezone
-ENV TZ="Europe/Paris"
-
-# Enable the en_GB and fr_FR UTF-8 locales, regenerate them, and make
-# fr_FR.UTF-8 the system default.
-RUN sed -i -e 's/# en_GB.UTF-8 UTF-8/en_GB.UTF-8 UTF-8/' /etc/locale.gen && \
-    sed -i -e 's/# fr_FR.UTF-8 UTF-8/fr_FR.UTF-8 UTF-8/' /etc/locale.gen &&  \
-    dpkg-reconfigure --frontend=noninteractive locales                   &&   \
-    echo 'LANG="fr_FR.UTF-8"' > /etc/default/locale
-# key=value form; the space-separated `ENV key value` form is legacy
-ENV LANG=fr_FR.UTF-8 \
-    LANGUAGE=fr_FR.UTF-8 \
-    LC_ALL=fr_FR.UTF-8
-
-
-### Install main dependencies and python packages based on Debian distrib
-# Package roles:
-#   python3-six, python3-numpy, python3-setuptools, python3-numexpr:
-#       for numpy, pandas and numpyperf
-#   python3-pip: python dependencies
-#   libxml2-dev, libxslt-dev, libxslt1-dev, zlib1g-dev: for lxml
-# Cleanup runs in the same layer as the install: files removed by a later
-# RUN would still be stored in the earlier layer.
-RUN apt-get update && apt-get install -y \
-        libgfortran-6-dev                \
-        libpq-dev                        \
-        libxml2-dev                      \
-        libxslt-dev                      \
-        libxslt1-dev                     \
-        python3-dev                      \
-        python3-numexpr                  \
-        python3-numpy                    \
-        python3-pip                      \
-        python3-setuptools               \
-        python3-six                      \
-        python3.5                        \
-        xml-core                         \
-        zlib1g-dev                       \
-    && apt-get autoclean                 \
-    && rm -rf /var/lib/apt/lists/*
-#NB: removing /var/lib/apt/lists keeps the package index out of the image
-
-########################################################################
-### PYTHON ENVIRONNEMENT (as ROOT)
-########################################################################
-
-RUN adduser --disabled-password --gecos "" notebooks
-
-RUN pip3 install --no-cache-dir virtualenv
-RUN virtualenv /env_3-5
-RUN echo 'alias venv="source /env_3-5/bin/activate"' >> ~/.bashrc
-# CONFIG FILES
-# COPY rather than ADD: these are plain local files; neither archive
-# extraction nor URL fetching is wanted.
-COPY requirements.txt /
-#ADD psql_configure.sh /
-COPY django_configure.sh /
-
-# Install the Python requirements inside the virtualenv, then SQLAlchemy from
-# its rel_1_1 branch and the NLTK tagger data. --no-cache-dir keeps pip's
-# download cache out of the image layer.
-RUN . /env_3-5/bin/activate && pip3 install --no-cache-dir -r /requirements.txt         && \
-    pip3 install --no-cache-dir git+https://github.com/zzzeek/sqlalchemy.git@rel_1_1    && \
-    python3 -m nltk.downloader averaged_perceptron_tagger -d /usr/local/share/nltk_data
-
-#RUN ./psql_configure.sh
-#RUN ./django_configure.sh
-RUN chown notebooks:notebooks -R /env_3-5
-
-########################################################################
-### POSTGRESQL DATA (as ROOT)
-########################################################################
-
-#RUN sed -iP "s%^data_directory.*%data_directory = \'\/srv\/gargandata\'%" /etc/postgresql/9.5/main/postgresql.conf
-#RUN echo "host all  all    0.0.0.0/0  md5" >> /etc/postgresql/9.5/main/pg_hba.conf
-#RUN echo "listen_addresses='*'" >> /etc/postgresql/9.5/main/postgresql.conf
-
-# Ports the image documents as used
-# (5432 is presumably PostgreSQL; its setup is commented out in this file)
-EXPOSE 5432
-EXPOSE 8899
-
-# Mount points declared as volumes
-VOLUME ["/srv/", "/home/notebooks/"]
-
-
-########################################################################
-### Notebook IHaskell and IPYTHON ENVIRONNEMENT
-########################################################################
-
-# Development libraries for the notebook environment. The package lists are
-# removed in the same layer so they are not stored in the image.
-RUN apt-get update && apt-get install -y \
-        libblas-dev                      \
-        libcairo2-dev                    \
-        liblapack-dev                    \
-        libmagic-dev                     \
-        libpango1.0-dev                  \
-        libtinfo-dev                     \
-        libzmq3-dev                      \
-    && rm -rf /var/lib/apt/lists/*
-
-#USER notebooks
-#
-#RUN cd  /home/notebooks                              \
-#    &&  curl -sSL https://get.haskellstack.org/ | sh  \
-#    &&  stack setup                                    \
-#    &&  git clone https://github.com/gibiansky/IHaskell \
-#    &&  . /env_3-5/bin/activate                          \
-#    &&  cd IHaskell                                       \
-#    &&  stack install gtk2hs-buildtools                    \
-#    &&  stack install --fast                                \
-#    &&  /root/.local/bin/ihaskell install --stack
-#
-
-
--- a/install/notebook/django_configure.sh
+++ b/install/notebook/django_configure.sh
-#!/bin/bash
-##################################################
-#             __| |(_) __ _ _ __   __ _  ___
-#            / _` || |/ _` | '_ \ / _` |/ _ \
-#           | (_| || | (_| | | | | (_| | (_) |
-#            \__,_|/ |\__,_|_| |_|\__, |\___/
-#                 |__/             |___/
-##################################################
-# Configure Django: run the migrations and create a superuser.
-##################################################
-echo "::::: DJANGO :::::"
-#echo "Starting Postgres"
-#/usr/sbin/service postgresql start
-
-
-
-# Everything below runs as user `gargantua` in one shell, chained with &&
-# so the first failing step aborts the rest:
-#   1. activate the virtualenv at /srv/env_3-5
-#   2. manage.py makemigrations, then manage.py migrate
-#   3. dbmigrate.py, three times in a row
-#      NOTE(review): the reason for three runs is not visible here -- confirm
-#   4. manage.py createsuperuser (presumably interactive -- confirm)
-su gargantua -c 'source /srv/env_3-5/bin/activate &&\
-    echo "Activated env" &&\
-    /srv/gargantext/manage.py makemigrations &&\
-    /srv/gargantext/manage.py migrate && \
-    echo "migrations ok" &&\
-    /srv/gargantext/dbmigrate.py && \
-    /srv/gargantext/dbmigrate.py && \
-    /srv/gargantext/dbmigrate.py && \
-    /srv/gargantext/manage.py createsuperuser'
-
-# Stop PostgreSQL; the matching start above is commented out, so the service
-# is expected to be running already when this script is called.
-service postgresql stop
--- a/install/notebook/gargantext_notebook.py
+++ b/install/notebook/gargantext_notebook.py
-#!/usr/bin/env python
-"""
-   Gargantext Software Copyright (c) 2016-2017 CNRS ISC-PIF -
-http://iscpif.fr
-    Licence (see :
-http://gitlab.iscpif.fr/humanities/gargantext/blob/stable/LICENSE )
-    - In France : a CECILL variant affero compliant
-    - GNU aGPLV3 for all other countries
-"""
-
-import os
-import django
-
-os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'gargantext.settings')
-django.setup()
-
-from gargantext.constants import QUERY_SIZE_N_MAX, get_resource, get_resource_by_name
-from gargantext.models import (Node, ProjectNode, DocumentNode,
-                               Ngram, NodeNgram, NodeNgramNgram, NodeNodeNgram)
-from gargantext.util.db import session, get_engine, func, aliased, case
-from collections import Counter
-import importlib
-from django.http import Http404
-
-# Import those to be available by notebook user
-from langdetect import detect as detect_lang
-from gargantext.models import UserNode, User
-import functools
-
-class NotebookError(Exception):
-    """Error raised by the notebook helpers on invalid user input."""
-
-
-def documents(corpus_id):
-    """Return every DocumentNode whose parent is `corpus_id`, as a list."""
-    children = session.query(DocumentNode).filter_by(parent_id=corpus_id)
-    #children = children.order_by(Node.hyperdata['publication_date'])
-    return children.all()
-
-
-#import seaborn as sns
-import pandas as pd
-
-def countByField(docs, field):
-    """Count documents per value of hyperdata `field`.
-
-    Returns a list of (value, count) pairs.
-    """
-    occurrences = Counter(doc.hyperdata[field] for doc in docs)
-    return list(occurrences.items())
-
-def chart(docs, field):
-    """Return a DataFrame of document counts per value of hyperdata `field`.
-
-    Columns are 'Date' and 'DateValue'; the frame is indexed by the 'Date'
-    column of an intermediate frame built from the same counts.
-    """
-    counts = countByField(docs, field)
-    labels = ['Date', 'DateValue']
-    # This first frame only provides the index of the returned one
-    positional = pd.DataFrame(counts, columns=labels)
-    return pd.DataFrame(counts, columns=labels, index=positional.Date)
-
-
-from gargantext.util.crawlers.HAL import HalCrawler
-
-def scan_hal(request):
-    """Pass `request` to a fresh HalCrawler and return its scan_results()."""
-    return HalCrawler().scan_results(request)
-
-
-def _search_docs(corpus_id, request, fast=False):
-    """Build the query selecting documents of `corpus_id` matching `request`.
-
-    With `fast` set, documents are filtered with
-    `Node.title_abstract.match(request)`. Otherwise `request` is turned into
-    a regular expression applied (operator `~*`) to the 'title' and
-    'abstract' hyperdata fields; a document matching either one is kept.
-
-    Returns the query object, not its results.
-    """
-    q = session.query(DocumentNode).filter_by(parent_id=corpus_id)
-
-    if fast:
-        return q.filter(Node.title_abstract.match(request))
-
-    # Only match <request> starting and ending with word boundary
-    # Sequence of spaces will match any sequence of spaces
-    # Raw strings throughout: '\s' in a plain literal is an invalid escape
-    # sequence and triggers a warning on recent Python versions.
-    words = filter(None, r'\m{}\M'.format(request).split(' '))
-    pattern = r'\s+'.join(words)
-
-    def in_hyperdata(field):
-        # Search <pattern> in hyperdata <field>
-        return Node.hyperdata[field].astext.op('~*')(pattern)
-
-    return q.filter(in_hyperdata('title') | in_hyperdata('abstract'))
-
-
-def scan_gargantext(corpus_id, request, fast=False, documents=False):
-    """Search `request` in the documents of `corpus_id`.
-
-    Returns the matching documents as a list when `documents` is true,
-    otherwise the count of distinct matching document ids.
-    """
-    matches = _search_docs(corpus_id, request, fast)
-
-    if not documents:
-        distinct_ids = func.count(DocumentNode.id.distinct())
-        return matches.with_entities(distinct_ids).one()[0]
-
-    return matches.all()
-
-
-def scan_gargantext_and_delete(corpus_id, request, fast=False):
-    """Delete the documents of `corpus_id` matching `request` and commit.
-
-    Returns the value returned by the query's delete() call.
-    """
-    matches = _search_docs(corpus_id, request, fast)
-    deleted = matches.delete(synchronize_session='fetch')
-    session.commit()
-
-    return deleted
-
-
-def myProject_fromUrl(url):
-    """
-    myProject :: String -> Project
-
-    The project id is taken from the fifth "/"-separated component of `url`.
-    """
-    segments = url.split("/")
-    return session.query(ProjectNode).get(segments[4])
-
-
-def newCorpus(project, source, name=None, query=None):
-    """Harvest `query` from `source` into a new corpus of `project`.
-
-    project -- a ProjectNode
-    source  -- resource id (int) or source name (str)
-    name    -- corpus name; defaults to `query`
-    query   -- query string handed to the moissonneur
-
-    Returns the result of run_moissonneur().
-    Raises NotebookError when an argument is invalid or the source cannot be
-    resolved to a moissonneur.
-    """
-    if name is None:
-        name = query
-
-    # One single chain: the first failing check is the one reported, and a
-    # later check can no longer overwrite an earlier error.
-    error = None
-    if not isinstance(project, ProjectNode):
-        error = "a valid project"
-    elif not isinstance(source, (int, str)):
-        error = "a valid source identifier: id or name"
-    elif not isinstance(query, str):
-        error = "a valid query"
-    elif not isinstance(name, str):
-        error = "a valid name"
-
-    if error:
-        raise NotebookError("Please provide %s." % error)
-
-    if isinstance(source, int):
-        resource = get_resource(source)
-    else:
-        resource = get_resource_by_name(source)
-
-    if resource:
-        moissonneur_name = get_moissonneur_name(resource)
-    elif isinstance(source, str):
-        # No known resource: try the given name as a moissonneur name
-        moissonneur_name = source.lower()
-    else:
-        # An unknown integer id has no name to fall back on
-        raise NotebookError("Invalid source identifier: %r" % source)
-
-    try:
-        moissonneur = get_moissonneur(moissonneur_name)
-    except ImportError:
-        raise NotebookError("Invalid source identifier: %r" % source)
-
-    return run_moissonneur(moissonneur, project, name, query)
-
-
-def get_moissonneur_name(ident):
-    """ Return moissonneur module name from RESOURCETYPE or crawler name
-
-    `ident` is either a mapping with a 'crawler' entry or a string. A
-    string ending in 'Crawler' is stripped of that suffix and lowercased;
-    anything else is returned unchanged (None for a mapping without a
-    'crawler' entry).
-    """
-
-    # Does it quack like a RESOURCETYPE?
-    if hasattr(ident, 'get'):
-        ident = ident.get('crawler')
-
-    # Extract name from crawler class name
-    if isinstance(ident, str) and ident.endswith('Crawler'):
-        return ident[:-len('Crawler')].lower()
-
-    # Otherwise assume ident is already a moissonneur name
-    return ident
-
-
-def get_moissonneur(name):
-    """ Import and return module `gargantext.moissonneurs.<name>`
-
-    The module gets a `name` attribute set to `name`. Raises NotebookError
-    unless `name` is a lowercase string.
-    """
-    is_valid = isinstance(name, str) and name.islower()
-    if not is_valid:
-        raise NotebookError("Invalid moissonneur name: %r" % name)
-
-    moissonneur = importlib.import_module('gargantext.moissonneurs.%s' % name)
-    moissonneur.name = name
-
-    return moissonneur
-
-
-def run_moissonneur(moissonneur, project, name, query):
-    """ Run moissonneur and return resulting corpus
-
-    A fake POST request object is built and passed first to
-    `moissonneur.query()` to get a result count, then to
-    `moissonneur.save()` when that count is positive.
-
-    Returns the saved corpus, renamed to `name` if needed, or None when the
-    query yields no result.
-    """
-
-    # XXX Uber-kludge with gory details. Spaghetti rulezzzzz!
-    class Dummy(object):
-        pass
-
-    # Stand-in request object carrying only the attributes set below
-    request = Dummy()
-    request.method = 'POST'
-    request.path = 'nowhere'
-    request.META = {}
-    # XXX 'string' only have effect on moissonneurs.pubmed; its value is added
-    #     when processing request client-side, take a deep breath and see
-    #     templates/projects/project.html for more details.
-    request.POST = {'string': name,
-                    'query': query,
-                    'N': QUERY_SIZE_N_MAX}
-    # The request acts on behalf of the project's owner
-    request.user = Dummy()
-    request.user.id = project.user_id
-    request.user.is_authenticated = lambda: True
-
-    if moissonneur.name == 'istex':
-        # Replace ALL spaces by plus signs
-        request.POST['query'] = '+'.join(filter(None, query.split(' ')))
-
-    try:
-        import json
-
-        # The response body of query() is decoded as UTF-8 JSON
-        r = moissonneur.query(request)
-        raw_json = r.content.decode('utf-8')
-        data = json.loads(raw_json)
-
-        # The result count is located differently for each moissonneur
-        if moissonneur.name == 'pubmed':
-            count = sum(x['count'] for x in data)
-            # The raw response replaces the query before save() is called
-            # (presumably save() reads it back -- confirm in moissonneurs.pubmed)
-            request.POST['query'] = raw_json
-        elif moissonneur.name == 'istex':
-            count = data.get('total', 0)
-        else:
-            count = data.get('results_nb', 0)
-
-        if count > 0:
-            corpus = moissonneur.save(request, project.id, return_corpus=True)
-        else:
-            return None
-
-    # These errors are re-raised unchanged; the clause does no handling
-    except (ValueError, Http404) as e:
-        raise e
-
-    # Sometimes strange things happens...
-    if corpus.name != name:
-        corpus.name = name
-        session.commit()
-
-    return corpus
-
-
-# Every list type accepted by the ngram helpers below
-ALL_LIST_TYPES = ['main', 'map', 'stop']
-
-
-def _ngrams(corpus_id, list_types, entities):
-    """Query `entities` over the ngrams of the selected lists of `corpus_id`.
-
-    `list_types` is one list type name or an iterable of them; names not in
-    ALL_LIST_TYPES are ignored. Returns the query, not its results.
-    """
-    if isinstance(list_types, str):
-        list_types = (list_types,)
-
-    wanted_typenames = []
-    for kind in list_types:
-        if kind in ALL_LIST_TYPES:
-            wanted_typenames.append('{}LIST'.format(kind.upper()))
-
-    # `Node` is our list, ie. MAINLIST and/or MAPLIST and/or STOPLIST
-    in_wanted_lists = (NodeNgram.ngram_id==Ngram.id,
-                       NodeNgram.node_id==Node.id,
-                       Node.parent_id==corpus_id,
-                       Node.typename.in_(wanted_typenames))
-
-    base = session.query(*entities).select_from(Ngram)
-    return base.filter(*in_wanted_lists)
-
-
-def corpus_list(corpus_id, list_types=ALL_LIST_TYPES, with_synonyms=False,
-                with_count=False):
-    """Build the query listing the ngrams of the lists of `corpus_id`.
-
-    corpus_id     -- id of the corpus owning the lists
-    list_types    -- one list type or an iterable of them, from ALL_LIST_TYPES
-    with_synonyms -- also include the synonyms recorded in the GROUPLIST
-    with_count    -- return (type, ng, score) rows joined on the OCCURRENCES
-                     node instead of (type, ng) rows
-
-    Returns a query. Without `with_count` it is ordered by list type, then
-    by ngram terms.
-    """
-    # Link between a GROUPLIST, a normal form (ngram1), and a synonym (ngram2)
-    NNN = NodeNgramNgram
-
-    # Get the list type from the Node type -- as in CSV export
-    list_type = (case([(Node.typename=='MAINLIST', 'main'),
-                       (Node.typename=='MAPLIST',  'map'),
-                       (Node.typename=='STOPLIST', 'stop')])
-                 .label('type'))
-
-    # We will retrieve each ngram as the following tuple:
-    entities = (list_type, Ngram.terms.label('ng'))
-
-    if with_count:
-        entities += (Ngram.id.label('id'),)
-
-    # First, get ngrams from wanted lists
-    ngrams = _ngrams(corpus_id, list_types, entities)
-
-    # Secondly, exclude "synonyms" (grouped ngrams that are not normal forms).
-    # We have to exclude synonyms first because data is inconsistent and some
-    # of them can be both in GROUPLIST and in MAIN/MAP/STOP lists. We want to
-    # take synonyms from GROUPLIST only -- see below.
-    Groups = aliased(Node, name='groups')
-    query = (ngrams.outerjoin(Groups, (Groups.parent_id==corpus_id) & (Groups.typename=='GROUPLIST'))
-                   .outerjoin(NNN, (NNN.node_id==Groups.id) & (NNN.ngram2_id==Ngram.id))
-                   .filter(NNN.ngram1_id==None))
-
-    # If `with_synonyms` is True, add them from GROUPLIST: this is the reliable
-    # source for them
-    if with_synonyms:
-        Synonym = aliased(Ngram)
-        # Select the same columns as `entities`: both sides of a UNION must
-        # have the same number of columns, so the id is only added when
-        # `with_count` added it to the main query too.
-        ent = (list_type, Synonym.terms.label('ng'))
-        if with_count:
-            ent += (Synonym.id.label('id'),)
-        synonyms = (ngrams.with_entities(*ent)
-                          .filter(NNN.ngram1_id==Ngram.id,
-                                  NNN.ngram2_id==Synonym.id,
-                                  NNN.node_id==Groups.id,
-                                  Groups.parent_id==corpus_id,
-                                  Groups.typename=='GROUPLIST'))
-        query = query.union(synonyms)
-
-    # Again, data is inconsistent: MAINLIST may intersect with MAPLIST and
-    # we don't want that
-    if 'main' in list_types and 'map' not in list_types:
-        # Exclude MAPLIST ngrams from MAINLIST
-        query = query.except_(_ngrams(corpus_id, 'map', entities))
-
-    if with_count:
-        # Attach to each ngram its score from the OCCURRENCES node of the
-        # corpus; the outer join keeps ngrams that have no score row.
-        N = query.subquery()
-        return (session.query(N.c.type, N.c.ng, NodeNodeNgram.score)
-                       .join(Node, (Node.parent_id==corpus_id) & (Node.typename=='OCCURRENCES'))
-                       .outerjoin(NodeNodeNgram, (NodeNodeNgram.ngram_id==N.c.id) &
-                                                 (NodeNodeNgram.node1_id==Node.id) &
-                                                 (NodeNodeNgram.node2_id==corpus_id)))
-
-    # Return found ngrams sorted by list type, and then alphabetically
-    return query.order_by('type', 'ng')
--- a/install/notebook/psql_configure.sh
+++ b/install/notebook/psql_configure.sh
-#!/bin/bash
-
-#######################################################################
-##    ____           _
-##   |  _ \ ___  ___| |_ __ _ _ __ ___  ___
-##   | |_) / _ \/ __| __/ _` | '__/ _ \/ __|
-##   |  __/ (_) \__ \ || (_| | | |  __/\__ \
-##   |_|   \___/|___/\__\__, |_|  \___||___/
-##                      |___/
-#######################################################################
-echo "::::: POSTGRESQL :::::"
-# NOTE(review): this drops the 9.4 cluster while every other command below
-# targets 9.6 -- confirm which version is actually installed.
-su postgres -c 'pg_dropcluster 9.4 main --stop'
-#done in docker but redoing it
-# Recreate an empty data directory owned by postgres, initialise a database
-# cluster in it and start a server logging to journal_applicatif.
-rm -rf /srv/gargandata && mkdir /srv/gargandata && chown postgres:postgres /srv/gargandata
-su postgres -c '/usr/lib/postgresql/9.6/bin/initdb -D /srv/gargandata/'
-su postgres -c '/usr/lib/postgresql/9.6/bin/pg_ctl -D /srv/gargandata/ -l /srv/gargandata/journal_applicatif start'
-
-# Register the same directory as the Debian-managed "9.6 main" cluster and
-# start it.
-# NOTE(review): a server was already started on this directory just above;
-# confirm these extra start commands are needed and accept these options.
-su postgres -c 'pg_createcluster -D /srv/gargandata 9.6 main '
-su postgres -c 'pg_ctlcluster -D /srv/gargandata 9.6 main start '
-su postgres -c 'pg_ctlcluster 9.6 main start'
-
-service postgresql start
-
-# Create the application role and a database owned by it.
-# NOTE(review): the password is hard-coded in clear text in this script;
-# treat it as compromised and supply it from outside the repository.
-su postgres -c "psql -c \"CREATE user gargantua WITH PASSWORD 'C8kdcUrAQy66U'\""
-su postgres -c "createdb -O gargantua gargandb"
-
-echo "Postgres configured"
-#service postgresql stop
--- a/install/notebook/requirements.txt
+++ b/install/notebook/requirements.txt
-# try bottleneck
-eventlet==0.20.1
-amqp==1.4.9
-anyjson==0.3.3
-billiard==3.3.0.23
-celery==3.1.25
-chardet==2.3.0
-dateparser==0.3.5
-Django==1.10.5
-django-celery==3.2.1
-django-pgfields==1.4.4
-django-pgjsonb==0.0.23
-djangorestframework==3.5.3
-html5lib==0.9999999
-#python-igraph>=0.7.1
-jdatetime==1.7.2
-kombu==3.0.37                  # messaging
-langdetect==1.0.6              # language detection
-nltk==3.1
-numpy==1.13.1
-psycopg2==2.6.2
-pycountry==1.20
-python-dateutil==2.4.2
-pytz==2016.10                   # timezones
-PyYAML==3.11
-RandomWords==0.1.12
-ujson==1.35
-umalqurra==0.2                 # arabic calendars (?? why use ??)
-networkx==1.11
-pandas==0.18.0
-six==1.10.0
-lxml==3.5.0
-requests-futures==0.9.7
-bs4==0.0.1
-requests==2.10.0
-djangorestframework-jwt==1.9.0
-jupyter==1.0.0
-jupyter-client==5.0.0
-jupyter-console==5.1.0
-jupyter-core==4.3.0
-ipython==5.2.0
-ipython-genutils==0.1.0
-ipywidgets
-matplotlib==2.0.2
-alembic>=0.9.2
-SQLAlchemy==1.1.14
-SQLAlchemy-Searchable==0.10.4
-SQLAlchemy-Utils==0.32.16
--- a/notebooks/.ipynb_checkpoints/AdvancedTutorial-checkpoint.ipynb
+++ b/notebooks/.ipynb_checkpoints/AdvancedTutorial-checkpoint.ipynb
--- a/notebooks/AdvancedTutorial.ipynb
+++ b/notebooks/AdvancedTutorial.ipynb
--- a/notebooks/gargantext_core_tutorial.ipynb
+++ b/notebooks/gargantext_core_tutorial.ipynb