Commit ab2c76dd authored by Romain Loth's avatar Romain Loth

Merge branch 'refactoring' into refactoring-rom

Conflicts:
	gargantext/views/api/urls.py
parents 63ec1b5c 8be7e5a7
...@@ -4,8 +4,9 @@ from annotations import views
# /!\ urls patterns here are *without* the trailing slash
urlpatterns = [
    # json:title,id,authors,journal,
    #      publication_date
    #      abstract_text,full_text
    url(r'^documents/(?P<doc_id>[0-9]+)$', views.Document.as_view()), # document view
...@@ -16,4 +17,4 @@ urlpatterns = [
    # url(r'^lists/(?P<list_id>[0-9]+)/ngrams/(?P<ngram_ids>[0-9,\+]+)+$', views.NgramEdit.as_view()),
    # POST (fixed 2015-12-16)
    # url(r'^lists/(?P<list_id>[0-9]+)/ngrams/create$', views.NgramCreate.as_view()), #
]
# django.ini file
[uwsgi]
# uwsgi --vacuum --socket monsite/mysite.sock --wsgi-file monsite/wsgi.py --chmod-socket=666 --home=/srv/alexandre.delanoe/env --chdir=/var/www/www/alexandre/monsite --env
env = DJANGO_SETTINGS_MODULE=gargantext.settings
#module = django.core.handlers.wsgi:WSGIHandler()
plugins = python35
# the base directory
chdir = /srv/gargantext
# Django's wsgi file
#module = wsgi
wsgi-file = /srv/gargantext/gargantext/wsgi.py
# the virtualenv
home = /srv/gargantext_env_3.5
lazy-apps = True
# master
master = true
# maximum number of processes
processes = 10
# the socket (use the full path to be safe)
socket = /tmp/gargantext.sock
threads = 4
# with appropriate permissions - *may* be needed
chmod-socket = 666
# clear environment on exit
vacuum = true
pidfile = /tmp/gargantext.pid
# touch /tmp/gargantext.reload to reload configuration (after git pull for instance)
touch-reload = /tmp/gargantext.reload
# respawn processes taking more than 120 seconds
harakiri = 120
# limit the project to 128 MB
#limit-as = 128
# respawn processes after serving 5000 requests
max-requests = 5000
# background the process & log
#daemonize = /var/log/uwsgi/gargantext.log
uid = 1000
gid = 1000
################### other gargantext constants ###################
[scrappers]
# default number of docs POSTed to scrappers.views.py
# (at page project > add a corpus > scan/process sample)
QUERY_SIZE_N_DEFAULT = 1000
# checked just before scrap to prevent running impossible workflows
# even if somebody would set "query size N" manually in POST data
QUERY_SIZE_N_MAX = 20000
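For illustration, a minimal sketch of reading these ini constants from Python (assuming the standard library configparser; the file path is hypothetical, and the actual loading code lives elsewhere):

import configparser

config = configparser.ConfigParser()
config.read('gargantext.ini')   # hypothetical path to this ini file
n_default = config.getint('scrappers', 'QUERY_SIZE_N_DEFAULT')   # 1000
n_max     = config.getint('scrappers', 'QUERY_SIZE_N_MAX')       # 20000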
...@@ -37,14 +37,86 @@ NODETYPES = [
    'TFIDF-GLOBAL',     # 14
]
# TODO find somewhere else than constants.py for this function
import datetime
import dateutil.parser

def convert_to_date(date):
    if isinstance(date, (int, float)):
        # a numeric value is a POSIX timestamp: turn it back into a datetime
        return datetime.datetime.fromtimestamp(date)
    else:
        return dateutil.parser.parse(date)
INDEXED_HYPERDATA = {
# TODO use properties during toolchain.hyperdata_indexing
# (type, convert_to_db, convert_from_db)
'count':
{ 'id' : 1
, 'type' : int
, 'convert_to_db' : int
, 'convert_from_db': int
},
'publication_date':
{ 'id' : 2
, 'type' : datetime.datetime
, 'convert_to_db' : convert_to_date
, 'convert_from_db': datetime.datetime.fromtimestamp
},
'title':
{ 'id' : 3
, 'type' : str
, 'convert_to_db' : str
, 'convert_from_db': str
},
'authors':
{ 'id' : 4
, 'type' : str
, 'convert_to_db' : str
, 'convert_from_db': str
},
'journal':
{ 'id' : 5
, 'type' : str
, 'convert_to_db' : str
, 'convert_from_db': str
},
'abstract':
{ 'id' : 6
, 'type' : str
, 'convert_to_db' : str
, 'convert_from_db': str
},
'text':
{ 'id' : 7
, 'type' : str
, 'convert_to_db' : str
, 'convert_from_db': str
},
'page':
{ 'id' : 8
, 'type' : int
, 'convert_to_db' : int
, 'convert_from_db': int
},
}
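A minimal usage sketch (input value hypothetical): the declared converters are looked up by key name before indexation.

# minimal sketch: apply the converters declared above for one key
key = INDEXED_HYPERDATA['publication_date']
db_value = key['convert_to_db']('2015-12-16')
# -> datetime.datetime(2015, 12, 16, 0, 0), via convert_to_date()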
from gargantext.util.taggers import *

LANGUAGES = {
    'en': {
        'tagger': EnglishMeltTagger,
        #'tagger': TurboTagger,
        #'tagger': NltkTagger,
    },
    'fr': {
        'tagger': FrenchMeltTagger,
...@@ -95,15 +167,15 @@ RESOURCETYPES = [
]

# linguistic extraction parameters ---------------------------------------------
DEFAULT_TFIDF_CUTOFF_RATIO      = .45   # MAINLIST maximum terms in %
DEFAULT_TFIDF_HARD_LIMIT        = 750   # MAINLIST maximum terms abs
                                        # (makes COOCS larger ~ O(N²) /!\)
DEFAULT_COOC_THRESHOLD          = 3     # inclusive minimum for COOCS coefs
                                        # (makes COOCS more sparse)
DEFAULT_MAPLIST_MAX             = 300   # MAPLIST maximum terms
DEFAULT_MAPLIST_MONOGRAMS_RATIO = .5    # part of monograms in MAPLIST
...@@ -126,11 +198,11 @@ QUERY_SIZE_N_DEFAULT = 1000
import os
from .settings import BASE_DIR

UPLOAD_DIRECTORY   = os.path.join(BASE_DIR, 'uploads')
UPLOAD_LIMIT       = 1024 * 1024 * 1024
DOWNLOAD_DIRECTORY = UPLOAD_DIRECTORY

# about batch processing...
BATCH_PARSING_SIZE          = 256
BATCH_NGRAMSEXTRACTION_SIZE = 1024
from .nodes import *
from .hyperdata import *
from .users import *
from .ngrams import *
from gargantext.util.db import *
from gargantext.constants import INDEXED_HYPERDATA
from .nodes import Node
import datetime
__all__ = ['NodeHyperdata']
class classproperty(object):
"""See: http://stackoverflow.com/a/3203659/734335
"""
def __init__(self, getter):
self.getter = getter
def __get__(self, instance, owner):
return self.getter(owner)
class HyperdataValueComparer(object):
"""This class is there to allow hyperdata comparison.
    Its attributes are overridden at the end of the present module to fit those
    of the `value_flt` and `value_str` attributes of the `NodeHyperdata` class.
"""
class HyperdataKey(TypeDecorator):
"""Define a new type of column to describe a Hyperdata field's type.
Internally, this column type is implemented as an SQL integer.
Values are detailed in `gargantext.constants.INDEXED_HYPERDATA`.
"""
impl = Integer
def process_bind_param(self, keyname, dialect):
if keyname in INDEXED_HYPERDATA:
return INDEXED_HYPERDATA[keyname]['id']
raise ValueError('Hyperdata key "%s" was not found in `gargantext.constants.INDEXED_HYPERDATA`' % keyname)
def process_result_value(self, keyindex, dialect):
for keyname, keysubhash in INDEXED_HYPERDATA.items():
if keysubhash['id'] == keyindex:
return keyname
raise ValueError('Hyperdata key with id=%d was not found in `gargantext.constants.INDEXED_HYPERDATA`' % keyindex)
class NodeHyperdata(Base):
"""This model's primary role is to allow better indexation of hyperdata.
It stores values contained in the `nodes.hyperdata` column (only those
listed in `gargantext.constants.INDEXED_HYPERDATA`), associated with the
corresponding key's index, and hyperdata value.
Example:
query = (session
.query(Node)
.join(NodeHyperdata)
.filter(NodeHyperdata.key == 'title')
.filter(NodeHyperdata.value.startswith('Bees'))
)
Example:
query = (session
.query(Node)
.join(NodeHyperdata)
.filter(NodeHyperdata.key == 'publication_date')
.filter(NodeHyperdata.value > datetime.datetime.now())
)
"""
__tablename__ = 'nodes_hyperdata'
id = Column( Integer, primary_key=True )
node_id = Column( Integer, ForeignKey(Node.id, ondelete='CASCADE'))
key = Column( HyperdataKey )
value_int = Column( Integer , index=True )
value_flt = Column( Double() , index=True )
value_utc = Column( DateTime(timezone=True) , index=True )
value_str = Column( String(255) , index=True )
value_txt = Column( Text , index=True )
def __init__(self, node=None, key=None, value=None):
"""Custom constructor
"""
# node reference
if node is not None:
if hasattr(node, 'id'):
self.node_id = node.id
else:
self.node_id = node
# key
if key is not None:
self.key = key
# value
self.value = value
# FIXME
@property
def value(self):
"""Pseudo-attribute used to extract the value in the right format.
"""
key = INDEXED_HYPERDATA[self.key]
return key['convert_from_db'](
self.value_flt if (self.value_str is None) else self.value_str
)
@value.setter
def value(self, value):
"""Pseudo-attribute used to insert the value in the right format.
"""
key = INDEXED_HYPERDATA[self.key]
value = key['convert_to_db'](value)
if isinstance(value, str):
self.value_str = value
else:
self.value_flt = value
@classproperty
def value(cls):
"""Pseudo-attribute used for hyperdata comparison inside a query.
"""
return HyperdataValueComparer()
def HyperdataValueComparer_overrider(key):
def comparator(self, *args):
if len(args) == 0:
return
if isinstance(args[0], datetime.datetime):
args = tuple(map(datetime.datetime.timestamp, args))
if isinstance(args[0], (int, float)):
return getattr(NodeHyperdata.value_flt, key)(*args)
if isinstance(args[0], str):
return getattr(NodeHyperdata.value_str, key)(*args)
return comparator
# copy every comparison operator of the float/str columns onto HyperdataValueComparer
for key in set(dir(NodeHyperdata.value_flt) + dir(NodeHyperdata.value_str)):
if key in ( '__dict__'
, '__weakref__'
, '__repr__'
, '__str__') \
or 'attr' in key \
or 'class' in key \
or 'init' in key \
or 'new' in key :
continue
setattr(HyperdataValueComparer, key, HyperdataValueComparer_overrider(key))
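The net effect is that `NodeHyperdata.value` can be used directly in query filters; a hedged sketch, assuming an open `session`:

# hedged sketch: comparisons dispatch onto the indexed columns
recent_docs = (session
    .query(NodeHyperdata)
    .filter(NodeHyperdata.key == 'publication_date')
    # the datetime is converted to a timestamp and compared against
    # `value_flt`; a str argument would be routed to `value_str`
    .filter(NodeHyperdata.value > datetime.datetime(2015, 1, 1))
)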
"""URL Configuration of GarganText """URL Configuration of GarganText
Views are shared between three main modules: Views are shared between these modules:
- `api`, for JSON and CSV interaction with data - `api`, for JSON and CSV interaction with data
- `pages`, to present HTML views to the user - `pages`, to present HTML views to the user
- `contents`, for Python-generated contents - `contents`, for Python-generated contents
- `annotations`, to annotate local context of a corpus (as global context)
- `graph explorer`, to explore graphs
""" """
from django.conf.urls import include, url
...@@ -14,10 +16,15 @@ import gargantext.views.api.urls
import gargantext.views.generated.urls
import gargantext.views.pages.urls

# Module Annotation
## tempo: unchanged doc-annotations --
from annotations import urls as annotations_urls
from annotations.views import main as annotations_main_view

# Module "Graph Explorer"
#from graphExplorer import urls as graphExplorer_urls
from graphExplorer.rest import Graph
from graphExplorer.views import explorer

urlpatterns = [
    url(r'^admin/', admin.site.urls),
...@@ -25,7 +32,16 @@ urlpatterns = [
    url(r'^api/', include(gargantext.views.api.urls)),
    url(r'^', include(gargantext.views.pages.urls)),

    # Module Annotation
    # tempo: unchanged doc-annotations routes --
    url(r'^annotations/', include(annotations_urls)),
    url(r'^projects/(\d+)/corpora/(\d+)/documents/(\d+)/$', annotations_main_view),

    # Module "Graph Explorer"
    url(r'^projects/(\d+)/corpora/(\d+)/explorer$', explorer),
    url(r'^projects/(\d+)/corpora/(\d+)/graph$', Graph.as_view()),
    # to be removed:
    url(r'^projects/(\d+)/corpora/(\d+)/node_link.json$', Graph.as_view())
    #url(r'^projects/(\d+)/corpora/(\d+)/explorer$', include(graphExplorer_urls))
]
from gargantext import settings
from gargantext.util.json import json_dumps

# get engine, session, etc.
from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import delete
def get_engine():
    from sqlalchemy import create_engine
...@@ -28,9 +28,9 @@ session = scoped_session(sessionmaker(bind=engine))
from sqlalchemy.types import *
from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint
from sqlalchemy.dialects.postgresql import JSONB, DOUBLE_PRECISION
from sqlalchemy.ext.mutable import MutableDict, MutableList

Double = DOUBLE_PRECISION
# useful for queries
...@@ -76,7 +76,7 @@ class bulk_insert:
        try:
            return '\t'.join(
                value.replace('\\', '\\\\').replace('\n', '\\\n').replace('\r', '\\\r').replace('\t', '\\\t')
                if isinstance(value, str) else str(value) if value is not None else '\\N'
                for value in next(self.iter)
            ) + '\n'
        except StopIteration:
...
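The added `'\\N'` branch maps Python None to the NULL marker understood by PostgreSQL COPY; a hedged sketch of the per-value rule in isolation:

# hedged sketch of the serialization rule used in bulk_insert above
def _copy_repr(value):
    if isinstance(value, str):
        return (value.replace('\\', '\\\\').replace('\n', '\\\n')
                     .replace('\r', '\\\r').replace('\t', '\\\t'))
    return str(value) if value is not None else '\\N'   # COPY NULL marker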
...@@ -78,6 +78,7 @@ class Parser:
            except:
                pass
        else:
            print("WARNING: Date unknown at _Parser level, using now()")
            hyperdata['publication_date'] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # ...then parse all the "date" fields into separate elements
...
...@@ -37,7 +37,8 @@ def scheduled_celery(func):
    """Provides a decorator to schedule a task with Celery.
    """
    def go(*args, **kwargs):
        func.apply_async(args=args, kwargs=kwargs)
        #shared_task(func).apply_async(args=args, kwargs=kwargs)
    return go
...
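With this change the function handed to the decorator must already be a Celery task, since `.apply_async` is now called on it directly; a minimal sketch, task name hypothetical:

from celery import shared_task

@shared_task
def my_task(corpus_id):            # hypothetical task
    print('working on corpus', corpus_id)

# scheduled_celery(my_task)(42) now amounts to:
#   my_task.apply_async(args=(42,), kwargs={})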
from .parsing import parse
from .ngrams_extraction import extract_ngrams
from .hyperdata_indexing import index_hyperdata

# in usual run order
from .list_stop import do_stoplist
...@@ -10,11 +11,13 @@ from .metric_specificity import compute_specificity
from .list_map import do_maplist  # TEST
from .ngram_groups import compute_groups

from gargantext.util.db import session
from gargantext.models import Node

from datetime import datetime
from celery import shared_task

@shared_task
def parse_extract(corpus):
    # retrieve corpus from database from id
    if isinstance(corpus, int):
...@@ -36,6 +39,24 @@ def parse_extract(corpus):
    extract_ngrams(corpus)
    print('CORPUS #%d: extracted ngrams' % (corpus.id))
@shared_task
def parse_extract_indexhyperdata(corpus):
# retrieve corpus from database from id
if isinstance(corpus, int):
corpus_id = corpus
corpus = session.query(Node).filter(Node.id == corpus_id).first()
if corpus is None:
print('NO SUCH CORPUS: #%d' % corpus_id)
return
# apply actions
print('CORPUS #%d' % (corpus.id))
parse(corpus)
print('CORPUS #%d: parsed' % (corpus.id))
extract_ngrams(corpus)
print('CORPUS #%d: extracted ngrams' % (corpus.id))
index_hyperdata(corpus)
print('CORPUS #%d: indexed hyperdata' % (corpus.id))
# -------------------------------
# temporary ngram lists workflow
# -------------------------------
...
from gargantext.util.db import bulk_insert
from gargantext.constants import INDEXED_HYPERDATA
from gargantext.models import NodeHyperdata
from datetime import datetime
def _nodes_hyperdata_generator(corpus):
"""This method generates columns for insertions in `nodes_hyperdata`.
In case one of the values is a list, its items are iterated over and
yielded separately.
If its a string (eg date) it will be truncated to 255 chars
"""
for document in corpus.children(typename='DOCUMENT'):
for keyname, key in INDEXED_HYPERDATA.items():
if keyname in document.hyperdata:
values = key['convert_to_db'](document.hyperdata[keyname])
if not isinstance(values, list):
values = [values]
for value in values:
if isinstance(value, (int, )):
yield (
document.id,
key['id'],
value,
None,
None,
None,
None,
)
elif isinstance(value, (float, )):
yield (
document.id,
key['id'],
None,
value,
None,
None,
None,
)
elif isinstance(value, (datetime, )):
yield (
document.id,
key['id'],
None,
None,
value.strftime("%Y-%m-%d %H:%M:%S"),
# FIXME check timestamp +%Z
None,
None,
)
elif isinstance(value, (str, )) :
if len(value) < 255 :
yield (
document.id,
key['id'],
None,
None,
None,
value,
None,
)
elif len(value) < 2712 :
yield (
document.id,
key['id'],
None,
None,
None,
None,
value,
)
                        else :
                            print("Size of the indexed row exceeds the "
                                  "maximum, 2712, for the index "
                                  "« ix_nodes_hyperdata_value_txt ». HINT: "
                                  "values larger than one third of a buffer "
                                  "page cannot be indexed (on postgres 9.5). "
                                  "TODO: use an index on the MD5 hash of the "
                                  "value and/or switch to full-text search "
                                  "indexing.")
yield (
document.id,
key['id'],
None,
None,
None,
None,
value[:2712],
)
else:
print("WARNING: Couldn't insert an INDEXED_HYPERDATA value because of unknown type:", type(value))
def index_hyperdata(corpus):
bulk_insert(
table = NodeHyperdata,
fields = ( 'node_id', 'key'
, 'value_int'
, 'value_flt'
, 'value_utc'
, 'value_str'
, 'value_txt' ),
data = _nodes_hyperdata_generator(corpus),
)
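For illustration, a hedged sketch of what the generator yields for one indexed string value (key ids per the INDEXED_HYPERDATA table above):

# index all DOCUMENT children of a corpus (assuming `corpus` is loaded):
# index_hyperdata(corpus)
#
# for a document whose hyperdata contains {'title': 'Bees'}, the generator
# yields one row routed to value_str ('title' has id 3 above):
#   (doc_id, 3, None, None, None, 'Bees', None)
#   node_id, key, value_int, value_flt, value_utc, value_str, value_txt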
from gargantext.models import Node, NodeNgram, NodeNgramNgram, \
                              NodeHyperdata
from gargantext.util.lists import WeightedMatrix
from gargantext.util.db import session, aliased, func
from gargantext.util.db_cache import cache
from gargantext.constants import DEFAULT_COOC_THRESHOLD
from datetime import datetime
def compute_coocs( corpus,
                   overwrite_id    = None,
                   threshold       = DEFAULT_COOC_THRESHOLD,
                   mainlist_id     = None,
                   stoplist_id     = None,
                   start           = None,
                   end             = None,
                   symmetry_filter = True):
""" """
Count how often some extracted terms appear Count how often some extracted terms appear
...@@ -19,10 +23,10 @@ def compute_coocs(corpus, ...@@ -19,10 +23,10 @@ def compute_coocs(corpus,
        node_id | ngram_id | weight       ngram1_id | ngram2_id | score |
        --------+----------+--------      ----------+-----------+-------+
         MyDocA |    487   |    1    =>        487  |    294    |   2   |
         MyDocA |    294   |    3
         MyDocB |    487   |    1
         MyDocB |    294   |    4
    Fill that info in DB:
      - a *new* COOCCURRENCES node
...@@ -40,6 +44,10 @@ def compute_coocs(corpus,
        - mainlist_id: mainlist to constrain the input ngrams
        - stoplist_id: stoplist for filtering input ngrams
                       (normally unnecessary if a mainlist is provided)
- start, end: provide one or both temporal limits to filter on doc date
NB the expected type of parameter value is datetime.datetime
(string is also possible but format must follow
this convention: "2001-01-01" aka "%Y-%m-%d")
    (deprecated parameters)
        - field1,2: allowed to count other things than ngrams (eg tags) but no use case at present
...@@ -69,7 +77,6 @@ def compute_coocs(corpus,
    #  - TODO add grouped element's values in grouping 'chief ngram'
    #  - TODO cvalue_id: allow a metric as additional input filter
    #  - TODO n_min, n_max : filter on Ngram.n (aka length of ngram)
    #  - TODO weighted: if False normal cooc to be saved as result
    #                   if True  weighted cooc (experimental)
...@@ -97,8 +104,8 @@ def compute_coocs(corpus,
    coocs_query = (
        session.query(x1.ngram_id, x2.ngram_id, ucooc)
               .filter(x1.node_id == x2.node_id)         # <- by definition of cooc
               .filter(x1.ngram_id != x2.ngram_id)       # <- b/c not with itself
               .filter(x1.node_id.in_(docids_subquery))  # <- b/c within corpus
               .group_by(x1.ngram_id, x2.ngram_id)
    )
...@@ -128,6 +135,42 @@ def compute_coocs(corpus,
                   .filter( ~ x2.ngram_id.in_(stop_subquery) )
        )
if start:
if isinstance(start, datetime):
start_str = start.strftime("%Y-%m-%d %H:%M:%S")
else:
start_str = str(start)
# doc_ids matching this limit
starttime_subquery = (session
.query(NodeHyperdata.node_id)
.filter(NodeHyperdata.key=="publication_date")
.filter(NodeHyperdata.value_str >= start_str)
.subquery()
)
# direct use of str comparison op because there is consistency b/w
# sql alpha sort and chrono sort *in this format %Y-%m-%d %H:%M:%S*
# the filtering by start limit
coocs_query = coocs_query.filter(x1.node_id.in_(starttime_subquery))
if end:
if isinstance(end, datetime):
end_str = end.strftime("%Y-%m-%d %H:%M:%S")
else:
end_str = str(end)
endtime_subquery = (session
.query(NodeHyperdata.node_id)
.filter(NodeHyperdata.key=="publication_date")
.filter(NodeHyperdata.value_str <= end_str)
.subquery()
)
# the filtering by end limit
coocs_query = coocs_query.filter(x1.node_id.in_(endtime_subquery))
    if symmetry_filter:
        # 1 filter taking symmetry into account
        # -> halves the work !!
...@@ -167,7 +210,7 @@ def compute_coocs(corpus,
    # 5) SAVE
    # --------
    # saving the parameters of the analysis in the Node JSON
    new_hyperdata = { 'corpus'   : corpus.id,
                      'threshold': threshold }
    if overwrite_id:
        # overwrite pre-existing id
...
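A hedged usage sketch of the new temporal filter (assuming `corpus` is a loaded CORPUS node; dates follow the "%Y-%m-%d" convention noted in the docstring):

from datetime import datetime

# count coocs over documents published in 2014 only
compute_coocs(corpus,
              start = datetime(2014, 1, 1),
              end   = datetime(2014, 12, 31))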
...@@ -6,6 +6,7 @@ from gargantext.util.ngramsextractors import ngramsextractors
from collections import defaultdict
from re import sub
from gargantext.util.scheduling import scheduled

def _integrate_associations(nodes_ngrams_count, ngrams_data, db, cursor):
    print('INTEGRATE')
...
from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram
from gargantext.constants import NODETYPES
from gargantext.util.db import session, delete, func
from gargantext.util.db_cache import cache, or_
from gargantext.util.validation import validate
from gargantext.util.http import ValidationException, APIView \
                               , get_parameters, JsonHttpResponse, Http404

from collections import defaultdict
...@@ -71,22 +73,108 @@ class NodeListResource(APIView):
            ]
        })
    def post(self, request):
        """Create a new node.
        NOT IMPLEMENTED
        """

    def delete(self, request):
        """Removes the list of nodes corresponding to the query.
        TODO : Should be a delete method!
        """
        parameters = get_parameters(request)
        parameters = validate(parameters, {'ids': list} )
        try :
            node_ids = [int(n) for n in parameters['ids'].split(',')]
        except :
            raise ValidationException('"ids" needs integers separated by comma.')

        result = session.execute(
            delete(Node).where(Node.id.in_(node_ids))
        )
        session.commit()

        return JsonHttpResponse({'deleted': result.rowcount})


class NodeListHaving(APIView):
    '''
    Gives a list of nodes ranked by a score related to some specific ngrams.
    TODO: implement other options (offset)

    Simple implementation:
    Takes the IDs of a corpus and of ngrams, and returns the list of relevant
    documents in json format, sorted by decreasing TFIDF score.
    '''
def get(self, request, corpus_id):
parameters = get_parameters(request)
parameters = validate(parameters, {'score': str, 'ngram_ids' : list} )
try :
ngram_ids = [int(n) for n in parameters['ngram_ids'].split(',')]
except :
raise ValidationException('"ngram_ids" needs integers separated by comma.')
limit=5
nodes_list = []
corpus = session.query(Node).filter(Node.id==corpus_id).first()
tfidf_id = ( session.query( Node.id )
.filter( Node.typename == "TFIDF-CORPUS"
, Node.parent_id == corpus.id
)
.first()
)
tfidf_id = tfidf_id[0]
print(tfidf_id)
# request data
nodes_query = (session
.query(Node, func.sum(NodeNodeNgram.score))
.join(NodeNodeNgram, NodeNodeNgram.node2_id == Node.id)
.filter(NodeNodeNgram.node1_id == tfidf_id)
.filter(Node.typename == 'DOCUMENT', Node.parent_id== corpus.id)
.filter(or_(*[NodeNodeNgram.ngram_id==ngram_id for ngram_id in ngram_ids]))
.group_by(Node)
.order_by(func.sum(NodeNodeNgram.score).desc())
.limit(limit)
)
# print("\n")
# print("in TFIDF:")
# print("\tcorpus_id:",corpus_id)
# convert query result to a list of dicts
# if nodes_query is None:
# print("TFIDF error, juste take sums")
# nodes_query = (session
# .query(Node, func.sum(NodeNgram.weight))
# .join(NodeNgram, NodeNgram.node_id == Node.id)
# .filter(Node.parent_id == corpus_id)
# .filter(Node.typename == 'DOCUMENT')
# .filter(or_(*[NodeNgram.ngram_id==ngram_id for ngram_id in ngram_ids]))
# .group_by(Node)
# .order_by(func.sum(NodeNgram.weight).desc())
# .limit(limit)
# )
for node, score in nodes_query:
print(node,score)
print("\t corpus:",corpus_id,"\t",node.name)
node_dict = {
'id': node.id,
'score': score,
}
for key in ('title', 'publication_date', 'journal', 'authors', 'fields'):
if key in node.hyperdata:
node_dict[key] = node.hyperdata[key]
nodes_list.append(node_dict)
return JsonHttpResponse(nodes_list)
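A hedged request example (corpus and ngram ids hypothetical):

# GET /api/nodes/<corpus_id>/having?score=tfidf&ngram_ids=487,294
# -> at most 5 DOCUMENT nodes of the corpus as JSON, sorted by their
#    summed TFIDF score over ngrams 487 and 294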
class NodeResource(APIView):
...@@ -104,7 +192,6 @@ class NodeResource(APIView):
        parameters, query, count = _query_nodes(request, node_id)
        if not len(query):
            raise Http404()
        result = session.execute(
            delete(Node).where(Node.id == node_id)
        )
...@@ -177,3 +264,6 @@ class CorpusFacet(APIView):
        # // if subfield not in corpus.aggs:
        # //     corpus.aggs[subfield] = xcounts
        return (xcounts, total)
...@@ -5,10 +5,11 @@ from . import ngramlists

urlpatterns = [
    url(r'^nodes$'              , nodes.NodeListResource.as_view()),
    url(r'^nodes/(\d+)$'        , nodes.NodeResource.as_view()),
    url(r'^nodes/(\d+)/facets$' , nodes.CorpusFacet.as_view()),
    url(r'^nodes/(\d+)/having$' , nodes.NodeListHaving.as_view()),

    # add or remove ngram from a list
    # ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2
...@@ -23,5 +24,4 @@ urlpatterns = [
    #    - an optional grouplist)
    url(r'^ngramlists/family$', ngramlists.ListFamily.as_view()),
]
...@@ -7,21 +7,9 @@ def login(request):
    """Performs user login
    """
    auth.logout(request)

    # if the user sends their authentication data to the page
    if request.method == "POST":
        # /!\ pass is sent clear in POST data: use SSL
        user = auth.authenticate(
            username = request.POST['username'],
...@@ -35,6 +23,19 @@ def login(request):
        else:
            return redirect('/projects/')
# if the user wants to access the login form
additional_context = {}
    # if for example: auth/?next=/project/5/corpus/554/document/556/
# => we'll forward ?next="..." into template with form
if 'next' in request.GET:
additional_context = {'next_page':request.GET['next']}
return render(
template_name = 'pages/auth/login.html',
request = request,
context = additional_context,
)
def logout(request):
    """Logout the user, and redirect to main page
...
...@@ -24,23 +24,21 @@ def _get_user_project_corpus(request, project_id, corpus_id):
@requires_auth
def docs_by_titles(request, project_id, corpus_id):
    authorized, user, project, corpus = _get_user_project_corpus(request, project_id, corpus_id)
    if not authorized:
        return HttpResponseForbidden()

    # response!
    return render(
        template_name = 'pages/corpora/titles.html',
        request = request,
        context = {
            'debug': DEBUG,
            'date': datetime.now(),
            'project': project,
            'corpus': corpus,
            'view': 'titles',
            'user': request.user
        },
    )
...@@ -74,3 +72,4 @@ def docs_by_journals(request, project_id, corpus_id):
            'view': 'journals'
        },
    )
...@@ -6,7 +6,7 @@ from gargantext.models import *
from gargantext.constants import *
from gargantext.util.scheduling import scheduled
from gargantext.util.toolchain import parse_extract_indexhyperdata
from datetime import datetime
from collections import defaultdict
...@@ -94,8 +94,9 @@ def project(request, project_id):
            )
            session.add(corpus)
            session.commit()

            # parse_extract: fileparsing -> ngram extraction -> lists
            scheduled(parse_extract_indexhyperdata)(corpus.id)

    # corpora within this project
    corpora = project.children('CORPUS').all()
...
...@@ -20,7 +20,7 @@ urlpatterns = [
    url(r'^projects/(\d+)/?$', projects.project),

    # corpora
    url(r'^projects/(\d+)/corpora/(\d+)/?$', corpora.docs_by_titles),
    url(r'^projects/(\d+)/corpora/(\d+)/chart/?$', corpora.chart),

    # corpus by journals
...@@ -28,4 +28,5 @@ urlpatterns = [
    # terms table for the corpus
    url(r'^projects/(\d+)/corpora/(\d+)/terms/?$', terms.ngramtable),
]
Module Graph Explorer: from text to graph.
Maintainer: If you see bugs, please report to team@gargantext.org
# Article coming soon
from gargantext.util.db import session
from gargantext.models.ngrams import Ngram
from collections import defaultdict
from networkx.readwrite import json_graph
def filterByBridgeness(G,partition,ids,weight,bridgeness,type,field1,field2):
    # Data are stored in a dict() (Python's default hashmap)
data = dict()
if type == "node_link":
nodesB_dict = {}
for node_id in G.nodes():
#node,type(labels[node])
G.node[node_id]['pk'] = ids[node_id][1]
nodesB_dict [ ids[node_id][1] ] = True
            # TODO the query below is not optimized (do it in do_distance).
the_label = session.query(Ngram.terms).filter(Ngram.id==node_id).first()
the_label = ", ".join(the_label)
G.node[node_id]['label'] = the_label
G.node[node_id]['size'] = weight[node_id]
G.node[node_id]['type'] = ids[node_id][0].replace("ngrams","terms")
G.node[node_id]['attributes'] = { "clust_default": partition[node_id]} # new format
# G.add_edge(node, "cluster " + str(partition[node]), weight=3)
links = []
i=1
if bridgeness > 0:
com_link = defaultdict(lambda: defaultdict(list))
com_ids = defaultdict(list)
for k, v in partition.items():
com_ids[v].append(k)
for e in G.edges_iter():
s = e[0]
t = e[1]
weight = G[ids[s][1]][ids[t][1]]["weight"]
if bridgeness < 0:
info = { "s": ids[s][1]
, "t": ids[t][1]
, "w": weight
}
links.append(info)
else:
if partition[s] == partition[t]:
info = { "s": ids[s][1]
, "t": ids[t][1]
, "w": weight
}
links.append(info)
if bridgeness > 0:
if partition[s] < partition[t]:
com_link[partition[s]][partition[t]].append((s,t,weight))
if bridgeness > 0:
for c1 in com_link.keys():
for c2 in com_link[c1].keys():
index = round(bridgeness*len(com_link[c1][c2]) / (len(com_ids[c1]) + len(com_ids[c2])))
#print((c1,len(com_ids[c1])), (c2,len(com_ids[c2])), index)
if index > 0:
for link in sorted(com_link[c1][c2], key=lambda x: x[2], reverse=True)[:index]:
#print(c1, c2, link[2])
info = {"s": link[0], "t": link[1], "w": link[2]}
links.append(info)
B = json_graph.node_link_data(G)
B["links"] = []
B["links"] = links
if field1 == field2 == 'ngrams' :
data["nodes"] = B["nodes"]
data["links"] = B["links"]
else:
A = get_graphA( "journal" , nodesB_dict , B["links"] , corpus )
print("#nodesA:",len(A["nodes"]))
print("#linksAA + #linksAB:",len(A["links"]))
print("#nodesB:",len(B["nodes"]))
print("#linksBB:",len(B["links"]))
data["nodes"] = A["nodes"] + B["nodes"]
data["links"] = A["links"] + B["links"]
print(" total nodes :",len(data["nodes"]))
print(" total links :",len(data["links"]))
print("")
elif type == "adjacency":
for node in G.nodes():
try:
#node,type(labels[node])
#G.node[node]['label'] = node
G.node[node]['name'] = node
#G.node[node]['size'] = weight[node]
G.node[node]['group'] = partition[node]
#G.add_edge(node, partition[node], weight=3)
except Exception as error:
print("error02: ",error)
data = json_graph.node_link_data(G)
elif type == 'bestpartition':
return(partition)
return(data)
from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram, \
NodeHyperdata
from gargantext.util.db import session, aliased, bulk_insert, func
from gargantext.util.lists import WeightedMatrix, UnweightedList, Translations
from sqlalchemy import desc, asc, or_, and_
#import inspect
import datetime
def countCooccurrences( corpus=None
, field1='ngrams' , field2='ngrams'
, start=None , end=None
, mapList_id=None , groupList_id=None
, n_min=1, n_max=None , limit=1000
, coocNode_id=None , reset=True
, isMonopartite=True , threshold = 3):
    '''
    Compute the cooccurrence matrix and save it, returning NodeNgramNgram.node_id
    For the moment, lists of parameters are not supported because lists
    need to be merged beforehand.
    corpus :: Corpus

    mapList_id   :: Int
    groupList_id :: Int

    For the moment, start and end are simple; only the year is implemented yet
    start :: TimeStamp -- example: '2010-05-30 02:00:00+02'
    end   :: TimeStamp
    limit :: Int
    '''
# TODO : add hyperdata here
# Security test
field1,field2 = str(field1), str(field2)
# Get node
    if not coocNode_id:
        coocNode_id = ( session.query( Node.id )
                        .filter( Node.typename  == "COOCCURRENCES"
                               , Node.name      == "GRAPH EXPLORER"
                               , Node.parent_id == corpus.id
                               )
                        .first()
                      )
        if not coocNode_id:
            coocNode = corpus.add_child(
                typename  = "COOCCURRENCES",
                name = "GRAPH EXPLORER COOC (in:%s)" % corpus.id
            )
            session.add(coocNode)
            session.commit()
            coocNode_id = coocNode.id
        else :
            coocNode_id = coocNode_id[0]
if reset == True :
session.query( NodeNgramNgram ).filter( NodeNgramNgram.node_id == coocNode_id ).delete()
session.commit()
NodeNgramX = aliased(NodeNgram)
# Simple Cooccurrences
cooc_score = func.count(NodeNgramX.node_id).label('cooc_score')
# A kind of Euclidean distance cooccurrences
#cooc_score = func.sqrt(func.sum(NodeNgramX.weight * NodeNgramY.weight)).label('cooc_score')
if isMonopartite :
NodeNgramY = aliased(NodeNgram)
cooc_query = (session.query( NodeNgramX.ngram_id
, NodeNgramY.ngram_id
, cooc_score
)
.join( Node
, Node.id == NodeNgramX.node_id
)
.join( NodeNgramY
, NodeNgramY.node_id == Node.id
)
.filter( Node.parent_id==corpus.id
, Node.typename=="DOCUMENT"
)
)
else :
NodeNgramY = aliased(NodeNgram)
cooc_query = (session.query( NodeHyperdataNgram.ngram_id
, NodeNgramY.ngram_id
, cooc_score
)
.join( Node
, Node.id == NodeHyperdataNgram.node_id
)
.join( NodeNgramY
, NodeNgramY.node_id == Node.id
)
.join( Hyperdata
, Hyperdata.id == NodeHyperdataNgram.hyperdata_id
)
.filter( Node.parent_id == corpus.id
, Node.typename == "DOCUMENT"
)
.filter( Hyperdata.name == field1 )
)
# Size of the ngrams between n_min and n_max
if n_min is not None or n_max is not None:
if isMonopartite:
NgramX = aliased(Ngram)
cooc_query = cooc_query.join ( NgramX
, NgramX.id == NodeNgramX.ngram_id
)
NgramY = aliased(Ngram)
cooc_query = cooc_query.join ( NgramY
, NgramY.id == NodeNgramY.ngram_id
)
if n_min is not None:
cooc_query = (cooc_query
.filter(NgramY.n >= n_min)
)
if isMonopartite:
cooc_query = cooc_query.filter(NgramX.n >= n_min)
        if n_max is not None:
            cooc_query = (cooc_query
                .filter(NgramY.n <= n_max)
                )
            if isMonopartite:
                cooc_query = cooc_query.filter(NgramX.n <= n_max)
# Cooc between the dates start and end
if start is not None:
#date_start = datetime.datetime.strptime ("2001-2-3 10:11:12", "%Y-%m-%d %H:%M:%S")
        # TODO : handle more complex date formats here.
date_start = datetime.datetime.strptime (str(start), "%Y-%m-%d")
date_start_utc = date_start.strftime("%Y-%m-%d %H:%M:%S")
Start=aliased(NodeHyperdata)
StartFormat = aliased(Hyperdata)
cooc_query = (cooc_query.join( Start
, Start.node_id == Node.id
)
.join( StartFormat
, StartFormat.id == Start.hyperdata_id
)
.filter( StartFormat.name == 'publication_date')
.filter( Start.value_datetime >= date_start_utc)
)
if end is not None:
        # TODO : handle more complex date formats here.
date_end = datetime.datetime.strptime (str(end), "%Y-%m-%d")
date_end_utc = date_end.strftime("%Y-%m-%d %H:%M:%S")
End=aliased(NodeHyperdata)
EndFormat = aliased(Hyperdata)
cooc_query = (cooc_query.join( End
, End.node_id == Node.id
)
.join( EndFormat
, EndFormat.id == End.hyperdata_id
)
.filter( EndFormat.name == 'publication_date' )
.filter( End.value_datetime <= date_end_utc )
)
if isMonopartite:
        # Cooc is symmetric; take only the main cooccurrences and cut at the limit
cooc_query = cooc_query.filter(NodeNgramX.ngram_id < NodeNgramY.ngram_id)
cooc_query = cooc_query.having(cooc_score > threshold)
if isMonopartite:
cooc_query = cooc_query.group_by(NodeNgramX.ngram_id, NodeNgramY.ngram_id)
else:
cooc_query = cooc_query.group_by(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id)
# Order according some scores
cooc_query = cooc_query.order_by(desc('cooc_score'))
matrix = WeightedMatrix(cooc_query)
mapList = UnweightedList( mapList_id )
group_list = Translations ( groupList_id )
cooc = matrix & (mapList * group_list)
cooc.save(coocNode_id)
return(coocNode_id)
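A hedged usage sketch (list ids hypothetical; start/end follow the "%Y-%m-%d" format expected by the strptime calls above):

# count and save cooccurrences for one corpus, restricted to 2010
cooc_id = countCooccurrences( corpus = corpus
                            , start = '2010-01-01' , end = '2010-12-31'
                            , mapList_id = 42      , groupList_id = 43
                            , threshold = 3 )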
from gargantext.models import Node, NodeNgram, NodeNgramNgram, \
NodeHyperdata
from gargantext.util.db import session, aliased
from graphExplorer.louvain import best_partition
from copy import copy
from collections import defaultdict
from math import log,sqrt
#from operator import itemgetter
import math
import numpy as np
import pandas as pd
import networkx as nx
def clusterByDistances( cooc_id
, field1=None, field2=None
, distance='conditional'):
'''
do_distance :: Int -> (Graph, Partition, {ids}, {weight})
'''
# implicit global session
authorized = ['conditional', 'distributional', 'cosine']
if distance not in authorized:
distance = 'conditional'
matrix = defaultdict(lambda : defaultdict(float))
ids = defaultdict(lambda : defaultdict(int))
labels = dict()
weight = dict()
Cooc = aliased(NodeNgramNgram)
query = session.query(Cooc).filter(Cooc.node_id==cooc_id).all()
for cooc in query:
matrix[cooc.ngram1_id][cooc.ngram2_id] = cooc.weight
matrix[cooc.ngram2_id][cooc.ngram1_id] = cooc.weight
ids[cooc.ngram1_id] = (field1, cooc.ngram1_id)
ids[cooc.ngram2_id] = (field2, cooc.ngram2_id)
weight[cooc.ngram1_id] = weight.get(cooc.ngram1_id, 0) + cooc.weight
weight[cooc.ngram2_id] = weight.get(cooc.ngram2_id, 0) + cooc.weight
x = pd.DataFrame(matrix).fillna(0)
if distance == 'conditional':
x = x / x.sum(axis=1)
#y = y / y.sum(axis=0)
xs = x.sum(axis=1) - x
ys = x.sum(axis=0) - x
        # top included or excluded
n = ( xs + ys) / (2 * (x.shape[0] - 1))
# top generic or specific
m = ( xs - ys) / (2 * (x.shape[0] - 1))
n = n.sort(inplace=False)
m = m.sort(inplace=False)
nodes_included = 500 #int(round(size/20,0))
#nodes_excluded = int(round(size/10,0))
nodes_specific = 500 #int(round(size/10,0))
#nodes_generic = int(round(size/10,0))
# TODO use the included score for the node size
n_index = pd.Index.intersection(x.index, n.index[:nodes_included])
# Generic:
#m_index = pd.Index.intersection(x.index, m.index[:nodes_generic])
# Specific:
m_index = pd.Index.intersection(x.index, m.index[-nodes_specific:])
#m_index = pd.Index.intersection(x.index, n.index[:nodes_included])
x_index = pd.Index.union(n_index, m_index)
xx = x[list(x_index)].T[list(x_index)]
# Removing unconnected nodes
xxx = xx.values
threshold = min(xxx.max(axis=1))
matrix_filtered = np.where(xxx >= threshold, xxx, 0)
#matrix_filtered = matrix_filtered.resize((90,90))
G = nx.from_numpy_matrix(np.matrix(matrix_filtered))
G = nx.relabel_nodes(G, dict(enumerate([ ids[id_][1] for id_ in list(xx.columns)])))
elif distance == 'cosine':
scd = defaultdict(lambda : defaultdict(int))
for i in matrix.keys():
for j in matrix.keys():
numerator = sum(
[
matrix[i][k] * matrix[j][k]
for k in matrix.keys()
if i != j and k != i and k != j
]
)
denominator = sqrt(
sum([
matrix[i][k]
for k in matrix.keys()
if k != i and k != j #and matrix[i][k] > 0
])
*
sum([
matrix[i][k]
for k in matrix.keys()
if k != i and k != j #and matrix[i][k] > 0
])
)
try:
scd[i][j] = numerator / denominator
except Exception as error:
scd[i][j] = 0
minmax = min([ max([ scd[i][j] for i in scd.keys()]) for j in scd.keys()])
G = nx.DiGraph()
G.add_edges_from(
[
(i, j, {'weight': scd[i][j]})
for i in scd.keys() for j in scd.keys()
if i != j and scd[i][j] > minmax and scd[i][j] > scd[j][i]
]
)
elif distance == 'distributional':
mi = defaultdict(lambda : defaultdict(int))
total_cooc = x.sum().sum()
for i in matrix.keys():
si = sum([matrix[i][j] for j in matrix[i].keys() if i != j])
for j in matrix[i].keys():
sj = sum([matrix[j][k] for k in matrix[j].keys() if j != k])
if i!=j :
mi[i][j] = log( matrix[i][j] / ((si * sj) / total_cooc) )
r = defaultdict(lambda : defaultdict(int))
for i in matrix.keys():
for j in matrix.keys():
sumMin = sum(
[
min(mi[i][k], mi[j][k])
for k in matrix.keys()
if i != j and k != i and k != j and mi[i][k] > 0
]
)
sumMi = sum(
[
mi[i][k]
for k in matrix.keys()
if k != i and k != j and mi[i][k] > 0
]
)
try:
r[i][j] = sumMin / sumMi
except Exception as error:
r[i][j] = 0
# Need to filter the weak links, automatic threshold here
minmax = min([ max([ r[i][j] for i in r.keys()]) for j in r.keys()])
G = nx.DiGraph()
G.add_edges_from(
[
(i, j, {'weight': r[i][j]})
for i in r.keys() for j in r.keys()
if i != j and r[i][j] > minmax and r[i][j] > r[j][i]
]
)
# degree_max = max([(n, d) for n,d in G.degree().items()], key=itemgetter(1))[1]
# nodes_to_remove = [n for (n,d) in G.degree().items() if d <= round(degree_max/2)]
# G.remove_nodes_from(nodes_to_remove)
# Removing too connected nodes (find automatic way to do it)
#edges_to_remove = [ e for e in G.edges_iter() if
# nodes_to_remove = [n for n in degree if degree[n] <= 1]
# G.remove_nodes_from(nodes_to_remove)
def getWeight(item):
return item[1]
#
# node_degree = sorted(G.degree().items(), key=getWeight, reverse=True)
# #print(node_degree)
# nodes_too_connected = [n[0] for n in node_degree[0:(round(len(node_degree)/5))]]
#
# for n in nodes_too_connected:
# n_edges = list()
# for v in nx.neighbors(G,n):
# #print((n, v), G[n][v]['weight'], ":", (v,n), G[v][n]['weight'])
# n_edges.append(((n, v), G[n][v]['weight']))
#
# n_edges_sorted = sorted(n_edges, key=getWeight, reverse=True)
# #G.remove_edges_from([ e[0] for e in n_edges_sorted[round(len(n_edges_sorted)/2):]])
# #G.remove_edges_from([ e[0] for e in n_edges_sorted[(round(len(nx.neighbors(G,n))/3)):]])
# G.remove_edges_from([ e[0] for e in n_edges_sorted[10:]])
G.remove_nodes_from(nx.isolates(G))
partition = best_partition(G.to_undirected())
return(G,partition,ids,weight)
# Gargantext lib
from gargantext.util.db import session
from gargantext.util.http import JsonHttpResponse
from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram
#from gargantext.util.toolchain.ngram_coocs import compute_coocs
from graphExplorer.cooccurrences import countCooccurrences
from graphExplorer.distances import clusterByDistances
from graphExplorer.bridgeness import filterByBridgeness
# Prelude lib
from copy import copy, deepcopy
from collections import defaultdict
from sqlalchemy.orm import aliased
# Math/Graph lib
import math
import pandas as pd
import numpy as np
import networkx as nx
def get_graph( request=None , corpus=None
, field1='ngrams' , field2='ngrams'
, mapList_id = None , groupList_id = None
, cooc_id=None , type='node_link'
, start=None , end=None
, threshold=1
, distance='conditional'
, isMonopartite=True # By default, we compute terms/terms graph
, bridgeness=5
#, size=1000
):
'''
Get_graph : main steps:
1) count Cooccurrences (function countCooccurrences)
main parameters: threshold
2) filter and cluster By Distances (function clusterByDistances)
main parameter: distance
    3) filter By Bridgeness (function filterByBridgeness)
            main parameter: bridgeness
4) format the graph (formatGraph)
main parameter: format_
'''
if cooc_id == None:
cooc_id = countCooccurrences( corpus=corpus
#, field1="ngrams", field2="ngrams"
, start=start , end =end
, mapList_id=mapList_id , groupList_id=groupList_id
, isMonopartite=True , threshold = threshold
#, limit=size
)
G, partition, ids, weight = clusterByDistances ( cooc_id
, field1="ngrams", field2="ngrams"
, distance=distance
)
data = filterByBridgeness(G,partition,ids,weight,bridgeness,type,field1,field2)
return data
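Hedged usage sketch, chaining the three steps above (assuming `corpus` is a loaded CORPUS node):

# build the node_link graph with the defaults (conditional distance,
# bridgeness 5); cooc_id is computed on the fly when not provided
data = get_graph(corpus=corpus, threshold=1, distance='conditional')
# data['nodes'] and data['links'] are ready for JSON serialization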
#from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from gargantext.util.db import session
from gargantext.models.nodes import Node
from graphExplorer.graph import get_graph
from gargantext.util.http import APIView, APIException\
, JsonHttpResponse, requires_auth
# TODO check authentication
class Graph(APIView):
'''
REST part for graphs.
'''
def get(self, request, project_id, corpus_id):
'''
Graph.get :: Get graph data as REST api.
Get all the parameters first
graph?field1=ngrams&field2=ngrams&
graph?field1=ngrams&field2=ngrams&start=''&end=''
'''
# Get the node we are working with
corpus = session.query(Node).filter(Node.id==corpus_id).first()
# Get all the parameters in the URL
field1 = str(request.GET.get ('field1' , 'ngrams' ))
field2 = str(request.GET.get ('field2' , 'ngrams' ))
start = request.GET.get ('start' , None )
end = request.GET.get ('end' , None )
mapList_id = int(request.GET.get ('mapList' , 0 ))
groupList_id = int(request.GET.get ('groupList' , 0 ))
threshold = int(request.GET.get ('threshold' , 1 ))
bridgeness = int(request.GET.get ('bridgeness', -1 ))
format_ = str(request.GET.get ('format' , 'json' ))
type_ = str(request.GET.get ('type' , 'node_link' ))
distance = str(request.GET.get ('distance' , 'conditional'))
# Get default value if no map list
if mapList_id == 0 :
mapList_id = ( session.query ( Node.id )
.filter( Node.typename == "MAPLIST"
, Node.parent_id == corpus.id
)
.first()
)
mapList_id = mapList_id[0]
if mapList_id == None :
raise ValueError("MAPLIST node needed for cooccurrences")
# Get default value if no group list
if groupList_id == 0 :
groupList_id = ( session.query ( Node.id )
.filter( Node.typename == "GROUPLIST"
, Node.parent_id == corpus.id
)
.first()
)
groupList_id = groupList_id[0]
if groupList_id == None :
raise ValueError("GROUPLIST node needed for cooccurrences")
        # Check the options
accepted_field1 = ['ngrams', 'journal', 'source', 'authors']
accepted_field2 = ['ngrams', ]
options = ['start', 'end', 'threshold', 'distance' ]
if field1 in accepted_field1 :
if field2 in accepted_field2 :
if start is not None and end is not None :
data = get_graph( corpus=corpus
#, field1=field1 , field2=field2
, mapList_id = mapList_id , groupList_id = groupList_id
, start=start , end=end
, threshold =threshold , distance=distance
)
else:
data = get_graph( corpus = corpus
#, field1=field1, field2=field2
, mapList_id = mapList_id , groupList_id = groupList_id
, threshold = threshold
, distance = distance
, bridgeness = bridgeness
)
if format_ == 'json':
return JsonHttpResponse(data)
else:
return JsonHttpResponse({
'Warning USAGE' : 'One field for each range:'
, 'field1' : accepted_field1
, 'field2' : accepted_field2
, 'options': options
})
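A hedged example call (project and corpus ids hypothetical):

# GET /projects/5/corpora/554/graph?threshold=2&distance=conditional&bridgeness=5
# -> JSON {"nodes": [...], "links": [...]} consumed by the explorer view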
mv /srv/gargantext/static/js/tina* .
from django.conf.urls import patterns, url
from graphExplorer import views
# /!\ urls patterns here are *without* the trailing slash
urlpatterns = patterns('',
url(r'^register/$', views.Register.as_view()), # Register
url(r'^login/$', views.Login.as_view()), # Login
)
from gargantext.util.http import *
from gargantext.util.db import *
from gargantext.util.db_cache import cache
from gargantext.models import *
from gargantext.constants import *
from gargantext.settings import *
from datetime import datetime
@requires_auth
def explorer(request, project_id, corpus_id):
    '''
    Graph explorer, also known as TinaWebJS, using SigmaJS.
    Nodes are ngrams (from the title, abstract, or journal name).
    Links represent a proximity measure.
    '''
# we pass our corpus
corpus = cache.Node[corpus_id]
# and the project just for project.id in corpusBannerTop
project = cache.Node[project_id]
graphurl = "projects/" + str(project_id) + "/corpora/" + str(corpus_id) + "/node_link.json"
    # rendered page : explorer.html
return render(
template_name = 'graphExplorer/explorer.html',
request = request,
context = {
'debug' : settings.DEBUG,
'request' : request,
'user' : request.user,
'date' : datetime.now(),
'project' : project,
'corpus' : corpus,
#'list_id' : maplist.id,\
'graphfile' : graphurl,\
'view' : 'graph'
},
)
#!/bin/dash
# TODO do apt-get install --force-yes --force-yes
#postgresql3.4-server-dev
#+libxml2-dev
sudo apt-get install --force-yes postgresql
sudo apt-get install --force-yes postgresql-contrib
sudo apt-get install --force-yes rabbitmq-server
sudo apt-get install --force-yes tmux
sudo apt-get install --force-yes uwsgi uwsgi-plugin-python3
#apt-get install --force-yes python-virtualenv
sudo apt-get install --force-yes libpng12-dev
sudo apt-get install --force-yes libpng-dev
sudo apt-get install --force-yes libfreetype6-dev
sudo apt-get install --force-yes python-dev
sudo apt-get install --force-yes libpq-dev
#apt-get build-dep python-matplotlib
#apt-get install --force-yes python-matplotlib
#Paquets Debian a installer
# easy_install --force-yes -U distribute (matplotlib)
#lxml
sudo apt-get install --force-yes libffi-dev
sudo apt-get install --force-yes libxml2-dev
sudo apt-get install --force-yes libxslt1-dev
# ipython readline
sudo apt-get install --force-yes libncurses5-dev
sudo apt-get install --force-yes pandoc
# scipy:
sudo apt-get install --force-yes gfortran
sudo apt-get install --force-yes libopenblas-dev
sudo apt-get install --force-yes liblapack-dev
#nlpserver
sudo apt-get install --force-yes libgflags-dev
sudo apt-get install --force-yes libgoogle-glog-dev
# MElt
# soon
## SERVER Configuration
# server configuration
sudo apt-get install --force-yes nginx
# UWSGI with pcre support
sudo apt-get install --force-yes libpcre3 libpcre3-dev
sudo apt-get install --force-yes python3-pip
#pip3 install --force-yes uwsgi
#!/bin/bash
#MAINTAINER ISCPIF <alexandre.delanoe@iscpif.fr>
git clone ssh://gitolite@delanoe.org:1979/gargantext /srv/gargantext \
&& cd /srv/gargantext \
&& git fetch origin refactoring-alex \
&& git checkout refactoring-alex
cd /srv/gargantext/install \
&& /usr/bin/virtualenv --py=/usr/bin/python3.5 /srv/env_3-5 \
&& /bin/bash -c '/srv/env_3-5/bin/pip install git+https://github.com/zzzeek/sqlalchemy.git@rel_1_1' \
&& /bin/bash -c '/srv/env_3-5/bin/pip install -r /srv/gargantext/install/python/requirements.txt'
## INSTALL MAIN DEPENDENCIES
cd /tmp && wget http://dl.gargantext.org/gargantext_lib.tar.bz2 \
&& mkdir -p /srv/gargantext_lib \
&& tar xvjf gargantext_lib.tar.bz2 -C /srv/gargantext_lib \
&& chown -R gargantua:gargantua /srv/gargantext_lib
## End of configuration
## be sure that postgres is running
cd /srv/gargantext \
&& /srv/env_3-5/bin/python /srv/gargantext/manage.py shell < /srv/gargantext/init.py
echo "Gargantua: END of the installation of Gargantext"
#!/bin/bash
# ## CONFIGURE POSTGRESQL
psql -c "CREATE user gargantua WITH PASSWORD 'C8kdcUrAQy66U'" && createdb -O gargantua gargandb
#!/bin/bash
#MAINTAINER ISCPIF <alexandre.delanoe@iscpif.fr>
apt-get update && \
apt-get install -y \
apt-utils ca-certificates locales \
sudo aptitude gcc g++ wget git postgresql-9.5 vim
### Configure timezone and locale
echo "Europe/Paris" > /etc/timezone && \
dpkg-reconfigure -f noninteractive tzdata && \
sed -i -e 's/# en_GB.UTF-8 UTF-8/en_GB.UTF-8 UTF-8/' /etc/locale.gen && \
sed -i -e 's/# fr_FR.UTF-8 UTF-8/fr_FR.UTF-8 UTF-8/' /etc/locale.gen && \
echo 'LANG="fr_FR.UTF-8"' > /etc/default/locale && \
dpkg-reconfigure --frontend=noninteractive locales && \
update-locale LANG=fr_FR.UTF-8
## PROD VERSION OF GARGANTEXT
apt-get install -y uwsgi nginx
### CREATE USER and adding it to sudo
## USER gargantua cannot connect with a password, only with an SSH key
adduser --disabled-password --gecos "" gargantua \
&& adduser gargantua sudo \
&& echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
# addgroup gargantext here with specific users
## Install Database, main dependencies and Python
## (installing some Debian version before pip to get dependencies)
# python3-six, python3-numpy, python3-setuptools : for numpy, pandas
# python3-numexpr                                : for numpy performance
# libxml2-dev, libxslt-dev                       : for lxml
apt-get update && apt-get install -y \
    postgresql-server-dev-9.5 libpq-dev libxml2 \
    libxml2-dev xml-core libgfortran-5-dev \
    virtualenv python3-virtualenv \
    python3.4 python3.4-dev \
    python3.5 python3.5-dev \
    python3-six python3-numpy python3-setuptools \
    python3-numexpr \
    libxslt-dev
#if [[ -e "/srv/gargantext" ]]
#rm -rf /srv/gargantext /srv/env_3-5
for dir in "/srv/gargantext" \
           "/srv/gargantext_lib" \
           "/srv/env_3-5" \
           "/var/www/gargantext"; do
    mkdir -p "$dir"
    chown gargantua:gargantua "$dir"
done
echo "Root: END of the installation of Gargantext by Root."
Docker installation
For dev: cd dev and run install
For prod: install the dev version first, then cd prod and run install
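A sketch of that flow, assuming each directory ships an executable script named "install" (the exact names and paths may differ):

# development image first
cd dev && ./install
# then the production image, layered on the dev version
cd ../prod && ./install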
#FROM debian:stretch
FROM gargantext
#MAINTAINER ISCPIF <alexandre.delanoe@iscpif.fr>
#
## Install docker.io
## Install sudo
## wget http://dl.gargantext.
## cd /srv/gargantext/install
## sudo docker build -t gargantext .
# docker run -i -t gargantext /bin/bash
USER root
# RUN apt-get update && \
# apt-get install -y \
# apt-utils ca-certificates locales \
# sudo aptitude gcc wget git postgresql-9.5 vim
#
# ## Configure timezone and locale
# RUN echo "Europe/Paris" > /etc/timezone && \
# dpkg-reconfigure -f noninteractive tzdata && \
# sed -i -e 's/# en_GB.UTF-8 UTF-8/en_GB.UTF-8 UTF-8/' /etc/locale.gen && \
# sed -i -e 's/# fr_FR.UTF-8 UTF-8/fr_FR.UTF-8 UTF-8/' /etc/locale.gen && \
# echo 'LANG="fr_FR.UTF-8"' > /etc/default/locale && \
# dpkg-reconfigure --frontend=noninteractive locales && \
# update-locale LANG=fr_FR.UTF-8
#
#
# RUN apt-get update \
# && apt-get install -y postgresql-server-dev-9.5 \
# libpq-dev libxml2 libxml2-dev xml-core libgfortran-5-dev
#
# # PROD VERSION OF GARGANTEXt
# # RUN apt-get install uwsgi nginx
#
#
# ## CREATE USER and adding it to sudo
# ## TODO ask user for password
# RUN adduser --disabled-password --gecos "" gargantua
# RUN apt-get install -y sudo && adduser gargantua sudo \
# && echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
#
# # Python dependencies
# # (installing Debian version before pip to get dependencies)
# # TODO: update it with requirements.txt
# RUN apt-get update && apt-get install -y \
# virtualenv python3-virtualenv \
# python3.4 python3.4-dev \
# python3.5 python3.5-dev \
# python3-six python3-numpy
#
# # Installing pip version of python libs
WORKDIR /home/gargantua
# FIXME : pip install -r all requirements does not work, need to split the list
#RUN wget http://dl.delanoe.org/requirements.txt \
# && /usr/bin/virtualenv --py=/usr/bin/python3.5 env_3-5 \
# && /bin/bash -c 'source env_3-5/bin/activate' \
# && /bin/bash -c 'env_3-5/bin/pip install git+https://github.com/zzzeek/sqlalchemy.git@rel_1_1' \
# && /bin/bash -c 'env_3-5/bin/pip install -r requirements.txt'
# chown gargantua
#
## CONFIGURE POSTGRESQL
#
#VOLUME ["/home/gargantua","/data/gargantext"]
## INSTALL MAIN DEPENDENCIES
#WORKDIR /srv
#RUN mkdir -p gargantext
# configure postgres here
# OK
USER postgres
RUN /etc/init.d/postgresql start &&\
psql -c "CREATE user gargantua WITH PASSWORD 'C8kdcUrAQy66U'" &&\
createdb -O gargantua gargandb
# GET CONFIG FILES
# USER gargantua
#RUN wget http://dl.delanoe.org/gargantext_big.txt -o /srv/gargantext_big.txt
#RUN cd /srv/ && git clone https://gogs.iscpif.fr/gargantext.git
# script pour peupler la base
# mount /srv
# execute
##################### INSTALLATION END #####################
# Expose the default port
#EXPOSE 54332
# Default port to execute the entrypoint (MongoDB)
#CMD ["--port 27017"]
# Set default container command
#ENTRYPOINT usr/bin/mongod
# ENTRYPOINT /etc/init.d/postgresql start
#/bin/bash
# Install Docker
# Debian/Ubuntu: apt-get install docker
# run turboparser port, with python 3.4
#docker run -d -p 8000:8000 -v /srv:/srv -t gargantext python /srv/gargantext/gargantext.py
# launch
#cd /srv/gargantext
#source /srv/env_3-5/bin/activate &&
#docker run -d -p 8000:8000 -v /srv:/srv -t gargantext python /srv/gargantext/gargantext.py
docker build -t gargantext .
# try bottleneck
 amqp==1.4.9
 anyjson==0.3.3
 billiard==3.3.0.22 # multiprocessing fork
...
@@ -21,8 +22,10 @@
 python-dateutil==2.4.2
 pytz==2015.7 # timezones
 PyYAML==3.11
 RandomWords==0.1.12
-six==1.10.0
 SQLAlchemy==1.1.0b1.dev0
 ujson==1.35
 umalqurra==0.2 # arabic calendars (?? why use ??)
 wheel==0.29.0
+pandas==0.18.0
+networkx==1.11
+six==1.10.0
#!/bin/bash
echo "Need to finish the dependencies. So soon... :)"
#!/bin/bash
FILE="/var/log/gargantext/celery/$(date +%Y%m%d-%H:%M:%S).log"
source /srv/gargantext_env_3.5/bin/activate
./manage.py celery worker -f "$FILE"
#!/bin/bash
FILE="/var/log/gargantext/uwsgi/$(date +%Y%m%d-%H:%M:%S).log"
#touch /var/log/gargantext/uwsgi/$FILE && sudo
uwsgi gargantext.ini --logto "$FILE"
/*
AngularJS v1.2.28
(c) 2010-2014 Google, Inc. http://angularjs.org
License: MIT
*/
(function(p,f,n){'use strict';f.module("ngCookies",["ng"]).factory("$cookies",["$rootScope","$browser",function(e,b){var c={},g={},h,k=!1,l=f.copy,m=f.isUndefined;b.addPollFn(function(){var a=b.cookies();h!=a&&(h=a,l(a,g),l(a,c),k&&e.$apply())})();k=!0;e.$watch(function(){var a,d,e;for(a in g)m(c[a])&&b.cookies(a,n);for(a in c)d=c[a],f.isString(d)||(d=""+d,c[a]=d),d!==g[a]&&(b.cookies(a,d),e=!0);if(e)for(a in d=b.cookies(),c)c[a]!==d[a]&&(m(d[a])?delete c[a]:c[a]=d[a])});return c}]).factory("$cookieStore",
["$cookies",function(e){return{get:function(b){return(b=e[b])?f.fromJson(b):b},put:function(b,c){e[b]=f.toJson(c)},remove:function(b){delete e[b]}}}])})(window,window.angular);
//# sourceMappingURL=angular-cookies.min.js.map
/*
AngularJS v1.2.28
(c) 2010-2014 Google, Inc. http://angularjs.org
License: MIT
*/
(function(){'use strict';function d(a){return function(){var c=arguments[0],b,c="["+(a?a+":":"")+c+"] http://errors.angularjs.org/1.2.28/"+(a?a+"/":"")+c;for(b=1;b<arguments.length;b++)c=c+(1==b?"?":"&")+"p"+(b-1)+"="+encodeURIComponent("function"==typeof arguments[b]?arguments[b].toString().replace(/ \{[\s\S]*$/,""):"undefined"==typeof arguments[b]?"undefined":"string"!=typeof arguments[b]?JSON.stringify(arguments[b]):arguments[b]);return Error(c)}}(function(a){var c=d("$injector"),b=d("ng");a=a.angular||
(a.angular={});a.$$minErr=a.$$minErr||d;return a.module||(a.module=function(){var a={};return function(e,d,f){if("hasOwnProperty"===e)throw b("badname","module");d&&a.hasOwnProperty(e)&&(a[e]=null);return a[e]||(a[e]=function(){function a(c,d,e){return function(){b[e||"push"]([c,d,arguments]);return g}}if(!d)throw c("nomod",e);var b=[],h=[],k=a("$injector","invoke"),g={_invokeQueue:b,_runBlocks:h,requires:d,name:e,provider:a("$provide","provider"),factory:a("$provide","factory"),service:a("$provide",
"service"),value:a("$provide","value"),constant:a("$provide","constant","unshift"),animation:a("$animateProvider","register"),filter:a("$filterProvider","register"),controller:a("$controllerProvider","register"),directive:a("$compileProvider","directive"),config:k,run:function(a){h.push(a);return this}};f&&k(f);return g}())}}())})(window)})(window);
//# sourceMappingURL=angular-loader.min.js.map
/*
AngularJS v1.2.28
(c) 2010-2014 Google, Inc. http://angularjs.org
License: MIT
*/
(function(H,a,A){'use strict';function D(p,g){g=g||{};a.forEach(g,function(a,c){delete g[c]});for(var c in p)!p.hasOwnProperty(c)||"$"===c.charAt(0)&&"$"===c.charAt(1)||(g[c]=p[c]);return g}var v=a.$$minErr("$resource"),C=/^(\.[a-zA-Z_$][0-9a-zA-Z_$]*)+$/;a.module("ngResource",["ng"]).factory("$resource",["$http","$q",function(p,g){function c(a,c){this.template=a;this.defaults=c||{};this.urlParams={}}function t(n,w,l){function r(h,d){var e={};d=x({},w,d);s(d,function(b,d){u(b)&&(b=b());var k;if(b&&
b.charAt&&"@"==b.charAt(0)){k=h;var a=b.substr(1);if(null==a||""===a||"hasOwnProperty"===a||!C.test("."+a))throw v("badmember",a);for(var a=a.split("."),f=0,c=a.length;f<c&&k!==A;f++){var g=a[f];k=null!==k?k[g]:A}}else k=b;e[d]=k});return e}function e(a){return a.resource}function f(a){D(a||{},this)}var F=new c(n);l=x({},B,l);s(l,function(h,d){var c=/^(POST|PUT|PATCH)$/i.test(h.method);f[d]=function(b,d,k,w){var q={},n,l,y;switch(arguments.length){case 4:y=w,l=k;case 3:case 2:if(u(d)){if(u(b)){l=
b;y=d;break}l=d;y=k}else{q=b;n=d;l=k;break}case 1:u(b)?l=b:c?n=b:q=b;break;case 0:break;default:throw v("badargs",arguments.length);}var t=this instanceof f,m=t?n:h.isArray?[]:new f(n),z={},B=h.interceptor&&h.interceptor.response||e,C=h.interceptor&&h.interceptor.responseError||A;s(h,function(a,b){"params"!=b&&("isArray"!=b&&"interceptor"!=b)&&(z[b]=G(a))});c&&(z.data=n);F.setUrlParams(z,x({},r(n,h.params||{}),q),h.url);q=p(z).then(function(b){var d=b.data,k=m.$promise;if(d){if(a.isArray(d)!==!!h.isArray)throw v("badcfg",
h.isArray?"array":"object",a.isArray(d)?"array":"object");h.isArray?(m.length=0,s(d,function(b){"object"===typeof b?m.push(new f(b)):m.push(b)})):(D(d,m),m.$promise=k)}m.$resolved=!0;b.resource=m;return b},function(b){m.$resolved=!0;(y||E)(b);return g.reject(b)});q=q.then(function(b){var a=B(b);(l||E)(a,b.headers);return a},C);return t?q:(m.$promise=q,m.$resolved=!1,m)};f.prototype["$"+d]=function(b,a,k){u(b)&&(k=a,a=b,b={});b=f[d].call(this,b,this,a,k);return b.$promise||b}});f.bind=function(a){return t(n,
x({},w,a),l)};return f}var B={get:{method:"GET"},save:{method:"POST"},query:{method:"GET",isArray:!0},remove:{method:"DELETE"},"delete":{method:"DELETE"}},E=a.noop,s=a.forEach,x=a.extend,G=a.copy,u=a.isFunction;c.prototype={setUrlParams:function(c,g,l){var r=this,e=l||r.template,f,p,h=r.urlParams={};s(e.split(/\W/),function(a){if("hasOwnProperty"===a)throw v("badname");!/^\d+$/.test(a)&&(a&&RegExp("(^|[^\\\\]):"+a+"(\\W|$)").test(e))&&(h[a]=!0)});e=e.replace(/\\:/g,":");g=g||{};s(r.urlParams,function(d,
c){f=g.hasOwnProperty(c)?g[c]:r.defaults[c];a.isDefined(f)&&null!==f?(p=encodeURIComponent(f).replace(/%40/gi,"@").replace(/%3A/gi,":").replace(/%24/g,"$").replace(/%2C/gi,",").replace(/%20/g,"%20").replace(/%26/gi,"&").replace(/%3D/gi,"=").replace(/%2B/gi,"+"),e=e.replace(RegExp(":"+c+"(\\W|$)","g"),function(a,c){return p+c})):e=e.replace(RegExp("(/?):"+c+"(\\W|$)","g"),function(a,c,d){return"/"==d.charAt(0)?d:c+d})});e=e.replace(/\/+$/,"")||"/";e=e.replace(/\/\.(?=\w+($|\?))/,".");c.url=e.replace(/\/\\\./,
"/.");s(g,function(a,e){r.urlParams[e]||(c.params=c.params||{},c.params[e]=a)})}};return t}])})(window,window.angular);
//# sourceMappingURL=angular-resource.min.js.map
/* Include this file in your html if you are using the CSP mode. */
@charset "UTF-8";
[ng\:cloak], [ng-cloak], [data-ng-cloak], [x-ng-cloak],
.ng-cloak, .x-ng-cloak,
.ng-hide {
display: none !important;
}
ng\:form {
display: block;
}
.ng-animate-block-transitions {
transition:0s all!important;
-webkit-transition:0s all!important;
}
/* show the element during a show/hide animation when the
* animation is ongoing, but the .ng-hide class is active */
.ng-hide-add-active, .ng-hide-remove {
display: block!important;
}
@@ -87,8 +87,8 @@ function Final_UpdateTable( action ) {
     var UpdateTable = false
     if ( (action == "click" && !isCollapsed) || (action=="changerange" && isCollapsed) ) {
         UpdateTable = true;
-        $("#corpusdisplayer").html("Close Folder")
-    } else $("#corpusdisplayer").html("Open Folder")
+        $("#corpusdisplayer").html("View by titles")
+    } else $("#corpusdisplayer").html("View by titles")
     pr("update table??: "+UpdateTable)
@@ -230,9 +230,9 @@ $("#move2trash")
     console.log(ids2trash)
     $.ajax({
-        url: "/tests/move2trash/",
-        data: "nodeids="+JSON.stringify(ids2trash),
-        type: 'POST',
+        url : window.location.origin + "/api/nodes?ids="+ids2trash,
+        //data: 'ids:'+JSON.stringify(ids2trash),
+        type: 'DELETE',
         beforeSend: function(xhr) {
             xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
         },
...
@@ -113,7 +113,7 @@ function Final_UpdateTable( action ) {
     if ( (action == "click" && !isCollapsed) || (action=="changerange" && isCollapsed) ) {
         UpdateTable = true;
         $("#corpusdisplayer").html("Close Folder")
-    } else $("#corpusdisplayer").html("Open Folder")
+    } else $("#corpusdisplayer").html("View by journals")
     pr("update table??: "+UpdateTable)
...
@@ -334,7 +334,7 @@ function Final_UpdateTable( action ) {
     if ( (action == "click" && !isCollapsed) || (action=="changerange" && isCollapsed) ) {
         UpdateTable = true;
         $("#corpusdisplayer").html("Close Term List")
-    } else $("#corpusdisplayer").html("Show Term List")
+    } else $("#corpusdisplayer").html("View by terms")
     pr("update table??: "+UpdateTable)
...
/srv/gargantext_lib/js/libs
\ No newline at end of file
/srv/gargantext_lib/js/settings_explorerjs.js
\ No newline at end of file
/srv/gargantext_lib/js/tinawebJS
\ No newline at end of file
@@ -34,9 +34,11 @@
     </center>
   </div>
 </div>
+
 <div class="row">
   <div id="monthly-volume-chart"></div>
 </div>
+
 <div id="content_loader">
   <br>
   <center>
@@ -50,10 +52,9 @@
 <div class="panel-heading">
   <h4 class="panel-title">
-    <a data-toggle="collapse" data-target="#journal_table" href="#">
+    <a data-toggle="collapse" data-parent="#accordion" href="#collapseOne">
       <!-- Final_UpdateTable redraws the dynatable if necessary -->
-      <p id="corpusdisplayer" onclick='Final_UpdateTable("click")' class="btn btn-primary btn-lg">
-        Open Folder
+      <p id="corpusdisplayer" onclick='Final_UpdateTable("click")' class="btn btn-primary btn-lg">Journals
       </p>
     </a>
   </h4>
...