Commit bfcabfad authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'unstable' into testing

parents 1a2a1006 55485f8e
......@@ -2,7 +2,7 @@
## Community
* [http://gargantext.org/about](http://gargantext.org/about)
* IRC Chat: (OFTC/FreeNode) #gargantex
* IRC Chat: (OFTC/FreeNode) #gargantext
## Tools
* gogs
......
......@@ -263,7 +263,7 @@ RESOURCETYPES = [
},
{ "type": 11,
"name": 'HAL [API]',
"name": 'HAL (english) [API]',
"parser": "HalParser",
"format": 'JSON',
'file_formats':["zip","json"],
......
from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint, Index
from sqlalchemy.orm import relationship
from sqlalchemy.orm import relationship, validates
from sqlalchemy.types import TypeDecorator, \
Integer, Float, Boolean, DateTime, String, Text
from sqlalchemy.dialects.postgresql import JSONB, DOUBLE_PRECISION as Double
......@@ -7,6 +7,7 @@ from sqlalchemy.ext.mutable import MutableDict, MutableList
from sqlalchemy.ext.declarative import declarative_base
__all__ = ["Column", "ForeignKey", "UniqueConstraint", "relationship",
"validates", "ValidatorMixin",
"Integer", "Float", "Boolean", "DateTime", "String", "Text",
"TypeDecorator",
"JSONB", "Double",
......@@ -18,6 +19,25 @@ __all__ = ["Column", "ForeignKey", "UniqueConstraint", "relationship",
# all tables handled by Alembic migration scripts.
Base = declarative_base()
# To be used by tables already handled by the Django ORM, such as the User
# model. We separate them in order to keep those out of Alembic's sight.
DjangoBase = declarative_base()
class ValidatorMixin(object):
def enforce_length(self, key, value):
"""Truncate a string according to its column length
Usage example:
.. code-block:: python
@validates('some_column')
def validate_some_column(self, key, value):
return self.enforce_length(key, value)
"""
max_len = getattr(self.__class__, key).prop.columns[0].type.length
if value and len(value) > max_len:
return value[:max_len]
return value
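For orientation, a minimal sketch of how the new mixin is meant to be used; the `Tag` model, its column, and the import path are hypothetical, not part of this commit:

```python
# Hypothetical model using ValidatorMixin; the import path is assumed.
from sqlalchemy import Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import validates

from gargantext.models.base import ValidatorMixin  # assumed location

Base = declarative_base()

class Tag(ValidatorMixin, Base):
    __tablename__ = 'tags'
    id = Column(Integer, primary_key=True)
    label = Column(String(32))

    @validates('label')
    def validate_label(self, key, value):
        # values longer than String(32) are silently truncated
        return self.enforce_length(key, value)

tag = Tag(label='x' * 100)
assert len(tag.label) == 32
```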
......@@ -9,7 +9,7 @@ from datetime import datetime
from .base import Base, Column, ForeignKey, relationship, TypeDecorator, Index, \
Integer, Float, String, DateTime, JSONB, \
MutableList, MutableDict
MutableList, MutableDict, validates, ValidatorMixin
from .users import User
__all__ = ['Node', 'NodeNode', 'CorpusNode']
......@@ -26,7 +26,7 @@ class NodeType(TypeDecorator):
return NODETYPES[typeindex]
class Node(Base):
class Node(ValidatorMixin, Base):
"""This model can fit many purposes:
myFirstCorpus = session.query(CorpusNode).first()
......@@ -112,6 +112,10 @@ class Node(Base):
'user_id={0.user_id}, parent_id={0.parent_id}, ' \
'name={0.name!r}, date={0.date})>'.format(self)
@validates('name')
def validate_name(self, key, value):
return self.enforce_length(key, value)
@property
def ngrams(self):
"""Pseudo-attribute allowing to retrieve a node's ngrams.
......
......@@ -29,24 +29,33 @@ class HalCrawler(Crawler):
'''Formatting the query'''
#search_field="title_t"
search_field="abstract_t"
#search_field="abstract_t"
return (search_field + ":" + "(" + query + ")")
#return (search_field + ":" + "(" + query + ")")
return "(" + query + ")"
def _get(self, query, fromPage=1, count=10, lang=None):
# Parameters
fl = """ title_s
, abstract_s
fl = """ en_title_s
, en_abstract_s
, submittedDate_s
, journalDate_s
, authFullName_s
, uri_s
, isbn_s
, issue_s
, journalTitle_s
, language_s
, doiId_s
, authId_i
, instStructId_i
, deptStructId_i
, labStructId_i
, rteamStructId_i
, docType_s
, journalPublisher_s
"""
#, authUrl_s
#, type_s
......@@ -113,7 +122,7 @@ class HalCrawler(Crawler):
msg = "Invalid sample size N = %i (max = %i)" % ( self.query_max
, QUERY_SIZE_N_MAX
)
print("ERROR (scrap: Multivac d/l ): " , msg)
print("ERROR (scrap: HAL d/l ): " , msg)
self.query_max = QUERY_SIZE_N_MAX
#for page in range(1, trunc(self.query_max / 100) + 2):
......
......@@ -2,6 +2,7 @@
import subprocess
import re
from .sparql import Service
from gargantext.settings import BOOL_TOOLS_PATH
#from sparql import Service
def bool2sparql(rawQuery, count=False, offset=None, limit=None):
......@@ -12,7 +13,7 @@ def bool2sparql(rawQuery, count=False, offset=None, limit=None):
See: https://github.com/delanoe/bool2sparql
"""
query = re.sub("\"", "\'", rawQuery)
bashCommand = ["/srv/gargantext/gargantext/util/crawlers/sparql/bool2sparql-exe","-q",query]
bashCommand = [BOOL_TOOLS_PATH + "/bool2sparql-exe","-q",query]
if count is True :
bashCommand.append("-c")
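For context, a hedged sketch of how a `bashCommand` list like this is typically executed; the rest of the real function is elided from this diff, so this is an assumption, not the committed code:

```python
# Sketch: run the bool2sparql binary and capture its output.
import subprocess

proc = subprocess.Popen(bashCommand,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE)
out, err = proc.communicate()
if proc.returncode != 0:
    raise RuntimeError("bool2sparql-exe failed: %s" % err.decode("utf-8"))
sparql_query = out.decode("utf-8")
```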
......
......@@ -5,15 +5,9 @@ from gargantext.util.json import json_dumps
########################################################################
# get engine, session, etc.
########################################################################
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import delete
# To make Full Text search possible, uncomment lines below
# (and install it with pip before)
#from sqlalchemy_searchable import make_searchable
def get_engine():
from sqlalchemy import create_engine
return create_engine( settings.DATABASES['default']['URL']
......@@ -24,16 +18,8 @@ def get_engine():
engine = get_engine()
# To make Full Text search possible, uncomment lines below
# https://sqlalchemy-searchable.readthedocs.io/
#sa.orm.configure_mappers()
Base = declarative_base()
#Base.metadata.create_all(engine)
#make_searchable()
session = scoped_session(sessionmaker(bind=engine))
########################################################################
# useful for queries
########################################################################
......
......@@ -7,7 +7,7 @@ from gargantext.util.db import session, aliased
from gargantext.models import Ngram, NodeNgramNgram
from igraph import Graph # for group_union
def query_groups(groupings_id, details=False):
def query_groups(groupings_id, details=False, sort=False):
"""
Listing of couples (mainform, subform)
aka (ngram1_id, ngram2_id)
......@@ -15,24 +15,27 @@ def query_groups(groupings_id, details=False):
Parameter:
- details: if False, just send the array of couples
if True, send quadruplets with (ngram1_id, term1, ngram2_id, term2)
- sort: order results by terms of ngram1 then ngram2
"""
if details or sort:
Ngram1, Ngram2 = Ngram, aliased(Ngram)
if not details:
# simple contents
query = session.query(NodeNgramNgram.ngram1_id, NodeNgramNgram.ngram2_id)
columns = (NodeNgramNgram.ngram1_id, NodeNgramNgram.ngram2_id)
else:
# detailed contents (id + terms)
Ngram1 = aliased(Ngram)
Ngram2 = aliased(Ngram)
query = (session
.query(
NodeNgramNgram.ngram1_id,
Ngram1.terms,
NodeNgramNgram.ngram2_id,
Ngram2.terms,
)
.join(Ngram1, NodeNgramNgram.ngram1_id == Ngram1.id)
.join(Ngram2, NodeNgramNgram.ngram2_id == Ngram2.id)
)
columns = (Ngram1.id, Ngram1.terms,
Ngram2.id, Ngram2.terms)
query = session.query(*columns)
if details or sort:
query = (query.join(Ngram1, NodeNgramNgram.ngram1_id == Ngram1.id)
.join(Ngram2, NodeNgramNgram.ngram2_id == Ngram2.id))
if sort:
query = query.order_by(Ngram1.terms, Ngram2.terms)
# main filter
# -----------
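An illustration of the refactored signature; the group-list id is hypothetical:

```python
# Bare couples, as before:
couples = query_groups(1234).all()   # [(ngram1_id, ngram2_id), ...]

# Detailed quadruplets, now optionally ordered by both terms:
quads = query_groups(1234, details=True, sort=True).all()
# -> [(ngram1_id, term1, ngram2_id, term2), ...], sorted by term1 then term2
```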
......
......@@ -73,7 +73,8 @@ from rest_framework.views import APIView
from gargantext.util.json import json_encoder
def JsonHttpResponse(data, status=200):
return HttpResponse(
content = json_encoder.encode(data),
content = data.encode('utf-8') if isinstance(data, str) else \
json_encoder.encode(data),
content_type = 'application/json; charset=utf-8',
status = status
)
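The helper now accepts either structured data or an already-encoded JSON string; for example (payloads illustrative):

```python
JsonHttpResponse({'results_nb': 42})    # dict: encoded with json_encoder
JsonHttpResponse('{"results_nb": 42}')  # str: assumed to be valid JSON and
                                        # sent as-is, just UTF-8 encoded
```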
......
......@@ -50,6 +50,9 @@ class _BaseClass:
else:
return NotImplemented
def __len__(self):
return len(self.items)
def __repr__(self):
items = self.items
if isinstance(items, defaultdict):
......
......@@ -8,8 +8,7 @@ Tools to work with ngramlists (MAINLIST, MAPLIST, STOPLIST)
"""
from gargantext.util.group_tools import query_groups, group_union
from gargantext.util.db import session, desc, func, \
bulk_insert_ifnotexists
from gargantext.util.db import session, bulk_insert_ifnotexists
from gargantext.models import Ngram, NodeNgram, NodeNodeNgram, \
NodeNgramNgram, Node
......@@ -25,7 +24,6 @@ from gargantext.util.toolchain.ngrams_extraction import normalize_forms
# merge will also index the new ngrams in the docs of the corpus
from gargantext.util.toolchain.ngrams_addition import index_new_ngrams
from sqlalchemy.sql import exists
from os import path
from csv import writer, reader, QUOTE_MINIMAL
from collections import defaultdict
......@@ -35,8 +33,8 @@ from celery import shared_task
def query_list(list_id,
pagination_limit=None, pagination_offset=None,
details=False, scoring_metric_id=None, groupings_id=None
):
details=False, scoring_metric_id=None, groupings_id=None,
sort=False):
"""
Paginated listing of ngram_ids in a NodeNgram list.
......@@ -51,6 +49,7 @@ def query_list(list_id,
(for details and sorting)
- groupings_id: optional id of a list of grouping relations (synonyms)
(each synonym will be added to the list if not already in there)
- sort: order by Ngram.terms (not possible if details is False)
FIXME: subforms appended recently and not generalized enough
=> add a common part for all "if groupings_id"
......@@ -114,7 +113,10 @@ def query_list(list_id,
query = query.limit(pagination_limit)
if pagination_offset:
query = query.offset(pagination_offsets)
query = query.offset(pagination_offset)
if details and sort:
query = query.order_by(Ngram.terms)
return query
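Example of the new keyword; the list id is hypothetical, and sort only takes effect together with details:

```python
# details=True is required for sort=True to have any effect
ngrams = query_list(5678, details=True, sort=True).all()
# -> rows ordered alphabetically by Ngram.terms
```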
......@@ -175,9 +177,7 @@ def ngrams_to_csv_rows(ngram_objs, ngram_dico={}, group_infos={},
# 3 columns = |status, | mainform, | forms
# (type_of_list) ( term ) ( subterm1|&|subterm2 )
csv_rows.append(
[list_type,ng_obj.terms,this_grouped_terms]
)
csv_rows.append([list_type, ng_obj.terms, this_grouped_terms])
return csv_rows
......@@ -220,9 +220,10 @@ def export_ngramlists(node,fname=None,delimiter=DEFAULT_CSV_DELIM,titles=True):
# corresponding lists of ngram_ids
# ------------------------------------
# contents: list of ngram objects [(2562,"monterme",1),...]
stop_ngrams = query_list(stoplist_node.id, details=True, groupings_id=group_node.id).all()
main_ngrams = query_list(mainlist_node.id, details=True, groupings_id=group_node.id).all()
map_ngrams = query_list(maplist_node.id, details=True, groupings_id=group_node.id).all()
stop_ngrams, main_ngrams, map_ngrams = (
query_list(n.id, details=True, groupings_id=group_node.id, sort=True).all()
for n in (stoplist_node, mainlist_node, maplist_node)
)
# for debugging ---------->8 --------------------
#~ stop_ngrams = stop_ngrams[0:10]
......@@ -239,7 +240,7 @@ def export_ngramlists(node,fname=None,delimiter=DEFAULT_CSV_DELIM,titles=True):
# for the groups we got couples of ids in the DB
# -------------------
# ex: [(3544, 2353), (2787, 4032), ...]
group_ngram_id_couples = query_groups(group_node.id).all()
group_ngram_id_couples = query_groups(group_node.id, sort=True)
# we expand this to a double structure for groups lookup
# 1) g['links'] = k couples (x,y_i) as a set [x => {y1,y2}]
......@@ -386,6 +387,9 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM,
NB: To merge the imported lists into a corpus node's lists,
chain this function with merge_ngramlists()
'''
list_types = ['stop','main','map']
# ---------------
# ngram storage
# ---------------
......@@ -450,7 +454,6 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM,
# headers
if i == 0:
n_cols = len(csv_row)
for j, colname in enumerate(csv_row):
if colname in ['label', 'status', 'forms']:
columns[colname] = j
......@@ -497,31 +500,30 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM,
continue
# --- check correct list type
if not this_list_type in ['stop','main','map']:
if not this_list_type in list_types:
print("IMPORT WARN: (skip line) wrong list type at CSV %s:l.%i" % (fname, i))
continue
# subforms can be duplicated (in forms and another label)
# but we must take care of other unwanted duplicates too
if this_row_label in imported_unique_ngramstrs:
print("TODO IMPORT DUPL: (skip line) term appears more than once at CSV %s:l.%i"
% (fname, i))
if imported_unique_ngramstrs.get(this_row_label) == 1:
print("TODO IMPORT DUPL: (skip line) term %r appears more than once at CSV %s:l.%i"
% (this_row_label, fname, i))
# ================= Store the data ====================
# the ngram census
imported_unique_ngramstrs[this_row_label] = True
imported_unique_ngramstrs[this_row_label] = 1
# and the "list to ngram" relation
imported_nodes_ngrams[this_list_type].append(this_row_label)
# ====== Store synonyms from the import (if any) ======
if len(this_row_forms) != 0:
other_terms = []
for raw_term_str in this_row_forms.split(group_delimiter):
# each subform is also like an ngram declaration
term_str = normalize_forms(normalize_chars(raw_term_str))
imported_unique_ngramstrs[term_str] = True
imported_unique_ngramstrs[term_str] = 2
imported_nodes_ngrams[this_list_type].append(term_str)
# the optional repeated mainform doesn't interest us
......@@ -599,7 +601,10 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM,
% (n_total_ng, n_added_ng, n_total_ng-n_added_ng) )
print("IMPORT: read %i grouping relations" % n_group_relations)
# print("IMPORT RESULT", result)
list_counts = [(typ, len(result.get(typ))) for typ in list_types]
list_counts.append(('total', sum(x[1] for x in list_counts)))
print("IMPORT: " + '; '.join('%s %s' % stats for stats in list_counts))
return result
def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
......@@ -707,9 +712,11 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
# ======== Merging all involved ngrams =========
# all memberships with resolved conflicts of interfering memberships
# all ngram memberships with resolved conflicts of interfering memberships
# (associates ngram ids with list types -- see linfos definition above)
resolved_memberships = {}
# iterates over each ngram of each list type for both old and new lists
for list_set in [old_lists, new_lists]:
for lid, info in enumerate(linfos):
list_type = info['key']
......@@ -738,12 +745,15 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
# ======== Merging old and new groups =========
# get the arcs already in the target DB (directed couples)
previous_links = session.query(
NodeNgramNgram.ngram1_id,
NodeNgramNgram.ngram2_id
).filter(
NodeNgramNgram.node_id == old_group_id
).all()
if 'groupings' in del_originals:
previous_links = []
else:
previous_links = session.query(
NodeNgramNgram.ngram1_id,
NodeNgramNgram.ngram2_id
).filter(
NodeNgramNgram.node_id == old_group_id
).all()
n_links_previous = len(previous_links)
......@@ -811,7 +821,7 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
list_type = linfos[lid]['key']
merged_results[list_type].items.add(ng_id)
# print("IMPORT: added %i elements in the lists indices" % added_nd_ng)
print("IMPORT: added %i elements in the lists indices" % added_nd_ng)
# ======== Overwrite old data with new =========
for lid, info in enumerate(linfos):
......@@ -834,13 +844,17 @@ def import_and_merge_ngramlists(file_contents, onto_corpus_id, overwrite=False):
"""
A single function to run import_ngramlists and merge_ngramlists together
"""
print("import list")
print("IMPORT CSV termlists file with %s lines in corpus %s (%s)" % (
len(file_contents),
onto_corpus_id, 'overwrite' if overwrite else 'merge'))
new_lists = import_ngramlists(file_contents)
corpus_node = session.query(Node).filter(Node.id == onto_corpus_id).first()
corpus_node = session.query(Node).get(onto_corpus_id)
# merge the new_lists onto those of the target corpus
del_originals = ['stop', 'main', 'map'] if overwrite else []
del_originals = ['stop', 'main', 'map', 'groupings'] if overwrite else []
log_msg = merge_ngramlists(new_lists, onto_corpus=corpus_node, del_originals=del_originals)
return log_msg
......@@ -4,128 +4,67 @@ import sys
import csv
csv.field_size_limit(sys.maxsize)
import numpy as np
import os
class CSVParser(Parser):
DELIMITERS = ", \t;|:"
def CSVsample( self, small_contents , delim) :
reader = csv.reader(small_contents, delimiter=delim)
def detect_delimiter(self, lines, sample_size=10):
sample = lines[:sample_size]
Freqs = []
for row in reader:
Freqs.append(len(row))
# Compute frequency of each delimiter on each input line
delimiters_freqs = {
d: [line.count(d) for line in sample]
for d in self.DELIMITERS
}
return Freqs
# Select delimiters with a standard deviation of zero, i.e. delimiters
# for which we have the same number of fields on each line
selected_delimiters = [
(d, np.sum(freqs))
for d, freqs in delimiters_freqs.items()
if any(freqs) and np.std(freqs) == 0
]
if selected_delimiters:
# Choose the delimiter with highest frequency amongst selected ones
sorted_delimiters = sorted(selected_delimiters, key=lambda x: x[1])
return sorted_delimiters[-1][0]
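The detection logic in isolation on a toy sample; this is a runnable sketch of the same computation, not a call into the class:

```python
import numpy as np

lines = ["title;year;abstract",
         "Foo;2001;Lorem ipsum",
         "Bar;2002;Dolor sit amet"]

# ';' appears exactly twice on every line (stddev 0, non-zero sum);
# ' ' varies per line and the other delimiters never appear.
freqs = {d: [line.count(d) for line in lines] for d in ", \t;|:"}
stable = [(d, np.sum(f)) for d, f in freqs.items()
          if any(f) and np.std(f) == 0]
print(sorted(stable, key=lambda x: x[1])[-1][0])   # ';'
```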
def parse(self, filebuf):
print("CSV: parsing (assuming UTF-8 and LF line endings)")
contents = filebuf.read().decode("UTF-8").split("\n")
sample_size = 10
sample_contents = contents[0:sample_size]
hyperdata_list = []
# # = = = = [ Getting delimiters frequency ] = = = = #
PossibleDelimiters = [ ',',' ','\t', ';', '|', ':' ]
AllDelimiters = {}
for delim in PossibleDelimiters:
AllDelimiters[delim] = self.CSVsample( sample_contents , delim )
# # = = = = [ / Getting delimiters frequency ] = = = = #
# # OUTPUT example:
# # AllDelimiters = {
# # '\t': [1, 1, 1, 1, 1],
# # ' ': [1, 13, 261, 348, 330],
# # ',': [15, 15, 15, 15, 15],
# # ';': [1, 1, 1, 1, 1],
# # '|': [1, 1, 1, 1, 1]
# # }
# # = = = = [ Stand.Dev=0 & Sum of delimiters ] = = = = #
Delimiters = []
for d in AllDelimiters:
freqs = AllDelimiters[d]
suma = np.sum( freqs )
if suma >0:
std = np.std( freqs )
# print [ d , suma , len(freqs) , std]
if std == 0:
Delimiters.append ( [ d , suma , len(freqs) , std] )
# # = = = = [ / Stand.Dev=0 & Sum of delimiters ] = = = = #
# # OUTPUT example:
# # Delimiters = [
# # ['\t', 5, 5, 0.0],
# # [',', 75, 5, 0.0],
# # ['|', 5, 5, 0.0]
# # ]
# # = = = = [ Delimiter selection ] = = = = #
Sorted_Delims = sorted(Delimiters, key=lambda x: x[1], reverse=True)
HighestDelim = Sorted_Delims[0][0]
# HighestDelim = ","
print("CSV selected delimiter:",[HighestDelim])
# # = = = = [ / Delimiter selection ] = = = = #
# # = = = = [ First data coordinate ] = = = = #
Coords = {
"row": -1,
"column": -1
}
# Filter out empty lines
contents = [line for line in contents if line.strip()]
# Delimiter auto-detection
delimiter = self.detect_delimiter(contents, sample_size=10)
if delimiter is None:
raise ValueError("CSV: couldn't detect delimiter, bug or malformed data")
print("CSV: selected delimiter: %r" % delimiter)
# Parse CSV
reader = csv.reader(contents, delimiter=delimiter)
# Get the first non-empty row and its fields (i.e. the header row), or (0, [])
first_row, headers = \
next(((i, fields) for i, fields in enumerate(reader) if any(fields)),
(0, []))
# Get the first non-empty column of the first row, or 0
first_col = next((i for i, field in enumerate(headers) if field), 0)
# Strip out potential empty fields in headers
headers = headers[first_col:]
reader = csv.reader(contents, delimiter=HighestDelim)
for rownum, tokens in enumerate(reader):
if rownum % 250 == 0:
print("CSV row: ", rownum)
joined_tokens = "".join (tokens)
if Coords["row"]<0 and len( joined_tokens )>0 :
Coords["row"] = rownum
for columnum in range(len(tokens)):
t = tokens[columnum]
if len(t)>0:
Coords["column"] = columnum
break
# # = = = = [ / First data coordinate ] = = = = #
# # = = = = [ Setting Headers ] = = = = #
Headers_Int2Str = {}
reader = csv.reader(contents, delimiter=HighestDelim)
for rownum, tokens in enumerate(reader):
if rownum>=Coords["row"]:
for columnum in range( Coords["column"],len(tokens) ):
t = tokens[columnum]
Headers_Int2Str[columnum] = t
break
# print("Headers_Int2Str")
# print(Headers_Int2Str)
# # = = = = [ / Setting Headers ] = = = = #
# # OUTPUT example:
# # Headers_Int2Str = {
# # 0: 'publication_date',
# # 1: 'publication_month',
# # 2: 'publication_second',
# # 3: 'abstract'
# # }
# # = = = = [ Reading the whole CSV and saving ] = = = = #
hyperdata_list = []
reader = csv.reader(contents, delimiter=HighestDelim)
for rownum, tokens in enumerate(reader):
if rownum>Coords["row"]:
RecordDict = {}
for columnum in range( Coords["column"],len(tokens) ):
data = tokens[columnum]
RecordDict[ Headers_Int2Str[columnum] ] = data
if len(RecordDict.keys())>0:
hyperdata_list.append( RecordDict )
# # = = = = [ / Reading the whole CSV and saving ] = = = = #
return hyperdata_list
# Return a generator of dictionaries with column labels as keys,
# filtering out empty rows
for i, fields in enumerate(reader):
if i % 500 == 0:
print("CSV: parsing row #%s..." % (i+1))
if any(fields):
yield dict(zip(headers, fields[first_col:]))
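Note that parse() now returns a generator instead of a list, so callers needing a length or random access must materialize it first. A hedged usage sketch (the file name and the no-argument parser instantiation are illustrative):

```python
with open('corpus.csv', 'rb') as f:
    docs = list(CSVParser().parse(f))   # materialize the generator
print("%d documents" % len(docs))
```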
......@@ -11,25 +11,26 @@ from datetime import datetime
import json
class HalParser(Parser):
def parse(self, filebuf):
'''
parse :: FileBuff -> [Hyperdata]
'''
contents = filebuf.read().decode("UTF-8")
data = json.loads(contents)
def _parse(self, json_docs):
filebuf.close()
json_docs = data
hyperdata_list = []
hyperdata_path = { "id" : "isbn_s"
, "title" : "title_s"
, "abstract" : "abstract_s"
, "source" : "journalPublisher_s"
, "url" : "uri_s"
, "authors" : "authFullName_s"
hyperdata_path = { "id" : "isbn_s"
, "title" : "en_title_s"
, "abstract" : "en_abstract_s"
, "source" : "journalTitle_s"
, "url" : "uri_s"
, "authors" : "authFullName_s"
, "isbn_s" : "isbn_s"
, "issue_s" : "issue_s"
, "language_s" : "language_s"
, "doiId_s" : "doiId_s"
, "authId_i" : "authId_i"
, "instStructId_i" : "instStructId_i"
, "deptStructId_i" : "deptStructId_i"
, "labStructId_i" : "labStructId_i"
, "rteamStructId_i" : "rteamStructId_i"
, "docType_s" : "docType_s"
}
uris = set()
......@@ -42,9 +43,9 @@ class HalParser(Parser):
field = doc.get(path, "NOT FOUND")
if isinstance(field, list):
hyperdata[key] = ", ".join(field)
hyperdata[key] = ", ".join(map(lambda x: str(x), field))
else:
hyperdata[key] = field
hyperdata[key] = str(field)
if hyperdata["url"] in uris:
print("Document already parsed")
......@@ -73,3 +74,13 @@ class HalParser(Parser):
hyperdata_list.append(hyperdata)
return hyperdata_list
def parse(self, filebuf):
'''
parse :: FileBuff -> [Hyperdata]
'''
contents = filebuf.read().decode("UTF-8")
data = json.loads(contents)
return self._parse(data)
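Splitting _parse out of parse lets callers that already hold decoded JSON (e.g. the HAL crawler) skip the file layer. A hedged sketch; the document contents and the no-argument constructor are assumptions:

```python
# Illustrative HAL-like document; real API responses carry more fields.
json_docs = [{"en_title_s": ["A title"],
              "en_abstract_s": ["An abstract"],
              "uri_s": "https://hal.example/hal-0000001"}]
hyperdata_list = HalParser()._parse(json_docs)
```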
......@@ -81,44 +81,45 @@ def extract_ngrams(corpus, keys=DEFAULT_INDEX_FIELDS, do_subngrams = DEFAULT_IND
corpus.hyperdata["skipped_docs"].append(document.id)
corpus.save_hyperdata()
continue
else:
# ready !
tagger = tagger_bots[language_iso2]
# to do verify if document has no KEYS to index
# eg: use set intersect (+ loop becomes direct! with no continue)
for key in keys:
try:
value = document.hyperdata[str(key)]
if not isinstance(value, str):
#print("DBG wrong content in doc for key", key)
continue
# get ngrams
for ngram in tagger.extract(value):
tokens = tuple(normalize_forms(token[0]) for token in ngram)
if do_subngrams:
# ex tokens = ["very", "cool", "exemple"]
# subterms = [['very', 'cool'],...]
subterms = subsequences(tokens)
else:
subterms = [tokens]
for seqterm in subterms:
ngram = ' '.join(seqterm)
nbwords = len(seqterm)
nbchars = len(ngram)
if nbchars > 1:
if nbchars > 255:
# max ngram length (DB constraint)
ngram = ngram[:255]
# doc <=> ngram index
nodes_ngrams_count[(document.id, ngram)] += 1
# add fields : terms n
ngrams_data.add((ngram, nbwords, ))
except:
#value not in doc
# ready !
tagger = tagger_bots[language_iso2]
# TODO: verify whether the document has any KEYS to index
# e.g. use set intersection (+ the loop becomes direct, with no continue)
for key in keys:
try:
value = document.hyperdata[str(key)]
if not isinstance(value, str):
#print("DBG wrong content in doc for key", key)
continue
# get ngrams
for ngram in tagger.extract(value):
normal_forms = (normalize_forms(t[0]) for t in ngram)
tokens = tuple(nf for nf in normal_forms if nf)
if do_subngrams:
# ex tokens = ["very", "cool", "exemple"]
# subterms = [['very', 'cool'],...]
subterms = subsequences(tokens)
else:
subterms = [tokens]
for seqterm in subterms:
ngram = ' '.join(seqterm)
nbwords = len(seqterm)
nbchars = len(ngram)
if nbchars > 1:
if nbchars > 255:
# max ngram length (DB constraint)
ngram = ngram[:255]
# doc <=> ngram index
nodes_ngrams_count[(document.id, ngram)] += 1
# add fields : terms n
ngrams_data.add((ngram, nbwords, ))
except:
#value not in doc
continue
# integrate ngrams and nodes-ngrams
if len(nodes_ngrams_count) >= BATCH_NGRAMSEXTRACTION_SIZE:
......
......@@ -155,7 +155,12 @@ class CSVLists(APIView):
try:
# merge the source_lists onto those of the target corpus
delete = todo_lists if bool(params.get('overwrite')) else []
if len(delete) == len(list_types):
delete.append('groupings')
log_msg = merge_ngramlists(source_lists, onto_corpus=corpus_node, del_originals=delete)
return JsonHttpResponse({
'log': log_msg,
}, 200)
......
from django.conf.urls import url
from rest_framework_jwt.views import obtain_jwt_token
from . import nodes
from . import projects
from . import corpora
......@@ -10,78 +12,81 @@ from . import ngramlists
from . import analytics
from graph.rest import Graph
urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view() )
, url(r'^nodes/(\d+)$' , nodes.NodeResource.as_view() )
, url(r'^nodes/(\d+)/having$' , nodes.NodeListHaving.as_view() )
, url(r'^nodes/(\d+)/status$' , nodes.Status.as_view() )
#Projects
, url(r'^projects$' , projects.ProjectList.as_view() )
, url(r'^projects/(\d+)$' , projects.ProjectView.as_view() )
#?view=resource
#?view=docs
#Corpora
, url(r'^projects/(\d+)/corpora/(\d+)$' , corpora.CorpusView.as_view() )
#?view=source
#?view=title
#?view=analytics
#Sources
#, url(r'^projects/(\d+)/corpora/(\d+)/sources$' , corpora.CorpusSources.as_view() )
#, url(r'^projects/(\d+)/corpora/(\d+)/sources/(\d+)$' , corpora.CorpusSourceView.as_view() )
#Facets
, url(r'^projects/(\d+)/corpora/(\d+)/facets$' , nodes.CorpusFacet.as_view() )
#Favorites
, url(r'^projects/(\d+)/corpora/(\d+)/favorites$', nodes.CorpusFavorites.as_view() )
#Metrics
, url(r'^projects/(\d+)/corpora/(\d+)/metrics$', metrics.CorpusMetrics.as_view() )
#GraphExplorer
, url(r'^projects/(\d+)/corpora/(\d+)/explorer$' , Graph.as_view())
urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view())
, url(r'^nodes/(\d+)$' , nodes.NodeResource.as_view())
, url(r'^nodes/(\d+)/having$' , nodes.NodeListHaving.as_view())
, url(r'^nodes/(\d+)/status$' , nodes.Status.as_view())
# Projects
, url(r'^projects$' , projects.ProjectList.as_view())
, url(r'^projects/(\d+)$' , projects.ProjectView.as_view())
# Corpora
, url(r'^projects/(\d+)/corpora/(\d+)$', corpora.CorpusView.as_view())
# Sources
#, url(r'^projects/(\d+)/corpora/(\d+)/sources$', corpora.CorpusSources.as_view())
#, url(r'^projects/(\d+)/corpora/(\d+)/sources/(\d+)$ , corpora.CorpusSourceView.as_view())
# Facets
, url(r'^projects/(\d+)/corpora/(\d+)/facets$', nodes.CorpusFacet.as_view())
# Favorites
, url(r'^projects/(\d+)/corpora/(\d+)/favorites$', nodes.CorpusFavorites.as_view())
# Metrics
, url(r'^projects/(\d+)/corpora/(\d+)/metrics$', metrics.CorpusMetrics.as_view())
# GraphExplorer
, url(r'^projects/(\d+)/corpora/(\d+)/explorer$', Graph.as_view())
# data for graph explorer (json)
# GET /api/projects/43198/corpora/111107/explorer?
# Corresponding view is : /projects/43198/corpora/111107/explorer?
# Parameters (example):
# explorer?field1=ngrams&field2=ngrams&distance=conditional&bridgeness=5&start=1996-6-1&end=2002-10-5
# Ngrams
, url(r'^ngrams/?$' , ngrams.ApiNgrams.as_view() )
, url(r'^ngrams/?$' , ngrams.ApiNgrams.as_view())
# Analytics
, url(r'^nodes/(\d+)/histories$', analytics.NodeNgramsQueries.as_view())
, url(r'hyperdata$' , analytics.ApiHyperdata.as_view() )
, url(r'hyperdata$' , analytics.ApiHyperdata.as_view())
# get a list of ngram_ids or ngram_infos by list_id
# url(r'^ngramlists/(\d+)$', ngramlists.List.as_view()),
, url(r'^nodes/(\d+)/facets$' , nodes.CorpusFacet.as_view() )
, url(r'^nodes/(\d+)/favorites$', nodes.CorpusFavorites.as_view() )
# in these two routes the node is supposed to be a *corpus* node
, url(r'^nodes/(\d+)/facets$' , nodes.CorpusFacet.as_view())
, url(r'^nodes/(\d+)/favorites$', nodes.CorpusFavorites.as_view())
# in these two routes the node is supposed to be a *corpus* node
, url(r'^metrics/(\d+)$', metrics.CorpusMetrics.as_view() )
, url(r'^metrics/(\d+)$' , metrics.CorpusMetrics.as_view())
# update all metrics for a corpus
# ex: PUT metrics/123
# \
# corpus id
, url(r'^ngramlists/export$', ngramlists.CSVLists.as_view() )
, url(r'^ngramlists/export$', ngramlists.CSVLists.as_view())
# get a CSV export of the ngramlists of a corpus
# ex: GET ngramlists/export?corpus=43
# TODO : unify to a /api/ngrams?formatted=csv
# (similar to /api/nodes?formatted=csv)
, url(r'^ngramlists/import$', ngramlists.CSVLists.as_view() )
, url(r'^ngramlists/import$', ngramlists.CSVLists.as_view())
# same handling class as export (CSVLists)
# but this route used only for POST + file
# or PATCH + other corpus id
, url(r'^ngramlists/change$', ngramlists.ListChange.as_view() )
, url(r'^ngramlists/change$', ngramlists.ListChange.as_view())
# add or remove ngram from a list
# ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2
# rm <=> DEL ngramlists/change?list=42&ngrams=1,2
, url(r'^ngramlists/groups$', ngramlists.GroupChange.as_view() )
, url(r'^ngramlists/groups$', ngramlists.GroupChange.as_view())
# modify grouping couples of a group node
# ex: PUT/DEL ngramlists/groups?node=43
# & group data also in url: 767[]=209,640 & 779[]=436,265,385
, url(r'^ngramlists/family$' , ngramlists.ListFamily.as_view() )
, url(r'^ngramlists/family$', ngramlists.ListFamily.as_view())
# entire combination of lists from a corpus, dedicated to termtable
# (or any combination of lists that go together :
# - a mainlist
......@@ -89,8 +94,11 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
# - an optional maplist
# - an optional grouplist
, url(r'^ngramlists/maplist$' , ngramlists.MapListGlance.as_view() )
, url(r'^ngramlists/maplist$', ngramlists.MapListGlance.as_view())
# fast access to maplist, similarly formatted for termtable
, url(r'^user/parameters/$', users.UserParameters.as_view())
, url(r'^user/parameters/$', users.UserParameters.as_view())
, url('^auth/token$', obtain_jwt_token)
]
......@@ -11,6 +11,7 @@ django-celery==3.2.1
django-pgfields==1.4.4
django-pgjsonb==0.0.23
djangorestframework==3.5.3
djangorestframework-jwt==1.9.0
html5lib==0.9999999
python-igraph>=0.7.1
jdatetime==1.7.2
......
......@@ -16,7 +16,7 @@ sudo docker run \
--env POSTGRES_HOST=localhost \
-v /srv/gargantext:/srv/gargantext \
-it garg-notebook:latest \
/bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /srv/gargantext/ && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser'"
/bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /home/notebooks && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser'"
# #&& jupyter nbextension enable --py widgetsnbextension --sys-prefix
#/bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /srv/gargantext/ && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser --notebook-dir=/home/notebooks/'"
......
......@@ -78,32 +78,8 @@ RUN . /env_3-5/bin/activate && pip3 install -r requirements.txt
#RUN ./psql_configure.sh
#RUN ./django_configure.sh
RUN chown notebooks:notebooks -R /env_3-5
########################################################################
### Notebook IHaskell and IPYTHON ENVIRONMENT
########################################################################
#RUN apt-get update && apt-get install -y \
# libtinfo-dev \
# libzmq3-dev \
# libcairo2-dev \
# libpango1.0-dev \
# libmagic-dev \
# libblas-dev \
# liblapack-dev
#RUN curl -sSL https://get.haskellstack.org/ | sh
#RUN stack setup
#RUN git clone https://github.com/gibiansky/IHaskell
#RUN . /env_3-5/bin/activate \
# && cd IHaskell \
# && stack install gtk2hs-buildtools \
# && stack install --fast \
# && /root/.local/bin/ihaskell install --stack
#
#
########################################################################
### POSTGRESQL DATA (as ROOT)
########################################################################
......@@ -115,3 +91,32 @@ RUN chown notebooks:notebooks -R /env_3-5
EXPOSE 5432 8899
VOLUME ["/srv/","/home/notebooks/"]
########################################################################
### Notebook IHaskell and IPYTHON ENVIRONMENT
########################################################################
RUN apt-get update && apt-get install -y \
libtinfo-dev \
libzmq3-dev \
libcairo2-dev \
libpango1.0-dev \
libmagic-dev \
libblas-dev \
liblapack-dev
USER notebooks
RUN cd /home/notebooks \
&& curl -sSL https://get.haskellstack.org/ | sh \
&& stack setup \
&& git clone https://github.com/gibiansky/IHaskell \
&& . /env_3-5/bin/activate \
&& cd IHaskell \
&& stack install gtk2hs-buildtools \
&& stack install --fast \
&& /root/.local/bin/ihaskell install --stack
#!/usr/bin/env python
"""
Gargantext Software Copyright (c) 2016-2017 CNRS ISC-PIF -
http://iscpif.fr
......@@ -6,45 +7,29 @@ http://gitlab.iscpif.fr/humanities/gargantext/blob/stable/LICENSE )
- In France : a CECILL variant affero compliant
- GNU aGPLV3 for all other countries
"""
#!/usr/bin/env python
import sys
import os
import os
import django
# Django settings
dirname = os.path.dirname(os.path.realpath(__file__))
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext.settings")
# initialize Django application
from django.core.wsgi import get_wsgi_application
application = get_wsgi_application()
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'gargantext.settings')
django.setup()
from gargantext.util.toolchain.main import parse_extract_indexhyperdata
from gargantext.util.db import *
from gargantext.models import Node
from gargantext.constants import QUERY_SIZE_N_MAX, get_resource, get_resource_by_name
from gargantext.models import ProjectNode, DocumentNode, UserNode, User
from gargantext.util.db import session, get_engine
from collections import Counter
import importlib
from django.http import Http404
from nltk.tokenize import wordpunct_tokenize
from gargantext.models import *
from nltk.tokenize import word_tokenize
import nltk as nltk
from statistics import mean
from math import log
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
import datetime
class NotebookError(Exception):
pass
from collections import Counter
from langdetect import detect as detect_lang
def documents(corpus_id):
return (session.query(Node).filter( Node.parent_id==corpus_id
, Node.typename=="DOCUMENT"
)
# .order_by(Node.hyperdata['publication_date'])
.all()
)
return (session.query(DocumentNode).filter_by(parent_id=corpus_id)
#.order_by(Node.hyperdata['publication_date'])
.all())
#import seaborn as sns
......@@ -63,13 +48,134 @@ def scan_hal(request):
hal = HalCrawler()
return hal.scan_results(request)
def scan_gargantext(corpus_id, lang, request):
connection = get_engine().connect()
# TODO add some sugar the request (ideally request should be the same for hal and garg)
query = """select count(n.id) from nodes n
where to_tsvector('%s', hyperdata ->> 'abstract' || 'title')
where to_tsvector('%s', hyperdata ->> 'abstract' || 'title')
@@ to_tsquery('%s')
AND n.parent_id = %s;""" % (lang, request, corpus_id)
AND n.parent_id = %s;""" % (lang, request, corpus_id)
result = [i for i in connection.execute(query)][0][0]
connection.close()
return result
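Example call; the corpus id and query are illustrative, and the query string uses PostgreSQL to_tsquery syntax:

```python
# Count documents of corpus 1234 whose abstract/title match both terms
n_docs = scan_gargantext(1234, 'english', 'complex & systems')
```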
def myProject_fromUrl(url):
"""
myProject :: String -> Project
"""
project_id = url.split("/")[4]
project = session.query(ProjectNode).get(project_id)
return project
def newCorpus(project, source, name=None, query=None):
error = False
if name is None:
name = query
if not isinstance(project, ProjectNode):
error = "a valid project"
if not isinstance(source, int) and not isinstance(source, str):
error = "a valid source identifier: id or name"
elif not isinstance(query, str):
error = "a valid query"
elif not isinstance(name, str):
error = "a valid name"
if error:
raise NotebookError("Please provide %s." % error)
resource = get_resource(source) if isinstance(source, int) else \
get_resource_by_name(source)
moissonneur_name = get_moissonneur_name(resource) if resource else \
source.lower()
try:
moissonneur = get_moissonneur(moissonneur_name)
except ImportError:
raise NotebookError("Invalid source identifier: %r" % source)
return run_moissonneur(moissonneur, project, name, query)
def get_moissonneur_name(ident):
""" Return moissonneur module name from RESOURCETYPE or crawler name """
# Does it quack like a RESOURCETYPE?
if hasattr(ident, 'get'):
ident = ident.get('crawler')
# Extract name from crawler class name, otherwise assume ident is already
# a moissonneur name.
if isinstance(ident, str) and ident.endswith('Crawler'):
return ident[:-len('Crawler')].lower()
def get_moissonneur(name):
""" Return moissonneur module from its name """
if not isinstance(name, str) or not name.islower():
raise NotebookError("Invalid moissonneur name: %r" % name)
module = importlib.import_module('moissonneurs.%s' % name)
module.name = name
return module
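How the two helpers compose; a sketch assuming a moissonneurs.hal module exists, as in this repository:

```python
name = get_moissonneur_name({'crawler': 'HalCrawler'})  # -> 'hal'
hal = get_moissonneur(name)   # imports and returns moissonneurs.hal
```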
def run_moissonneur(moissonneur, project, name, query):
""" Run moissonneur and return resulting corpus """
# XXX Uber-kludge with gory details. Spaghetti rulezzzzz!
class Dummy(object):
pass
request = Dummy()
request.method = 'POST'
request.path = 'nowhere'
request.META = {}
# XXX 'string' only has an effect on moissonneurs.pubmed; its value is added
# when processing request client-side, take a deep breath and see
# templates/projects/project.html for more details.
request.POST = {'string': name,
'query': query,
'N': QUERY_SIZE_N_MAX}
request.user = Dummy()
request.user.id = project.user_id
request.user.is_authenticated = lambda: True
if moissonneur.name == 'istex':
# Replace ALL spaces by plus signs
request.POST['query'] = '+'.join(filter(None, query.split(' ')))
try:
import json
r = moissonneur.query(request)
raw_json = r.content.decode('utf-8')
data = json.loads(raw_json)
if moissonneur.name == 'pubmed':
count = sum(x['count'] for x in data)
request.POST['query'] = raw_json
elif moissonneur.name == 'istex':
count = data.get('total', 0)
else:
count = data.get('results_nb', 0)
if count > 0:
corpus = moissonneur.save(request, project.id, return_corpus=True)
else:
return None
except (ValueError, Http404) as e:
raise e
# Sometimes strange things happen...
if corpus.name != name:
corpus.name = name
session.commit()
return corpus
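Typical notebook usage of these helpers end to end; the URL, source, and query are illustrative:

```python
project = myProject_fromUrl("http://localhost/projects/123/")
corpus = newCorpus(project, source="hal", query="complex systems")
# corpus is None when the source reports zero results
```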
......@@ -30,7 +30,7 @@ def query( request):
#ids = crawlerbot.get_ids(query)
return JsonHttpResponse({"results_nb":crawlerbot.results_nb})
def save(request, project_id):
def save(request, project_id, return_corpus=False):
'''save'''
if request.method == "POST":
......@@ -101,6 +101,9 @@ def save(request, project_id):
session.rollback()
# --------------------------------------------
if return_corpus:
return corpus
return render(
template_name = 'pages/projects/wait.html',
request = request,
......
......@@ -33,7 +33,7 @@ def query( request):
print(results)
return JsonHttpResponse({"results_nb":crawlerbot.results_nb})
def save(request, project_id):
def save(request, project_id, return_corpus=False):
'''save'''
if request.method == "POST":
......@@ -103,6 +103,9 @@ def save(request, project_id):
session.rollback()
# --------------------------------------------
if return_corpus:
return corpus
return render(
template_name = 'pages/projects/wait.html',
request = request,
......
......@@ -29,7 +29,7 @@ def query( request):
#ids = crawlerbot.get_ids(query)
return JsonHttpResponse({"results_nb":crawlerbot.results_nb})
def save(request, project_id):
def save(request, project_id, return_corpus=False):
'''save'''
if request.method == "POST":
......@@ -100,6 +100,9 @@ def save(request, project_id):
session.rollback()
# --------------------------------------------
if return_corpus:
return corpus
return render(
template_name = 'pages/projects/wait.html',
request = request,
......
......@@ -52,7 +52,7 @@ def query( request ):
def save(request , project_id):
def save(request , project_id, return_corpus=False):
print("testISTEX:")
print(request.method)
alist = ["bar","foo"]
......@@ -171,6 +171,9 @@ def save(request , project_id):
session.rollback()
# --------------------------------------------
if return_corpus:
return corpus
return render(
template_name = 'pages/projects/wait.html',
request = request,
......
......@@ -33,7 +33,7 @@ def query( request):
print(results)
return JsonHttpResponse({"results_nb":crawlerbot.results_nb})
def save(request, project_id):
def save(request, project_id, return_corpus=False):
'''save'''
if request.method == "POST":
......@@ -104,6 +104,9 @@ def save(request, project_id):
session.rollback()
# --------------------------------------------
if return_corpus:
return corpus
return render(
template_name = 'pages/projects/wait.html',
request = request,
......
......@@ -69,7 +69,7 @@ def query( request ):
return JsonHttpResponse(data)
def save( request , project_id ) :
def save( request , project_id, return_corpus=False ) :
# implicit global session
# do we have a valid project id?
try:
......@@ -164,6 +164,10 @@ def save( request , project_id ) :
session.rollback()
# --------------------------------------------
sleep(1)
if return_corpus:
return corpus
return HttpResponseRedirect('/projects/' + str(project_id))
data = alist
......
......@@ -2,11 +2,38 @@
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Advanced Gargantext Tutorial (Python)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "TypeError",
"evalue": "'list' object is not callable",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-3-a8e3501c9a54>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'/srv/gargantext'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m: 'list' object is not callable"
]
}
],
"source": [
"import sys\n",
"sys.pa"
]
},
{
"cell_type": "code",
"execution_count": 1,
......@@ -28,7 +55,9 @@
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
......
......@@ -57,7 +57,7 @@
<center id="corpus" class="help">
<a data-toggle="modal" href="#addcorpus" >
<button
type="button"
......@@ -440,11 +440,12 @@
// in the form "Add a corpus"
var type = $("#id_type").val()
var file = $("#id_file").val()
// 5 booleans
var nameField = $("#id_name").val() != ""
var typeField = (type != "") && (type != "0")
var fileField = $("#id_file").val() != ""
var fileField = file != ""
var wantfileField = $("#file_yes").prop("checked")
var crawling = ((type==3)||(type==8)||(type==9)) && ! wantfileField
......@@ -457,6 +458,23 @@
if (! crawling) {
$("#submit_thing").prop('disabled' , !(nameField && typeField && fileField))
}
// Automatically select CSV when type is undefined
// and we have a .csv file
if (!typeField && file && file.match(/.csv$/i)) {
// Get CSV type id
var csv = $('#id_type > option')
.filter(function() {
return $(this).text() === 'CSV'
})
.attr('value')
// Select CSV type
$('#id_type').val(csv)
// Focus on name field
setTimeout(function() {
$("#id_name").focus()
})
}
}
function bringDaNoise() {
......@@ -532,7 +550,7 @@
$("#submit_thing").html("Process a {{ query_size }} sample!")
thequeries = data
var N=0,k=0;
var N=0;
for(var i in thequeries) N += thequeries[i].count
if( N>0) {
......@@ -571,12 +589,11 @@
$("#submit_thing").html("Process a {{ query_size }} sample!")
thequeries = data
var N=data.length,k=0;
// for(var i in thequeries) N += thequeries[i].count
if( N>1) {
var total = JSON.parse(data).total
console.log("N: "+total)
$("#theresults").html("<i> <b>"+pubmedquery+"</b>: "+total+" publications.</i><br>")
var N = data.total;
if (N > 0) {
console.log("N: "+N)
$("#theresults").html("<i> <b>"+pubmedquery+"</b>: "+N+" publications.</i><br>")
$('#submit_thing').prop('disabled', false);
} else {
$("#theresults").html("<i> <b>"+data[0]+"</b></i><br>")
......@@ -661,7 +678,7 @@
console.log(data)
console.log("SUCCESS")
console.log("enabling "+"#"+value.id)
// $("#"+value.id).attr('onclick','getGlobalResults(this);');
$("#submit_thing").prop('disabled' , false)
//$("#submit_thing").html("Process a {{ query_size }} sample!")
......@@ -721,7 +738,7 @@
console.log(data)
console.log("SUCCESS")
console.log("enabling "+"#"+value.id)
// $("#"+value.id).attr('onclick','getGlobalResults(this);');
$("#submit_thing").prop('disabled' , false)
//$("#submit_thing").html("Process a {{ query_size }} sample!")
......@@ -781,7 +798,7 @@
console.log(data)
console.log("SUCCESS")
console.log("enabling "+"#"+value.id)
// $("#"+value.id).attr('onclick','getGlobalResults(this);');
$("#submit_thing").prop('disabled' , false)
//$("#submit_thing").html("Process a {{ query_size }} sample!")
......@@ -876,12 +893,12 @@
console.log("selected:", selectedId);
// by typeID: 3 = PUBMED, 8 = ISTEX, 9 = CERN
if ( selectedId == "3"
|| selectedId == "8"
|| selectedId == "9"
|| selectedId == "10"
|| selectedId == "11"
|| selectedId == "12"
if ( selectedId == "3"
|| selectedId == "8"
|| selectedId == "9"
|| selectedId == "10"
|| selectedId == "11"
|| selectedId == "12"
) {
console.log("show the button for: " + selectedId)
$("#div-fileornot").css("visibility", "visible");
......@@ -1019,16 +1036,16 @@
function saveMultivac(query, N){
console.log("In Multivac")
if(!query || query=="") return;
console.log(query)
//var origQuery = query
var data = { "query" : query , "N": N };
// Replace all the slashes
var projectid = window.location.href.split("projects")[1].replace(/\//g, '')
console.log(data)
$.ajax({
dataType: 'json',
......@@ -1066,16 +1083,16 @@
function save(query, N, urlGarg){
console.log("In Gargantext")
if(!query || query=="") return;
console.log(query)
//var origQuery = query
var data = { "query" : query , "N": N };
// Replace all the slashes
var projectid = window.location.href.split("projects")[1].replace(/\//g, '')
console.log(data)
$.ajax({
dataType: 'json',
......