Commit fe23f25f authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'testing' into stable

parents a3e8e25d c12a0dae
...@@ -2,7 +2,7 @@
 ## Community
 * [http://gargantext.org/about](http://gargantext.org/about)
-* IRC Chat: (OFTC/FreeNode) #gargantex
+* IRC Chat: (OFTC/FreeNode) #gargantext
 ##Tools
 * gogs
......
...@@ -263,7 +263,7 @@ RESOURCETYPES = [
     },
     { "type": 11,
-      "name": 'HAL [API]',
+      "name": 'HAL (english) [API]',
       "parser": "HalParser",
       "format": 'JSON',
       'file_formats':["zip","json"],
......
from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint, Index from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint, Index
from sqlalchemy.orm import relationship from sqlalchemy.orm import relationship, validates
from sqlalchemy.types import TypeDecorator, \ from sqlalchemy.types import TypeDecorator, \
Integer, Float, Boolean, DateTime, String, Text Integer, Float, Boolean, DateTime, String, Text
from sqlalchemy.dialects.postgresql import JSONB, DOUBLE_PRECISION as Double from sqlalchemy.dialects.postgresql import JSONB, DOUBLE_PRECISION as Double
...@@ -7,6 +7,7 @@ from sqlalchemy.ext.mutable import MutableDict, MutableList ...@@ -7,6 +7,7 @@ from sqlalchemy.ext.mutable import MutableDict, MutableList
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
__all__ = ["Column", "ForeignKey", "UniqueConstraint", "relationship", __all__ = ["Column", "ForeignKey", "UniqueConstraint", "relationship",
"validates", "ValidatorMixin",
"Integer", "Float", "Boolean", "DateTime", "String", "Text", "Integer", "Float", "Boolean", "DateTime", "String", "Text",
"TypeDecorator", "TypeDecorator",
"JSONB", "Double", "JSONB", "Double",
...@@ -18,6 +19,25 @@ __all__ = ["Column", "ForeignKey", "UniqueConstraint", "relationship", ...@@ -18,6 +19,25 @@ __all__ = ["Column", "ForeignKey", "UniqueConstraint", "relationship",
# all tables handled by Alembic migration scripts. # all tables handled by Alembic migration scripts.
Base = declarative_base() Base = declarative_base()
# To be used by tables already handled by Django ORM, such as User model. We # To be used by tables already handled by Django ORM, such as User model. We
# separate them in order to keep those out of Alembic sight. # separate them in order to keep those out of Alembic sight.
DjangoBase = declarative_base() DjangoBase = declarative_base()
class ValidatorMixin(object):
def enforce_length(self, key, value):
"""Truncate a string according to its column length
Usage example:
.. code-block:: python
@validates('some_column')
def validate_some_column(self, key, value):
self.enforce_length(key, value)
"""
max_len = getattr(self.__class__, key).prop.columns[0].type.length
if value and len(value) > max_len:
return value[:max_len]
return value
...@@ -9,7 +9,7 @@ from datetime import datetime ...@@ -9,7 +9,7 @@ from datetime import datetime
from .base import Base, Column, ForeignKey, relationship, TypeDecorator, Index, \ from .base import Base, Column, ForeignKey, relationship, TypeDecorator, Index, \
Integer, Float, String, DateTime, JSONB, \ Integer, Float, String, DateTime, JSONB, \
MutableList, MutableDict MutableList, MutableDict, validates, ValidatorMixin
from .users import User from .users import User
__all__ = ['Node', 'NodeNode', 'CorpusNode'] __all__ = ['Node', 'NodeNode', 'CorpusNode']
...@@ -26,7 +26,7 @@ class NodeType(TypeDecorator):
         return NODETYPES[typeindex]
-class Node(Base):
+class Node(ValidatorMixin, Base):
"""This model can fit many purposes: """This model can fit many purposes:
myFirstCorpus = session.query(CorpusNode).first() myFirstCorpus = session.query(CorpusNode).first()
...@@ -112,6 +112,10 @@ class Node(Base):
                'user_id={0.user_id}, parent_id={0.parent_id}, ' \
                'name={0.name!r}, date={0.date})>'.format(self)
@validates('name')
def validate_name(self, key, value):
return self.enforce_length(key, value)
@property @property
def ngrams(self): def ngrams(self):
"""Pseudo-attribute allowing to retrieve a node's ngrams. """Pseudo-attribute allowing to retrieve a node's ngrams.
......
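The practical effect of wiring ValidatorMixin into Node is that over-long names are silently truncated on assignment. A minimal sketch of the behaviour, assuming Node.name is a String column with a length of 255 (the real limit comes from the column definition, not from this example):

# hedged sketch: 255 is an assumed column length for Node.name
node = session.query(Node).first()
node.name = "x" * 300            # triggers validate_name() -> enforce_length()
assert len(node.name) == 255     # truncated to the column length
node.name = "short name"         # values within the limit pass through unchanged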
...@@ -14,12 +14,12 @@ from gargantext.util.files import save ...@@ -14,12 +14,12 @@ from gargantext.util.files import save
class HalCrawler(Crawler): class HalCrawler(Crawler):
''' HAL API CLIENT''' ''' HAL API CLIENT'''
def __init__(self): def __init__(self):
# Main EndPoints # Main EndPoints
self.BASE_URL = "https://api.archives-ouvertes.fr" self.BASE_URL = "https://api.archives-ouvertes.fr"
self.API_URL = "search" self.API_URL = "search"
# Final EndPoints # Final EndPoints
# TODO : Change endpoint according type of database # TODO : Change endpoint according type of database
self.URL = self.BASE_URL + "/" + self.API_URL self.URL = self.BASE_URL + "/" + self.API_URL
...@@ -29,28 +29,39 @@ class HalCrawler(Crawler):
         '''formating the query'''
         #search_field="title_t"
-        search_field="abstract_t"
-        return (search_field + ":" + "(" + query + ")")
+        #search_field="abstract_t"
+        #return (search_field + ":" + "(" + query + ")")
+        return "(" + query + ")"
def _get(self, query, fromPage=1, count=10, lang=None): def _get(self, query, fromPage=1, count=10, lang=None):
# Parameters # Parameters
fl = """ title_s fl = """ docid
, title_s
, abstract_s , abstract_s
, en_title_s
, en_abstract_s
, submittedDate_s , submittedDate_s
, journalDate_s , journalDate_s
, authFullName_s , authFullName_s
, uri_s , uri_s
, isbn_s , isbn_s
, issue_s , issue_s
, journalTitle_s
, language_s
, doiId_s
, authId_i
, instStructId_i
, deptStructId_i
, labStructId_i
, rteamStructId_i
, docType_s , docType_s
, journalPublisher_s
""" """
#, authUrl_s #, authUrl_s
#, type_s #, type_s
wt = "json" wt = "json"
querystring = { "q" : query querystring = { "q" : query
...@@ -59,18 +70,18 @@ class HalCrawler(Crawler): ...@@ -59,18 +70,18 @@ class HalCrawler(Crawler):
, "fl" : fl , "fl" : fl
, "wt" : wt , "wt" : wt
} }
# Specify Headers # Specify Headers
headers = { "cache-control" : "no-cache" } headers = { "cache-control" : "no-cache" }
# Do Request and get response # Do Request and get response
response = requests.request( "GET" response = requests.request( "GET"
, self.URL , self.URL
, headers = headers , headers = headers
, params = querystring , params = querystring
) )
#print(querystring) #print(querystring)
# Validation : 200 if ok else raise Value # Validation : 200 if ok else raise Value
if response.status_code == 200: if response.status_code == 200:
...@@ -81,27 +92,27 @@ class HalCrawler(Crawler): ...@@ -81,27 +92,27 @@ class HalCrawler(Crawler):
return (json.loads(response.content.decode(charset))) return (json.loads(response.content.decode(charset)))
else: else:
raise ValueError(response.status_code, response.reason) raise ValueError(response.status_code, response.reason)
def scan_results(self, query): def scan_results(self, query):
''' '''
scan_results : Returns the number of results scan_results : Returns the number of results
Query String -> Int Query String -> Int
''' '''
self.results_nb = 0 self.results_nb = 0
total = ( self._get(query) total = ( self._get(query)
.get("response", {}) .get("response", {})
.get("numFound" , 0) .get("numFound" , 0)
) )
self.results_nb = total self.results_nb = total
return self.results_nb return self.results_nb
def download(self, query): def download(self, query):
downloaded = False downloaded = False
self.status.append("fetching results") self.status.append("fetching results")
corpus = [] corpus = []
...@@ -113,9 +124,9 @@ class HalCrawler(Crawler):
             msg = "Invalid sample size N = %i (max = %i)" % ( self.query_max
                                                             , QUERY_SIZE_N_MAX
                                                             )
-            print("ERROR (scrap: Multivac d/l ): " , msg)
+            print("ERROR (scrap: HAL d/l ): " , msg)
             self.query_max = QUERY_SIZE_N_MAX
         #for page in range(1, trunc(self.query_max / 100) + 2):
         for page in range(0, self.query_max, paging):
             print("Downloading page %s to %s results" % (page, paging))
...@@ -132,5 +143,5 @@ class HalCrawler(Crawler): ...@@ -132,5 +143,5 @@ class HalCrawler(Crawler):
, basedir=UPLOAD_DIRECTORY , basedir=UPLOAD_DIRECTORY
) )
downloaded = True downloaded = True
return downloaded return downloaded
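For reference, the request built by _get() boils down to a plain GET against the HAL search endpoint. The sketch below is a standalone approximation: the field list is abridged, and the paging parameter names (start, rows) are an assumption taken from the usual Solr conventions HAL follows, since the corresponding lines are elided from this hunk.

import requests

url = "https://api.archives-ouvertes.fr/search"
params = { "q"     : "(graph theory)"   # as produced by _format_query()
         , "fl"    : "docid,en_title_s,title_s,en_abstract_s,abstract_s,uri_s"
         , "wt"    : "json"
         , "start" : 0                   # assumed paging offset (fromPage)
         , "rows"  : 100                 # assumed page size (count)
         }
response = requests.get(url, params=params, headers={"cache-control": "no-cache"})
if response.status_code == 200:
    print(response.json()["response"]["numFound"])
else:
    raise ValueError(response.status_code, response.reason)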
...@@ -2,6 +2,7 @@
 import subprocess
 import re
 from .sparql import Service
+from gargantext.settings import BOOL_TOOLS_PATH
 #from sparql import Service
 def bool2sparql(rawQuery, count=False, offset=None, limit=None):
...@@ -12,7 +13,7 @@ def bool2sparql(rawQuery, count=False, offset=None, limit=None):
     See: https://github.com/delanoe/bool2sparql
     """
     query = re.sub("\"", "\'", rawQuery)
-    bashCommand = ["/srv/gargantext/gargantext/util/crawlers/sparql/bool2sparql-exe","-q",query]
+    bashCommand = [BOOL_TOOLS_PATH + "/bool2sparql-exe","-q",query]
     if count is True :
         bashCommand.append("-c")
......
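With the hard-coded path replaced by a setting, the converter binary can be located per deployment. A hedged sketch of how the command built above could be run (BOOL_TOOLS_PATH is assumed to point at the directory holding bool2sparql-exe):

import subprocess
from gargantext.settings import BOOL_TOOLS_PATH

query = "'graph' AND 'complexity'"
command = [BOOL_TOOLS_PATH + "/bool2sparql-exe", "-q", query, "-c"]   # -c: count query
output = subprocess.check_output(command)
print(output.decode("utf-8"))   # the generated SPARQL query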
...@@ -5,15 +5,9 @@ from gargantext.util.json import json_dumps ...@@ -5,15 +5,9 @@ from gargantext.util.json import json_dumps
######################################################################## ########################################################################
# get engine, session, etc. # get engine, session, etc.
######################################################################## ########################################################################
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker, scoped_session from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import delete from sqlalchemy import delete
# To make Full Text search possible, uncomment lines below
# (and install it with pip before)
#from sqlalchemy_searchable import make_searchable
def get_engine(): def get_engine():
from sqlalchemy import create_engine from sqlalchemy import create_engine
return create_engine( settings.DATABASES['default']['URL'] return create_engine( settings.DATABASES['default']['URL']
...@@ -24,16 +18,8 @@ def get_engine(): ...@@ -24,16 +18,8 @@ def get_engine():
engine = get_engine() engine = get_engine()
# To make Full Text search possible, uncomment lines below
# https://sqlalchemy-searchable.readthedocs.io/
#sa.orm.configure_mappers()
Base = declarative_base()
#Base.metadata.create_all(engine)
#make_searchable()
session = scoped_session(sessionmaker(bind=engine)) session = scoped_session(sessionmaker(bind=engine))
######################################################################## ########################################################################
# useful for queries # useful for queries
######################################################################## ########################################################################
......
...@@ -7,7 +7,7 @@ from gargantext.util.db import session, aliased
 from gargantext.models import Ngram, NodeNgramNgram
 from igraph import Graph # for group_union
-def query_groups(groupings_id, details=False):
+def query_groups(groupings_id, details=False, sort=False):
     """
     Listing of couples (mainform, subform)
       aka (ngram1_id, ngram2_id)
...@@ -15,24 +15,27 @@ def query_groups(groupings_id, details=False):
     Parameter:
       - details: if False, just send the array of couples
                  if True, send quadruplets with (ngram1_id, term1, ngram2_id, term2)
+      - sort: order results by terms of ngram1 then ngram2
     """
+    if details or sort:
+        Ngram1, Ngram2 = Ngram, aliased(Ngram)
+
     if not details:
         # simple contents
-        query = session.query(NodeNgramNgram.ngram1_id, NodeNgramNgram.ngram2_id)
+        columns = (NodeNgramNgram.ngram1_id, NodeNgramNgram.ngram2_id)
     else:
         # detailed contents (id + terms)
-        Ngram1 = aliased(Ngram)
-        Ngram2 = aliased(Ngram)
-        query = (session
-                 .query(
-                     NodeNgramNgram.ngram1_id,
-                     Ngram1.terms,
-                     NodeNgramNgram.ngram2_id,
-                     Ngram2.terms,
-                 )
-                 .join(Ngram1, NodeNgramNgram.ngram1_id == Ngram1.id)
-                 .join(Ngram2, NodeNgramNgram.ngram2_id == Ngram2.id)
-                )
+        columns = (Ngram1.id, Ngram1.terms,
+                   Ngram2.id, Ngram2.terms)
+
+    query = session.query(*columns)
+
+    if details or sort:
+        query = (query.join(Ngram1, NodeNgramNgram.ngram1_id == Ngram1.id)
+                      .join(Ngram2, NodeNgramNgram.ngram2_id == Ngram2.id))
+
+    if sort:
+        query = query.order_by(Ngram1.terms, Ngram2.terms)

     # main filter
     # -----------
......
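A short usage sketch of the reworked helper (group_node_id stands for the id of a GROUPLIST node and is a placeholder here):

# id couples only, unsorted (same behaviour as before)
couples = query_groups(group_node_id).all()
# [(ngram1_id, ngram2_id), ...]

# detailed quadruplets, ordered by the terms of ngram1 then ngram2
detailed = query_groups(group_node_id, details=True, sort=True).all()
# [(ngram1_id, term1, ngram2_id, term2), ...]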
...@@ -73,7 +73,8 @@ from rest_framework.views import APIView
 from gargantext.util.json import json_encoder
 def JsonHttpResponse(data, status=200):
     return HttpResponse(
-        content = json_encoder.encode(data),
+        content = data.encode('utf-8') if isinstance(data, str) else \
+                  json_encoder.encode(data),
         content_type = 'application/json; charset=utf-8',
         status = status
     )
......
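The change lets callers hand over an already-serialized JSON string without it being encoded a second time; both call styles in a small sketch:

# dict payload: encoded with json_encoder, as before
JsonHttpResponse({"results_nb": 42})

# pre-serialized payload: sent as-is (UTF-8 bytes), not double-encoded
raw_json = '{"results_nb": 42}'
JsonHttpResponse(raw_json, status=200)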
...@@ -50,6 +50,9 @@ class _BaseClass:
         else:
             return NotImplemented
+    def __len__(self):
+        return len(self.items)
     def __repr__(self):
         items = self.items
         if isinstance(items, defaultdict):
......
...@@ -8,8 +8,7 @@ Tools to work with ngramlists (MAINLIST, MAPLIST, STOPLIST) ...@@ -8,8 +8,7 @@ Tools to work with ngramlists (MAINLIST, MAPLIST, STOPLIST)
""" """
from gargantext.util.group_tools import query_groups, group_union from gargantext.util.group_tools import query_groups, group_union
from gargantext.util.db import session, desc, func, \ from gargantext.util.db import session, bulk_insert_ifnotexists
bulk_insert_ifnotexists
from gargantext.models import Ngram, NodeNgram, NodeNodeNgram, \ from gargantext.models import Ngram, NodeNgram, NodeNodeNgram, \
NodeNgramNgram, Node NodeNgramNgram, Node
...@@ -25,7 +24,6 @@ from gargantext.util.toolchain.ngrams_extraction import normalize_forms ...@@ -25,7 +24,6 @@ from gargantext.util.toolchain.ngrams_extraction import normalize_forms
# merge will also index the new ngrams in the docs of the corpus # merge will also index the new ngrams in the docs of the corpus
from gargantext.util.toolchain.ngrams_addition import index_new_ngrams from gargantext.util.toolchain.ngrams_addition import index_new_ngrams
from sqlalchemy.sql import exists
from os import path from os import path
from csv import writer, reader, QUOTE_MINIMAL from csv import writer, reader, QUOTE_MINIMAL
from collections import defaultdict from collections import defaultdict
...@@ -35,8 +33,8 @@ from celery import shared_task ...@@ -35,8 +33,8 @@ from celery import shared_task
def query_list(list_id, def query_list(list_id,
pagination_limit=None, pagination_offset=None, pagination_limit=None, pagination_offset=None,
details=False, scoring_metric_id=None, groupings_id=None details=False, scoring_metric_id=None, groupings_id=None,
): sort=False):
""" """
Paginated listing of ngram_ids in a NodeNgram lists. Paginated listing of ngram_ids in a NodeNgram lists.
...@@ -51,6 +49,7 @@ def query_list(list_id, ...@@ -51,6 +49,7 @@ def query_list(list_id,
(for details and sorting) (for details and sorting)
- groupings_id: optional id of a list of grouping relations (synonyms) - groupings_id: optional id of a list of grouping relations (synonyms)
(each synonym will be added to the list if not already in there) (each synonym will be added to the list if not already in there)
- sort: order by Ngram.terms (not possible if details is False)
FIXME: subforms appended recently and not generalized enough FIXME: subforms appended recently and not generalized enough
=> add a common part for all "if groupings_id" => add a common part for all "if groupings_id"
...@@ -125,7 +124,10 @@ def query_list(list_id, ...@@ -125,7 +124,10 @@ def query_list(list_id,
query = query.limit(pagination_limit) query = query.limit(pagination_limit)
if pagination_offset: if pagination_offset:
query = query.offset(pagination_offsets) query = query.offset(pagination_offset)
if details and sort:
query = query.order_by(Ngram.terms)
return query return query
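Callers that need a deterministic ordering (such as the CSV export below) can now ask for it directly; a usage sketch, with the node objects being placeholders:

main_ngrams = query_list(mainlist_node.id,
                         details=True,
                         groupings_id=group_node.id,
                         sort=True).all()
# detailed rows ordered by Ngram.terms; sort has no effect when details=False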
...@@ -186,9 +188,7 @@ def ngrams_to_csv_rows(ngram_objs, ngram_dico={}, group_infos={}, ...@@ -186,9 +188,7 @@ def ngrams_to_csv_rows(ngram_objs, ngram_dico={}, group_infos={},
# 3 columns = |status, | mainform, | forms # 3 columns = |status, | mainform, | forms
# (type_of_list) ( term ) ( subterm1|&|subterm2 ) # (type_of_list) ( term ) ( subterm1|&|subterm2 )
csv_rows.append( csv_rows.append([list_type, ng_obj.terms, this_grouped_terms])
[list_type,ng_obj.terms,this_grouped_terms]
)
return csv_rows return csv_rows
...@@ -231,9 +231,10 @@ def export_ngramlists(node,fname=None,delimiter=DEFAULT_CSV_DELIM,titles=True): ...@@ -231,9 +231,10 @@ def export_ngramlists(node,fname=None,delimiter=DEFAULT_CSV_DELIM,titles=True):
# listes de ngram_ids correspondantes # listes de ngram_ids correspondantes
# ------------------------------------ # ------------------------------------
# contenu: liste des objets ngrammes [(2562,"monterme",1),...] # contenu: liste des objets ngrammes [(2562,"monterme",1),...]
stop_ngrams = query_list(stoplist_node.id, details=True, groupings_id=group_node.id).all() stop_ngrams, main_ngrams, map_ngrams = (
main_ngrams = query_list(mainlist_node.id, details=True, groupings_id=group_node.id).all() query_list(n.id, details=True, groupings_id=group_node.id, sort=True).all()
map_ngrams = query_list(maplist_node.id, details=True, groupings_id=group_node.id).all() for n in (stoplist_node, mainlist_node, maplist_node)
)
# pour debug ---------->8 -------------------- # pour debug ---------->8 --------------------
#~ stop_ngrams = stop_ngrams[0:10] #~ stop_ngrams = stop_ngrams[0:10]
...@@ -250,7 +251,7 @@ def export_ngramlists(node,fname=None,delimiter=DEFAULT_CSV_DELIM,titles=True): ...@@ -250,7 +251,7 @@ def export_ngramlists(node,fname=None,delimiter=DEFAULT_CSV_DELIM,titles=True):
# for the groups we got couples of ids in the DB # for the groups we got couples of ids in the DB
# ------------------- # -------------------
# ex: [(3544, 2353), (2787, 4032), ...] # ex: [(3544, 2353), (2787, 4032), ...]
group_ngram_id_couples = query_groups(group_node.id).all() group_ngram_id_couples = query_groups(group_node.id, sort=True)
# we expend this to double structure for groups lookup # we expend this to double structure for groups lookup
# 1) g['links'] = k couples (x,y_i) as a set [x => {y1,y2}] # 1) g['links'] = k couples (x,y_i) as a set [x => {y1,y2}]
...@@ -397,6 +398,9 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM, ...@@ -397,6 +398,9 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM,
NB: To merge the imported lists into a corpus node's lists, NB: To merge the imported lists into a corpus node's lists,
chain this function with merge_ngramlists() chain this function with merge_ngramlists()
''' '''
list_types = ['stop','main','map']
# --------------- # ---------------
# ngram storage # ngram storage
# --------------- # ---------------
...@@ -461,7 +465,6 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM, ...@@ -461,7 +465,6 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM,
# headers # headers
if i == 0: if i == 0:
n_cols = len(csv_row)
for j, colname in enumerate(csv_row): for j, colname in enumerate(csv_row):
if colname in ['label', 'status', 'forms']: if colname in ['label', 'status', 'forms']:
columns[colname] = j columns[colname] = j
...@@ -508,31 +511,30 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM, ...@@ -508,31 +511,30 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM,
continue continue
# --- check correct list type # --- check correct list type
if not this_list_type in ['stop','main','map']: if not this_list_type in list_types:
print("IMPORT WARN: (skip line) wrong list type at CSV %s:l.%i" % (fname, i)) print("IMPORT WARN: (skip line) wrong list type at CSV %s:l.%i" % (fname, i))
continue continue
# subforms can be duplicated (in forms and another label) # subforms can be duplicated (in forms and another label)
# but we must take care of unwanted other duplicates too # but we must take care of unwanted other duplicates too
if this_row_label in imported_unique_ngramstrs: if imported_unique_ngramstrs.get(this_row_label) == 1:
print("TODO IMPORT DUPL: (skip line) term appears more than once at CSV %s:l.%i" print("TODO IMPORT DUPL: (skip line) term %r appears more than once at CSV %s:l.%i"
% (fname, i)) % (this_row_label, fname, i))
# ================= Store the data ==================== # ================= Store the data ====================
# the ngram census # the ngram census
imported_unique_ngramstrs[this_row_label] = True imported_unique_ngramstrs[this_row_label] = 1
# and the "list to ngram" relation # and the "list to ngram" relation
imported_nodes_ngrams[this_list_type].append(this_row_label) imported_nodes_ngrams[this_list_type].append(this_row_label)
# ====== Store synonyms from the import (if any) ====== # ====== Store synonyms from the import (if any) ======
if len(this_row_forms) != 0: if len(this_row_forms) != 0:
other_terms = []
for raw_term_str in this_row_forms.split(group_delimiter): for raw_term_str in this_row_forms.split(group_delimiter):
# each subform is also like an ngram declaration # each subform is also like an ngram declaration
term_str = normalize_forms(normalize_chars(raw_term_str)) term_str = normalize_forms(normalize_chars(raw_term_str))
imported_unique_ngramstrs[term_str] = True imported_unique_ngramstrs[term_str] = 2
imported_nodes_ngrams[this_list_type].append(term_str) imported_nodes_ngrams[this_list_type].append(term_str)
# the optional repeated mainform doesn't interest us # the optional repeated mainform doesn't interest us
...@@ -610,7 +612,10 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM, ...@@ -610,7 +612,10 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM,
% (n_total_ng, n_added_ng, n_total_ng-n_added_ng) ) % (n_total_ng, n_added_ng, n_total_ng-n_added_ng) )
print("IMPORT: read %i grouping relations" % n_group_relations) print("IMPORT: read %i grouping relations" % n_group_relations)
# print("IMPORT RESULT", result) list_counts = [(typ, len(result.get(typ))) for typ in list_types]
list_counts.append(('total', sum(x[1] for x in list_counts)))
print("IMPORT: " + '; '.join('%s %s' % stats for stats in list_counts))
return result return result
def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]): def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
...@@ -718,9 +723,11 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]): ...@@ -718,9 +723,11 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
# ======== Merging all involved ngrams ========= # ======== Merging all involved ngrams =========
# all memberships with resolved conflicts of interfering memberships # all ngram memberships with resolved conflicts of interfering memberships
# (associates ngram ids with list types -- see linfos definition above)
resolved_memberships = {} resolved_memberships = {}
# iterates over each ngram of each list type for both old and new lists
for list_set in [old_lists, new_lists]: for list_set in [old_lists, new_lists]:
for lid, info in enumerate(linfos): for lid, info in enumerate(linfos):
list_type = info['key'] list_type = info['key']
...@@ -749,12 +756,15 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]): ...@@ -749,12 +756,15 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
# ======== Merging old and new groups ========= # ======== Merging old and new groups =========
# get the arcs already in the target DB (directed couples) # get the arcs already in the target DB (directed couples)
previous_links = session.query( if 'groupings' in del_originals:
NodeNgramNgram.ngram1_id, previous_links = []
NodeNgramNgram.ngram2_id else:
).filter( previous_links = session.query(
NodeNgramNgram.node_id == old_group_id NodeNgramNgram.ngram1_id,
).all() NodeNgramNgram.ngram2_id
).filter(
NodeNgramNgram.node_id == old_group_id
).all()
n_links_previous = len(previous_links) n_links_previous = len(previous_links)
...@@ -822,7 +832,7 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]): ...@@ -822,7 +832,7 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
list_type = linfos[lid]['key'] list_type = linfos[lid]['key']
merged_results[list_type].items.add(ng_id) merged_results[list_type].items.add(ng_id)
# print("IMPORT: added %i elements in the lists indices" % added_nd_ng) print("IMPORT: added %i elements in the lists indices" % added_nd_ng)
# ======== Overwrite old data with new ========= # ======== Overwrite old data with new =========
for lid, info in enumerate(linfos): for lid, info in enumerate(linfos):
...@@ -845,13 +855,17 @@ def import_and_merge_ngramlists(file_contents, onto_corpus_id, overwrite=False): ...@@ -845,13 +855,17 @@ def import_and_merge_ngramlists(file_contents, onto_corpus_id, overwrite=False):
""" """
A single function to run import_ngramlists and merge_ngramlists together A single function to run import_ngramlists and merge_ngramlists together
""" """
print("import list")
print("IMPORT CSV termlists file with %s lines in corpus %s (%s)" % (
len(file_contents),
onto_corpus_id, 'overwrite' if overwrite else 'merge'))
new_lists = import_ngramlists(file_contents) new_lists = import_ngramlists(file_contents)
corpus_node = session.query(Node).filter(Node.id == onto_corpus_id).first() corpus_node = session.query(Node).get(onto_corpus_id)
# merge the new_lists onto those of the target corpus # merge the new_lists onto those of the target corpus
del_originals = ['stop', 'main', 'map'] if overwrite else [] del_originals = ['stop', 'main', 'map', 'groupings'] if overwrite else []
log_msg = merge_ngramlists(new_lists, onto_corpus=corpus_node, del_originals=del_originals) log_msg = merge_ngramlists(new_lists, onto_corpus=corpus_node, del_originals=del_originals)
return log_msg return log_msg
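The entry point is unchanged apart from the stricter overwrite semantics: with overwrite=True the stop/main/map lists and the groupings of the target corpus are all replaced. A hedged usage sketch, assuming the file contents are passed as a sequence of CSV lines (as the new log line suggests), with the file name and corpus id as placeholders:

with open("termlists_export.csv") as f:
    file_contents = f.read().splitlines()

log_msg = import_and_merge_ngramlists(file_contents,
                                      onto_corpus_id=corpus_id,
                                      overwrite=True)
print(log_msg)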
...@@ -4,128 +4,67 @@ import sys ...@@ -4,128 +4,67 @@ import sys
import csv import csv
csv.field_size_limit(sys.maxsize) csv.field_size_limit(sys.maxsize)
import numpy as np import numpy as np
import os
class CSVParser(Parser): class CSVParser(Parser):
DELIMITERS = ", \t;|:"
def CSVsample( self, small_contents , delim) : def detect_delimiter(self, lines, sample_size=10):
reader = csv.reader(small_contents, delimiter=delim) sample = lines[:sample_size]
Freqs = [] # Compute frequency of each delimiter on each input line
for row in reader: delimiters_freqs = {
Freqs.append(len(row)) d: [line.count(d) for line in sample]
for d in self.DELIMITERS
}
return Freqs # Select delimiters with a standard deviation of zero, ie. delimiters
# for which we have the same number of fields on each line
selected_delimiters = [
(d, np.sum(freqs))
for d, freqs in delimiters_freqs.items()
if any(freqs) and np.std(freqs) == 0
]
if selected_delimiters:
# Choose the delimiter with highest frequency amongst selected ones
sorted_delimiters = sorted(selected_delimiters, key=lambda x: x[1])
return sorted_delimiters[-1][0]
def parse(self, filebuf): def parse(self, filebuf):
print("CSV: parsing (assuming UTF-8 and LF line endings)") print("CSV: parsing (assuming UTF-8 and LF line endings)")
contents = filebuf.read().decode("UTF-8").split("\n") contents = filebuf.read().decode("UTF-8").split("\n")
sample_size = 10 # Filter out empty lines
sample_contents = contents[0:sample_size] contents = [line for line in contents if line.strip()]
hyperdata_list = [] # Delimiter auto-detection
delimiter = self.detect_delimiter(contents, sample_size=10)
# # = = = = [ Getting delimiters frequency ] = = = = #
PossibleDelimiters = [ ',',' ','\t', ';', '|', ':' ] if delimiter is None:
AllDelimiters = {} raise ValueError("CSV: couldn't detect delimiter, bug or malformed data")
for delim in PossibleDelimiters:
AllDelimiters[delim] = self.CSVsample( sample_contents , delim ) print("CSV: selected delimiter: %r" % delimiter)
# # = = = = [ / Getting delimiters frequency ] = = = = #
# # OUTPUT example: # Parse CSV
# # AllDelimiters = { reader = csv.reader(contents, delimiter=delimiter)
# # '\t': [1, 1, 1, 1, 1],
# # ' ': [1, 13, 261, 348, 330], # Get first not empty row and its fields (ie. header row), or (0, [])
# # ',': [15, 15, 15, 15, 15], first_row, headers = \
# # ';': [1, 1, 1, 1, 1], next(((i, fields) for i, fields in enumerate(reader) if any(fields)),
# # '|': [1, 1, 1, 1, 1] (0, []))
# # }
# Get first not empty column of the first row, or 0
# # = = = = [ Stand.Dev=0 & Sum of delimiters ] = = = = # first_col = next((i for i, field in enumerate(headers) if field), 0)
Delimiters = []
for d in AllDelimiters: # Strip out potential empty fields in headers
freqs = AllDelimiters[d] headers = headers[first_col:]
suma = np.sum( freqs )
if suma >0:
std = np.std( freqs )
# print [ d , suma , len(freqs) , std]
if std == 0:
Delimiters.append ( [ d , suma , len(freqs) , std] )
# # = = = = [ / Stand.Dev=0 & Sum of delimiters ] = = = = #
# # OUTPUT example:
# # Delimiters = [
# # ['\t', 5, 5, 0.0],
# # [',', 75, 5, 0.0],
# # ['|', 5, 5, 0.0]
# # ]
# # = = = = [ Delimiter selection ] = = = = #
Sorted_Delims = sorted(Delimiters, key=lambda x: x[1], reverse=True)
HighestDelim = Sorted_Delims[0][0]
# HighestDelim = ","
print("CSV selected delimiter:",[HighestDelim])
# # = = = = [ / Delimiter selection ] = = = = #
# # = = = = [ First data coordinate ] = = = = #
Coords = {
"row": -1,
"column": -1
}
reader = csv.reader(contents, delimiter=HighestDelim) # Return a generator of dictionaries with column labels as keys,
# filtering out empty rows
for rownum, tokens in enumerate(reader): for i, fields in enumerate(reader):
if rownum % 250 == 0: if i % 500 == 0:
print("CSV row: ", rownum) print("CSV: parsing row #%s..." % (i+1))
joined_tokens = "".join (tokens) if any(fields):
if Coords["row"]<0 and len( joined_tokens )>0 : yield dict(zip(headers, fields[first_col:]))
Coords["row"] = rownum
for columnum in range(len(tokens)):
t = tokens[columnum]
if len(t)>0:
Coords["column"] = columnum
break
# # = = = = [ / First data coordinate ] = = = = #
# # = = = = [ Setting Headers ] = = = = #
Headers_Int2Str = {}
reader = csv.reader(contents, delimiter=HighestDelim)
for rownum, tokens in enumerate(reader):
if rownum>=Coords["row"]:
for columnum in range( Coords["column"],len(tokens) ):
t = tokens[columnum]
Headers_Int2Str[columnum] = t
break
# print("Headers_Int2Str")
# print(Headers_Int2Str)
# # = = = = [ / Setting Headers ] = = = = #
# # OUTPUT example:
# # Headers_Int2Str = {
# # 0: 'publication_date',
# # 1: 'publication_month',
# # 2: 'publication_second',
# # 3: 'abstract'
# # }
# # = = = = [ Reading the whole CSV and saving ] = = = = #
hyperdata_list = []
reader = csv.reader(contents, delimiter=HighestDelim)
for rownum, tokens in enumerate(reader):
if rownum>Coords["row"]:
RecordDict = {}
for columnum in range( Coords["column"],len(tokens) ):
data = tokens[columnum]
RecordDict[ Headers_Int2Str[columnum] ] = data
if len(RecordDict.keys())>0:
hyperdata_list.append( RecordDict )
# # = = = = [ / Reading the whole CSV and saving ] = = = = #
return hyperdata_list
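The selection rule of the new detect_delimiter() can be checked in isolation: keep the delimiters that appear with the same count on every sampled line (standard deviation zero) and pick the most frequent one. A self-contained sketch of that rule with made-up data:

import numpy as np

lines = [ "publication_date\ttitle\tabstract"
        , "2017-01-02\tA title\tAn abstract"
        , "2017-03-04\tAnother title, with a comma\tMore text"
        ]
DELIMITERS = ", \t;|:"
freqs = {d: [line.count(d) for line in lines] for d in DELIMITERS}
candidates = [(d, np.sum(f)) for d, f in freqs.items() if any(f) and np.std(f) == 0]
best = sorted(candidates, key=lambda x: x[1])[-1][0] if candidates else None
assert best == "\t"   # the tab occurs exactly twice on every line, the comma does not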
...@@ -11,25 +11,26 @@ from datetime import datetime ...@@ -11,25 +11,26 @@ from datetime import datetime
import json import json
class HalParser(Parser): class HalParser(Parser):
def _parse(self, json_docs):
def parse(self, filebuf):
'''
parse :: FileBuff -> [Hyperdata]
'''
contents = filebuf.read().decode("UTF-8")
data = json.loads(contents)
filebuf.close()
json_docs = data
hyperdata_list = [] hyperdata_list = []
hyperdata_path = { "id" : "isbn_s" hyperdata_path = { "id" : "docid"
, "title" : "title_s" , "title" : ["en_title_s", "title_s"]
, "abstract" : "abstract_s" , "abstract" : ["en_abstract_s", "abstract_s"]
, "source" : "journalPublisher_s" , "source" : "journalTitle_s"
, "url" : "uri_s" , "url" : "uri_s"
, "authors" : "authFullName_s" , "authors" : "authFullName_s"
, "isbn_s" : "isbn_s"
, "issue_s" : "issue_s"
, "language_s" : "language_s"
, "doiId_s" : "doiId_s"
, "authId_i" : "authId_i"
, "instStructId_i" : "instStructId_i"
, "deptStructId_i" : "deptStructId_i"
, "labStructId_i" : "labStructId_i"
, "rteamStructId_i" : "rteamStructId_i"
, "docType_s" : "docType_s"
} }
uris = set() uris = set()
...@@ -37,29 +38,32 @@ class HalParser(Parser): ...@@ -37,29 +38,32 @@ class HalParser(Parser):
for doc in json_docs: for doc in json_docs:
hyperdata = {} hyperdata = {}
for key, path in hyperdata_path.items(): for key, path in hyperdata_path.items():
field = doc.get(path, "NOT FOUND") # A path can be a field name or a sequence of field names
if isinstance(field, list): if isinstance(path, (list, tuple)):
hyperdata[key] = ", ".join(field) # Get first non-empty value of fields in path sequence, or None
else: field = next((x for x in (doc.get(p) for p in path) if x), None)
hyperdata[key] = field else:
# Get field value
field = doc.get(path)
if field is None:
field = "NOT FOUND"
if isinstance(field, list):
hyperdata[key] = ", ".join(map(str, field))
else:
hyperdata[key] = str(field)
if hyperdata["url"] in uris: if hyperdata["url"] in uris:
print("Document already parsed") print("Document already parsed")
else: else:
uris.add(hyperdata["url"]) uris.add(hyperdata["url"])
# hyperdata["authors"] = ", ".join(
# [ p.get("person", {})
# .get("name" , "")
#
# for p in doc.get("hasauthor", [])
# ]
# )
#
maybeDate = doc.get("submittedDate_s", None)
maybeDate = doc.get("submittedDate_s", None)
if maybeDate is not None: if maybeDate is not None:
date = datetime.strptime(maybeDate, "%Y-%m-%d %H:%M:%S") date = datetime.strptime(maybeDate, "%Y-%m-%d %H:%M:%S")
else: else:
...@@ -69,7 +73,17 @@ class HalParser(Parser): ...@@ -69,7 +73,17 @@ class HalParser(Parser):
hyperdata["publication_year"] = str(date.year) hyperdata["publication_year"] = str(date.year)
hyperdata["publication_month"] = str(date.month) hyperdata["publication_month"] = str(date.month)
hyperdata["publication_day"] = str(date.day) hyperdata["publication_day"] = str(date.day)
hyperdata_list.append(hyperdata) hyperdata_list.append(hyperdata)
return hyperdata_list return hyperdata_list
def parse(self, filebuf):
'''
parse :: FileBuff -> [Hyperdata]
'''
contents = filebuf.read().decode("UTF-8")
data = json.loads(contents)
return self._parse(data)
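The new mapping prefers the English metadata and falls back to the default fields when it is missing; the lookup rule used above, isolated in a tiny sketch:

doc  = {"title_s": ["Titre en français"], "en_title_s": ["English title"]}
path = ["en_title_s", "title_s"]

# first non-empty value along the path sequence, or None
field = next((x for x in (doc.get(p) for p in path) if x), None)
print(field)    # ['English title']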
...@@ -81,44 +81,45 @@ def extract_ngrams(corpus, keys=DEFAULT_INDEX_FIELDS, do_subngrams = DEFAULT_IND ...@@ -81,44 +81,45 @@ def extract_ngrams(corpus, keys=DEFAULT_INDEX_FIELDS, do_subngrams = DEFAULT_IND
corpus.hyperdata["skipped_docs"].append(document.id) corpus.hyperdata["skipped_docs"].append(document.id)
corpus.save_hyperdata() corpus.save_hyperdata()
continue continue
else:
# ready ! # ready !
tagger = tagger_bots[language_iso2] tagger = tagger_bots[language_iso2]
# to do verify if document has no KEYS to index # to do verify if document has no KEYS to index
# eg: use set intersect (+ loop becomes direct! with no continue) # eg: use set intersect (+ loop becomes direct! with no continue)
for key in keys: for key in keys:
try: try:
value = document.hyperdata[str(key)] value = document.hyperdata[str(key)]
if not isinstance(value, str): if not isinstance(value, str):
#print("DBG wrong content in doc for key", key) #print("DBG wrong content in doc for key", key)
continue
# get ngrams
for ngram in tagger.extract(value):
tokens = tuple(normalize_forms(token[0]) for token in ngram)
if do_subngrams:
# ex tokens = ["very", "cool", "exemple"]
# subterms = [['very', 'cool'],...]
subterms = subsequences(tokens)
else:
subterms = [tokens]
for seqterm in subterms:
ngram = ' '.join(seqterm)
nbwords = len(seqterm)
nbchars = len(ngram)
if nbchars > 1:
if nbchars > 255:
# max ngram length (DB constraint)
ngram = ngram[:255]
# doc <=> ngram index
nodes_ngrams_count[(document.id, ngram)] += 1
# add fields : terms n
ngrams_data.add((ngram, nbwords, ))
except:
#value not in doc
continue continue
# get ngrams
for ngram in tagger.extract(value):
normal_forms = (normalize_forms(t[0]) for t in ngram)
tokens = tuple(nf for nf in normal_forms if nf)
if do_subngrams:
# ex tokens = ["very", "cool", "exemple"]
# subterms = [['very', 'cool'],...]
subterms = subsequences(tokens)
else:
subterms = [tokens]
for seqterm in subterms:
ngram = ' '.join(seqterm)
nbwords = len(seqterm)
nbchars = len(ngram)
if nbchars > 1:
if nbchars > 255:
# max ngram length (DB constraint)
ngram = ngram[:255]
# doc <=> ngram index
nodes_ngrams_count[(document.id, ngram)] += 1
# add fields : terms n
ngrams_data.add((ngram, nbwords, ))
except:
#value not in doc
continue
# integrate ngrams and nodes-ngrams # integrate ngrams and nodes-ngrams
if len(nodes_ngrams_count) >= BATCH_NGRAMSEXTRACTION_SIZE: if len(nodes_ngrams_count) >= BATCH_NGRAMSEXTRACTION_SIZE:
......
...@@ -155,7 +155,12 @@ class CSVLists(APIView): ...@@ -155,7 +155,12 @@ class CSVLists(APIView):
try: try:
# merge the source_lists onto those of the target corpus # merge the source_lists onto those of the target corpus
delete = todo_lists if bool(params.get('overwrite')) else [] delete = todo_lists if bool(params.get('overwrite')) else []
if len(delete) == len(list_types):
delete.append('groupings')
log_msg = merge_ngramlists(source_lists, onto_corpus=corpus_node, del_originals=delete) log_msg = merge_ngramlists(source_lists, onto_corpus=corpus_node, del_originals=delete)
return JsonHttpResponse({ return JsonHttpResponse({
'log': log_msg, 'log': log_msg,
}, 200) }, 200)
......
 from django.conf.urls import url
+from rest_framework_jwt.views import obtain_jwt_token
 from . import nodes
 from . import projects
 from . import corpora
...@@ -10,78 +12,81 @@ from . import ngramlists ...@@ -10,78 +12,81 @@ from . import ngramlists
from . import analytics from . import analytics
from graph.rest import Graph from graph.rest import Graph
urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view() ) urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view())
, url(r'^nodes/(\d+)$' , nodes.NodeResource.as_view() ) , url(r'^nodes/(\d+)$' , nodes.NodeResource.as_view())
, url(r'^nodes/(\d+)/having$' , nodes.NodeListHaving.as_view() ) , url(r'^nodes/(\d+)/having$' , nodes.NodeListHaving.as_view())
, url(r'^nodes/(\d+)/status$' , nodes.Status.as_view() ) , url(r'^nodes/(\d+)/status$' , nodes.Status.as_view())
#Projects
, url(r'^projects$' , projects.ProjectList.as_view() ) # Projects
, url(r'^projects/(\d+)$' , projects.ProjectView.as_view() ) , url(r'^projects$' , projects.ProjectList.as_view())
#?view=resource , url(r'^projects/(\d+)$' , projects.ProjectView.as_view())
#?view=docs
#Corpora # Corpora
, url(r'^projects/(\d+)/corpora/(\d+)$' , corpora.CorpusView.as_view() ) , url(r'^projects/(\d+)/corpora/(\d+)$', corpora.CorpusView.as_view())
#?view=source
#?view=title # Sources
#?view=analytics #, url(r'^projects/(\d+)/corpora/(\d+)/sources$', corpora.CorpusSources.as_view())
#Sources #, url(r'^projects/(\d+)/corpora/(\d+)/sources/(\d+)$ , corpora.CorpusSourceView.as_view())
#, url(r'^projects/(\d+)/corpora/(\d+)/sources$' , corpora.CorpusSources.as_view() )
#, url(r'^projects/(\d+)/corpora/(\d+)/sources/(\d+)$' , corpora.CorpusSourceView.as_view() ) # Facets
#Facets , url(r'^projects/(\d+)/corpora/(\d+)/facets$', nodes.CorpusFacet.as_view())
, url(r'^projects/(\d+)/corpora/(\d+)/facets$' , nodes.CorpusFacet.as_view() )
#Favorites # Favorites
, url(r'^projects/(\d+)/corpora/(\d+)/favorites$', nodes.CorpusFavorites.as_view() ) , url(r'^projects/(\d+)/corpora/(\d+)/favorites$', nodes.CorpusFavorites.as_view())
#Metrics
, url(r'^projects/(\d+)/corpora/(\d+)/metrics$', metrics.CorpusMetrics.as_view() ) # Metrics
#GraphExplorer , url(r'^projects/(\d+)/corpora/(\d+)/metrics$', metrics.CorpusMetrics.as_view())
, url(r'^projects/(\d+)/corpora/(\d+)/explorer$' , Graph.as_view())
# GraphExplorer
, url(r'^projects/(\d+)/corpora/(\d+)/explorer$', Graph.as_view())
# data for graph explorer (json) # data for graph explorer (json)
# GET /api/projects/43198/corpora/111107/explorer? # GET /api/projects/43198/corpora/111107/explorer?
# Corresponding view is : /projects/43198/corpora/111107/explorer? # Corresponding view is : /projects/43198/corpora/111107/explorer?
# Parameters (example): # Parameters (example):
# explorer?field1=ngrams&field2=ngrams&distance=conditional&bridgeness=5&start=1996-6-1&end=2002-10-5 # explorer?field1=ngrams&field2=ngrams&distance=conditional&bridgeness=5&start=1996-6-1&end=2002-10-5
# Ngrams # Ngrams
, url(r'^ngrams/?$' , ngrams.ApiNgrams.as_view() ) , url(r'^ngrams/?$' , ngrams.ApiNgrams.as_view())
# Analytics # Analytics
, url(r'^nodes/(\d+)/histories$', analytics.NodeNgramsQueries.as_view()) , url(r'^nodes/(\d+)/histories$', analytics.NodeNgramsQueries.as_view())
, url(r'hyperdata$' , analytics.ApiHyperdata.as_view() ) , url(r'hyperdata$' , analytics.ApiHyperdata.as_view())
# get a list of ngram_ids or ngram_infos by list_id # get a list of ngram_ids or ngram_infos by list_id
# url(r'^ngramlists/(\d+)$', ngramlists.List.as_view()), # url(r'^ngramlists/(\d+)$', ngramlists.List.as_view()),
, url(r'^nodes/(\d+)/facets$' , nodes.CorpusFacet.as_view() ) , url(r'^nodes/(\d+)/facets$' , nodes.CorpusFacet.as_view())
, url(r'^nodes/(\d+)/favorites$', nodes.CorpusFavorites.as_view() ) , url(r'^nodes/(\d+)/favorites$', nodes.CorpusFavorites.as_view())
# in these two routes the node is supposed to be a *corpus* node # in these two routes the node is supposed to be a *corpus* node
, url(r'^metrics/(\d+)$', metrics.CorpusMetrics.as_view() ) , url(r'^metrics/(\d+)$' , metrics.CorpusMetrics.as_view())
# update all metrics for a corpus # update all metrics for a corpus
# ex: PUT metrics/123 # ex: PUT metrics/123
# \ # \
# corpus id # corpus id
, url(r'^ngramlists/export$', ngramlists.CSVLists.as_view() ) , url(r'^ngramlists/export$', ngramlists.CSVLists.as_view())
# get a CSV export of the ngramlists of a corpus # get a CSV export of the ngramlists of a corpus
# ex: GET ngramlists/export?corpus=43 # ex: GET ngramlists/export?corpus=43
# TODO : unify to a /api/ngrams?formatted=csv # TODO : unify to a /api/ngrams?formatted=csv
# (similar to /api/nodes?formatted=csv) # (similar to /api/nodes?formatted=csv)
, url(r'^ngramlists/import$', ngramlists.CSVLists.as_view() ) , url(r'^ngramlists/import$', ngramlists.CSVLists.as_view())
# same handling class as export (CSVLists) # same handling class as export (CSVLists)
# but this route used only for POST + file # but this route used only for POST + file
# or PATCH + other corpus id # or PATCH + other corpus id
, url(r'^ngramlists/change$', ngramlists.ListChange.as_view() ) , url(r'^ngramlists/change$', ngramlists.ListChange.as_view())
# add or remove ngram from a list # add or remove ngram from a list
# ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2 # ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2
# rm <=> DEL ngramlists/change?list=42&ngrams=1,2 # rm <=> DEL ngramlists/change?list=42&ngrams=1,2
, url(r'^ngramlists/groups$', ngramlists.GroupChange.as_view() ) , url(r'^ngramlists/groups$', ngramlists.GroupChange.as_view())
# modify grouping couples of a group node # modify grouping couples of a group node
# ex: PUT/DEL ngramlists/groups?node=43 # ex: PUT/DEL ngramlists/groups?node=43
# & group data also in url: 767[]=209,640 & 779[]=436,265,385 # & group data also in url: 767[]=209,640 & 779[]=436,265,385
, url(r'^ngramlists/family$' , ngramlists.ListFamily.as_view() ) , url(r'^ngramlists/family$', ngramlists.ListFamily.as_view())
# entire combination of lists from a corpus, dedicated to termtable # entire combination of lists from a corpus, dedicated to termtable
# (or any combination of lists that go together : # (or any combination of lists that go together :
# - a mainlist # - a mainlist
...@@ -89,8 +94,11 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view() ...@@ -89,8 +94,11 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
# - an optional maplist # - an optional maplist
# - an optional grouplist # - an optional grouplist
, url(r'^ngramlists/maplist$' , ngramlists.MapListGlance.as_view() ) , url(r'^ngramlists/maplist$', ngramlists.MapListGlance.as_view())
# fast access to maplist, similarly formatted for termtable # fast access to maplist, similarly formatted for termtable
, url(r'^user/parameters/$', users.UserParameters.as_view())
, url(r'^user/parameters/$', users.UserParameters.as_view())
, url('^auth/token$', obtain_jwt_token)
] ]
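The new auth/token route exposes obtain_jwt_token from djangorestframework-jwt. A hedged sketch of the client side, with host and credentials as placeholders (the 'token' response field and the 'JWT ' header prefix are the library defaults):

import requests

api = "https://gargantext.example/api"    # assumed mount point of these url patterns
resp = requests.post(api + "/auth/token",
                     data={"username": "alice", "password": "secret"})
token = resp.json()["token"]

# subsequent API calls carry the token in the Authorization header
requests.get(api + "/nodes", headers={"Authorization": "JWT " + token})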
...@@ -11,6 +11,7 @@ django-celery==3.2.1
 django-pgfields==1.4.4
 django-pgjsonb==0.0.23
 djangorestframework==3.5.3
+djangorestframework-jwt==1.9.0
 html5lib==0.9999999
 python-igraph>=0.7.1
 jdatetime==1.7.2
......
...@@ -16,7 +16,7 @@ sudo docker run \
     --env POSTGRES_HOST=localhost \
     -v /srv/gargantext:/srv/gargantext \
     -it garg-notebook:latest \
-    /bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /srv/gargantext/ && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser'"
+    /bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /home/notebooks && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser'"
     # #&& jupyter nbextension enable --py widgetsnbextension --sys-prefix
     #/bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /srv/gargantext/ && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser --notebook-dir=/home/notebooks/'"
......
...@@ -78,32 +78,8 @@ RUN . /env_3-5/bin/activate && pip3 install -r requirements.txt ...@@ -78,32 +78,8 @@ RUN . /env_3-5/bin/activate && pip3 install -r requirements.txt
#RUN ./psql_configure.sh #RUN ./psql_configure.sh
#RUN ./django_configure.sh #RUN ./django_configure.sh
RUN chown notebooks:notebooks -R /env_3-5 RUN chown notebooks:notebooks -R /env_3-5
########################################################################
### Notebook IHaskell and IPYTHON ENVIRONNEMENT
########################################################################
#RUN apt-get update && apt-get install -y \
# libtinfo-dev \
# libzmq3-dev \
# libcairo2-dev \
# libpango1.0-dev \
# libmagic-dev \
# libblas-dev \
# liblapack-dev
#RUN curl -sSL https://get.haskellstack.org/ | sh
#RUN stack setup
#RUN git clone https://github.com/gibiansky/IHaskell
#RUN . /env_3-5/bin/activate \
# && cd IHaskell \
# && stack install gtk2hs-buildtools \
# && stack install --fast \
# && /root/.local/bin/ihaskell install --stack
#
#
######################################################################## ########################################################################
### POSTGRESQL DATA (as ROOT) ### POSTGRESQL DATA (as ROOT)
######################################################################## ########################################################################
...@@ -115,3 +91,32 @@ RUN chown notebooks:notebooks -R /env_3-5 ...@@ -115,3 +91,32 @@ RUN chown notebooks:notebooks -R /env_3-5
EXPOSE 5432 8899 EXPOSE 5432 8899
VOLUME ["/srv/","/home/notebooks/"] VOLUME ["/srv/","/home/notebooks/"]
########################################################################
### Notebook IHaskell and IPYTHON ENVIRONNEMENT
########################################################################
RUN apt-get update && apt-get install -y \
libtinfo-dev \
libzmq3-dev \
libcairo2-dev \
libpango1.0-dev \
libmagic-dev \
libblas-dev \
liblapack-dev
USER notebooks
RUN cd /home/notebooks \
&& curl -sSL https://get.haskellstack.org/ | sh \
&& stack setup \
&& git clone https://github.com/gibiansky/IHaskell \
&& . /env_3-5/bin/activate \
&& cd IHaskell \
&& stack install gtk2hs-buildtools \
&& stack install --fast \
&& /root/.local/bin/ihaskell install --stack
#!/usr/bin/env python
""" """
Gargantext Software Copyright (c) 2016-2017 CNRS ISC-PIF - Gargantext Software Copyright (c) 2016-2017 CNRS ISC-PIF -
http://iscpif.fr http://iscpif.fr
...@@ -6,45 +7,33 @@ http://gitlab.iscpif.fr/humanities/gargantext/blob/stable/LICENSE ) ...@@ -6,45 +7,33 @@ http://gitlab.iscpif.fr/humanities/gargantext/blob/stable/LICENSE )
- In France : a CECILL variant affero compliant - In France : a CECILL variant affero compliant
- GNU aGPLV3 for all other countries - GNU aGPLV3 for all other countries
""" """
#!/usr/bin/env python
import sys
import os
import os
import django
# Django settings os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'gargantext.settings')
dirname = os.path.dirname(os.path.realpath(__file__)) django.setup()
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext.settings")
# initialize Django application from gargantext.constants import QUERY_SIZE_N_MAX, get_resource, get_resource_by_name
from django.core.wsgi import get_wsgi_application from gargantext.models import ProjectNode, DocumentNode
application = get_wsgi_application() from gargantext.util.db import session, get_engine
from collections import Counter
import importlib
from django.http import Http404
from gargantext.util.toolchain.main import parse_extract_indexhyperdata # Import those to be available by notebook user
from gargantext.util.db import * from langdetect import detect as detect_lang
from gargantext.models import Node from gargantext.models import UserNode, User
from nltk.tokenize import wordpunct_tokenize
from gargantext.models import * class NotebookError(Exception):
from nltk.tokenize import word_tokenize pass
import nltk as nltk
from statistics import mean
from math import log
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
import datetime
from collections import Counter
from langdetect import detect as detect_lang
def documents(corpus_id): def documents(corpus_id):
return (session.query(Node).filter( Node.parent_id==corpus_id return (session.query(DocumentNode).filter_by(parent_id=corpus_id)
, Node.typename=="DOCUMENT" #.order_by(Node.hyperdata['publication_date'])
) .all())
# .order_by(Node.hyperdata['publication_date'])
.all()
)
#import seaborn as sns #import seaborn as sns
...@@ -63,13 +52,134 @@ def scan_hal(request): ...@@ -63,13 +52,134 @@ def scan_hal(request):
hal = HalCrawler() hal = HalCrawler()
return hal.scan_results(request) return hal.scan_results(request)
def scan_gargantext(corpus_id, lang, request): def scan_gargantext(corpus_id, lang, request):
connection = get_engine().connect() connection = get_engine().connect()
# TODO add some sugar the request (ideally request should be the same for hal and garg) # TODO add some sugar the request (ideally request should be the same for hal and garg)
query = """select count(n.id) from nodes n query = """select count(n.id) from nodes n
where to_tsvector('%s', hyperdata ->> 'abstract' || 'title') where to_tsvector('%s', hyperdata ->> 'abstract' || 'title')
@@ to_tsquery('%s') @@ to_tsquery('%s')
AND n.parent_id = %s;""" % (lang, request, corpus_id) AND n.parent_id = %s;""" % (lang, request, corpus_id)
return [i for i in connection.execute(query)][0][0] return [i for i in connection.execute(query)][0][0]
connection.close() connection.close()
def myProject_fromUrl(url):
"""
myProject :: String -> Project
"""
project_id = url.split("/")[4]
project = session.query(ProjectNode).get(project_id)
return project
def newCorpus(project, source, name=None, query=None):
error = False
if name is None:
name = query
if not isinstance(project, ProjectNode):
error = "a valid project"
if not isinstance(source, int) and not isinstance(source, str):
error = "a valid source identifier: id or name"
elif not isinstance(query, str):
error = "a valid query"
elif not isinstance(name, str):
error = "a valid name"
if error:
raise NotebookError("Please provide %s." % error)
resource = get_resource(source) if isinstance(source, int) else \
get_resource_by_name(source)
moissonneur_name = get_moissonneur_name(resource) if resource else \
source.lower()
try:
moissonneur = get_moissonneur(moissonneur_name)
except ImportError:
raise NotebookError("Invalid source identifier: %r" % source)
return run_moissonneur(moissonneur, project, name, query)
def get_moissonneur_name(ident):
""" Return moissonneur module name from RESOURCETYPE or crawler name """
# Does it quacks like a RESOURCETYPE ?
if hasattr(ident, 'get'):
ident = ident.get('crawler')
# Extract name from crawler class name, otherwise assume ident is already
# a moissonneur name.
if isinstance(ident, str) and ident.endswith('Crawler'):
return ident[:-len('Crawler')].lower()
def get_moissonneur(name):
""" Return moissonneur module from its name """
if not isinstance(name, str) or not name.islower():
raise NotebookError("Invalid moissonneur name: %r" % name)
module = importlib.import_module('moissonneurs.%s' % name)
module.name = name
return module
def run_moissonneur(moissonneur, project, name, query):
""" Run moissonneur and return resulting corpus """
# XXX Uber-kludge with gory details. Spaghetti rulezzzzz!
class Dummy(object):
pass
request = Dummy()
request.method = 'POST'
request.path = 'nowhere'
request.META = {}
# XXX 'string' only have effect on moissonneurs.pubmed; its value is added
# when processing request client-side, take a deep breath and see
# templates/projects/project.html for more details.
request.POST = {'string': name,
'query': query,
'N': QUERY_SIZE_N_MAX}
request.user = Dummy()
request.user.id = project.user_id
request.user.is_authenticated = lambda: True
if moissonneur.name == 'istex':
# Replace ALL spaces by plus signs
request.POST['query'] = '+'.join(filter(None, query.split(' ')))
try:
import json
r = moissonneur.query(request)
raw_json = r.content.decode('utf-8')
data = json.loads(raw_json)
if moissonneur.name == 'pubmed':
count = sum(x['count'] for x in data)
request.POST['query'] = raw_json
elif moissonneur.name == 'istex':
count = data.get('total', 0)
else:
count = data.get('results_nb', 0)
if count > 0:
corpus = moissonneur.save(request, project.id, return_corpus=True)
else:
return None
except (ValueError, Http404) as e:
raise e
# Sometimes strange things happens...
if corpus.name != name:
corpus.name = name
session.commit()
return corpus
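Put together, the notebook workflow these helpers enable looks roughly like this; the project URL, source name and query are placeholders, and the source may also be given as a RESOURCETYPE id:

project = myProject_fromUrl("https://gargantext.org/projects/1234/")
corpus  = newCorpus(project, source="hal", name="HAL graph corpus", query="graph theory")
if corpus is not None:
    print("%i documents fetched" % len(documents(corpus.id)))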
...@@ -30,7 +30,7 @@ def query( request): ...@@ -30,7 +30,7 @@ def query( request):
#ids = crawlerbot.get_ids(query) #ids = crawlerbot.get_ids(query)
return JsonHttpResponse({"results_nb":crawlerbot.results_nb}) return JsonHttpResponse({"results_nb":crawlerbot.results_nb})
def save(request, project_id): def save(request, project_id, return_corpus=False):
'''save''' '''save'''
if request.method == "POST": if request.method == "POST":
...@@ -101,6 +101,9 @@ def save(request, project_id): ...@@ -101,6 +101,9 @@ def save(request, project_id):
session.rollback() session.rollback()
# -------------------------------------------- # --------------------------------------------
if return_corpus:
return corpus
return render( return render(
template_name = 'pages/projects/wait.html', template_name = 'pages/projects/wait.html',
request = request, request = request,
......
...@@ -33,7 +33,7 @@ def query( request): ...@@ -33,7 +33,7 @@ def query( request):
print(results) print(results)
return JsonHttpResponse({"results_nb":crawlerbot.results_nb}) return JsonHttpResponse({"results_nb":crawlerbot.results_nb})
def save(request, project_id): def save(request, project_id, return_corpus=False):
'''save''' '''save'''
if request.method == "POST": if request.method == "POST":
...@@ -103,6 +103,9 @@ def save(request, project_id): ...@@ -103,6 +103,9 @@ def save(request, project_id):
session.rollback() session.rollback()
# -------------------------------------------- # --------------------------------------------
if return_corpus:
return corpus
return render( return render(
template_name = 'pages/projects/wait.html', template_name = 'pages/projects/wait.html',
request = request, request = request,
......
...@@ -29,7 +29,7 @@ def query( request): ...@@ -29,7 +29,7 @@ def query( request):
#ids = crawlerbot.get_ids(query) #ids = crawlerbot.get_ids(query)
return JsonHttpResponse({"results_nb":crawlerbot.results_nb}) return JsonHttpResponse({"results_nb":crawlerbot.results_nb})
def save(request, project_id): def save(request, project_id, return_corpus=False):
'''save''' '''save'''
if request.method == "POST": if request.method == "POST":
...@@ -100,6 +100,9 @@ def save(request, project_id): ...@@ -100,6 +100,9 @@ def save(request, project_id):
session.rollback() session.rollback()
# -------------------------------------------- # --------------------------------------------
if return_corpus:
return corpus
return render( return render(
template_name = 'pages/projects/wait.html', template_name = 'pages/projects/wait.html',
request = request, request = request,
......
...@@ -52,7 +52,7 @@ def query( request ): ...@@ -52,7 +52,7 @@ def query( request ):
def save(request , project_id): def save(request , project_id, return_corpus=False):
print("testISTEX:") print("testISTEX:")
print(request.method) print(request.method)
alist = ["bar","foo"] alist = ["bar","foo"]
...@@ -171,6 +171,9 @@ def save(request , project_id): ...@@ -171,6 +171,9 @@ def save(request , project_id):
session.rollback() session.rollback()
# -------------------------------------------- # --------------------------------------------
if return_corpus:
return corpus
return render( return render(
template_name = 'pages/projects/wait.html', template_name = 'pages/projects/wait.html',
request = request, request = request,
......
...@@ -33,7 +33,7 @@ def query( request): ...@@ -33,7 +33,7 @@ def query( request):
print(results) print(results)
return JsonHttpResponse({"results_nb":crawlerbot.results_nb}) return JsonHttpResponse({"results_nb":crawlerbot.results_nb})
def save(request, project_id): def save(request, project_id, return_corpus=False):
'''save''' '''save'''
if request.method == "POST": if request.method == "POST":
...@@ -104,6 +104,9 @@ def save(request, project_id): ...@@ -104,6 +104,9 @@ def save(request, project_id):
session.rollback() session.rollback()
# -------------------------------------------- # --------------------------------------------
if return_corpus:
return corpus
return render( return render(
template_name = 'pages/projects/wait.html', template_name = 'pages/projects/wait.html',
request = request, request = request,
......
...@@ -69,7 +69,7 @@ def query( request ): ...@@ -69,7 +69,7 @@ def query( request ):
return JsonHttpResponse(data) return JsonHttpResponse(data)
def save( request , project_id ) : def save( request , project_id, return_corpus=False ) :
# implicit global session # implicit global session
# do we have a valid project id? # do we have a valid project id?
try: try:
...@@ -164,6 +164,10 @@ def save( request , project_id ) : ...@@ -164,6 +164,10 @@ def save( request , project_id ) :
session.rollback() session.rollback()
# -------------------------------------------- # --------------------------------------------
sleep(1) sleep(1)
if return_corpus:
return corpus
return HttpResponseRedirect('/projects/' + str(project_id)) return HttpResponseRedirect('/projects/' + str(project_id))
data = alist data = alist
......
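Every crawler's `save` view gets the same change: the new `return_corpus` flag short-circuits the normal HTTP response so that programmatic callers such as `run_moissonneur` receive the corpus object instead. A simplified sketch of that shared tail, where `build_corpus` is a hypothetical stand-in for each crawler's real corpus-building code:

```python
from django.shortcuts import render

def build_corpus(request, project_id):
    """Hypothetical placeholder: each real crawler creates the corpus node
    from request.POST and schedules its parsing here."""
    raise NotImplementedError

def save(request, project_id, return_corpus=False):
    corpus = build_corpus(request, project_id)

    if return_corpus:
        # Notebook/API callers (e.g. run_moissonneur) want the corpus object,
        # not an HTTP response.
        return corpus

    # Browser callers keep the original behaviour and land on the wait page.
    return render(template_name='pages/projects/wait.html',
                  request=request)
```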
...@@ -2,11 +2,38 @@ ...@@ -2,11 +2,38 @@
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"# Advanced Gargantext Tutorial (Python)" "# Advanced Gargantext Tutorial (Python)"
] ]
}, },
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "TypeError",
"evalue": "'list' object is not callable",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-3-a8e3501c9a54>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'/srv/gargantext'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m: 'list' object is not callable"
]
}
],
"source": [
"import sys\n",
"sys.pa"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 1,
...@@ -28,7 +55,9 @@ ...@@ -28,7 +55,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 8,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [ "outputs": [
{ {
......
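The `TypeError` captured in the newly added notebook cell comes from calling `sys.path` as if it were a function; `sys.path` is a plain list. The working form of that cell would be along these lines (the `/srv/gargantext` path is taken from the traceback itself):

```python
import sys

# sys.path is a list, not a callable: extend it instead of calling it.
sys.path.append('/srv/gargantext')
```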
...@@ -203,6 +203,7 @@ ...@@ -203,6 +203,7 @@
// do something… // do something…
resetStatusForm("#createForm"); resetStatusForm("#createForm");
}) })
return false;
}) })
......
...@@ -57,7 +57,7 @@ ...@@ -57,7 +57,7 @@
<center id="corpus" class="help"> <center id="corpus" class="help">
<a data-toggle="modal" href="#addcorpus" > <a data-toggle="modal" href="#addcorpus" >
<button <button
type="button" type="button"
...@@ -440,11 +440,12 @@ ...@@ -440,11 +440,12 @@
// in the form "Add a corpus" // in the form "Add a corpus"
var type = $("#id_type").val() var type = $("#id_type").val()
var file = $("#id_file").val()
// 5 booleans // 5 booleans
var nameField = $("#id_name").val() != "" var nameField = $("#id_name").val() != ""
var typeField = (type != "") && (type != "0") var typeField = (type != "") && (type != "0")
var fileField = $("#id_file").val() != "" var fileField = file != ""
var wantfileField = $("#file_yes").prop("checked") var wantfileField = $("#file_yes").prop("checked")
var crawling = ((type==3)||(type==8)||(type==9)) && ! wantfileField var crawling = ((type==3)||(type==8)||(type==9)) && ! wantfileField
...@@ -457,6 +458,23 @@ ...@@ -457,6 +458,23 @@
if (! crawling) { if (! crawling) {
$("#submit_thing").prop('disabled' , !(nameField && typeField && fileField)) $("#submit_thing").prop('disabled' , !(nameField && typeField && fileField))
} }
// Automatically select CSV when type is undefined
// and we have a .csv file
if (!typeField && file && file.match(/.csv$/i)) {
// Get CSV type id
var csv = $('#id_type > option')
.filter(function() {
return $(this).text() === 'CSV'
})
.attr('value')
// Select CSV type
$('#id_type').val(csv)
// Focus on name field
setTimeout(function() {
$("#id_name").focus()
})
}
} }
function bringDaNoise() { function bringDaNoise() {
...@@ -532,7 +550,7 @@ ...@@ -532,7 +550,7 @@
$("#submit_thing").html("Process a {{ query_size }} sample!") $("#submit_thing").html("Process a {{ query_size }} sample!")
thequeries = data thequeries = data
var N=0,k=0; var N=0;
for(var i in thequeries) N += thequeries[i].count for(var i in thequeries) N += thequeries[i].count
if( N>0) { if( N>0) {
...@@ -571,12 +589,11 @@ ...@@ -571,12 +589,11 @@
$("#submit_thing").html("Process a {{ query_size }} sample!") $("#submit_thing").html("Process a {{ query_size }} sample!")
thequeries = data thequeries = data
var N=data.length,k=0; var N = data.total;
// for(var i in thequeries) N += thequeries[i].count
if( N>1) { if (N > 0) {
var total = JSON.parse(data).total console.log("N: "+N)
console.log("N: "+total) $("#theresults").html("<i> <b>"+pubmedquery+"</b>: "+N+" publications.</i><br>")
$("#theresults").html("<i> <b>"+pubmedquery+"</b>: "+total+" publications.</i><br>")
$('#submit_thing').prop('disabled', false); $('#submit_thing').prop('disabled', false);
} else { } else {
$("#theresults").html("<i> <b>"+data[0]+"</b></i><br>") $("#theresults").html("<i> <b>"+data[0]+"</b></i><br>")
...@@ -661,7 +678,7 @@ ...@@ -661,7 +678,7 @@
console.log(data) console.log(data)
console.log("SUCCESS") console.log("SUCCESS")
console.log("enabling "+"#"+value.id) console.log("enabling "+"#"+value.id)
// $("#"+value.id).attr('onclick','getGlobalResults(this);'); // $("#"+value.id).attr('onclick','getGlobalResults(this);');
$("#submit_thing").prop('disabled' , false) $("#submit_thing").prop('disabled' , false)
//$("#submit_thing").html("Process a {{ query_size }} sample!") //$("#submit_thing").html("Process a {{ query_size }} sample!")
...@@ -721,7 +738,7 @@ ...@@ -721,7 +738,7 @@
console.log(data) console.log(data)
console.log("SUCCESS") console.log("SUCCESS")
console.log("enabling "+"#"+value.id) console.log("enabling "+"#"+value.id)
// $("#"+value.id).attr('onclick','getGlobalResults(this);'); // $("#"+value.id).attr('onclick','getGlobalResults(this);');
$("#submit_thing").prop('disabled' , false) $("#submit_thing").prop('disabled' , false)
//$("#submit_thing").html("Process a {{ query_size }} sample!") //$("#submit_thing").html("Process a {{ query_size }} sample!")
...@@ -781,7 +798,7 @@ ...@@ -781,7 +798,7 @@
console.log(data) console.log(data)
console.log("SUCCESS") console.log("SUCCESS")
console.log("enabling "+"#"+value.id) console.log("enabling "+"#"+value.id)
// $("#"+value.id).attr('onclick','getGlobalResults(this);'); // $("#"+value.id).attr('onclick','getGlobalResults(this);');
$("#submit_thing").prop('disabled' , false) $("#submit_thing").prop('disabled' , false)
//$("#submit_thing").html("Process a {{ query_size }} sample!") //$("#submit_thing").html("Process a {{ query_size }} sample!")
...@@ -876,12 +893,12 @@ ...@@ -876,12 +893,12 @@
console.log("selected:", selectedId); console.log("selected:", selectedId);
// by typeID: 3 = PUBMED, 8 = ISTEX, 9 = CERN // by typeID: 3 = PUBMED, 8 = ISTEX, 9 = CERN
if ( selectedId == "3" if ( selectedId == "3"
|| selectedId == "8" || selectedId == "8"
|| selectedId == "9" || selectedId == "9"
|| selectedId == "10" || selectedId == "10"
|| selectedId == "11" || selectedId == "11"
|| selectedId == "12" || selectedId == "12"
) { ) {
console.log("show the button for: " + selectedId) console.log("show the button for: " + selectedId)
$("#div-fileornot").css("visibility", "visible"); $("#div-fileornot").css("visibility", "visible");
...@@ -1019,16 +1036,16 @@ ...@@ -1019,16 +1036,16 @@
function saveMultivac(query, N){ function saveMultivac(query, N){
console.log("In Multivac") console.log("In Multivac")
if(!query || query=="") return; if(!query || query=="") return;
console.log(query) console.log(query)
//var origQuery = query //var origQuery = query
var data = { "query" : query , "N": N }; var data = { "query" : query , "N": N };
// Replace all the slashes // Replace all the slashes
var projectid = window.location.href.split("projects")[1].replace(/\//g, '') var projectid = window.location.href.split("projects")[1].replace(/\//g, '')
console.log(data) console.log(data)
$.ajax({ $.ajax({
dataType: 'json', dataType: 'json',
...@@ -1066,16 +1083,16 @@ ...@@ -1066,16 +1083,16 @@
function save(query, N, urlGarg){ function save(query, N, urlGarg){
console.log("In Gargantext") console.log("In Gargantext")
if(!query || query=="") return; if(!query || query=="") return;
console.log(query) console.log(query)
//var origQuery = query //var origQuery = query
var data = { "query" : query , "N": N }; var data = { "query" : query , "N": N };
// Replace all the slashes // Replace all the slashes
var projectid = window.location.href.split("projects")[1].replace(/\//g, '') var projectid = window.location.href.split("projects")[1].replace(/\//g, '')
console.log(data) console.log(data)
$.ajax({ $.ajax({
dataType: 'json', dataType: 'json',
......