Commit c7e81064 authored by Alexandre Delanoë

Merge branch 'testing-merge' into stable-imt-merge

parents 5a9f3b3a aa325e73
@@ -14,7 +14,7 @@ TELL ALEMBIC TO NOT START FROM SCRATCH
 # "upgrade head" command. If you don't want to do this, you can of course
 # drop your database and really start from scratch.
-alembic stamp 601e9d9baa4c
+alembic stamp bedce47c9e34
 UPGRADE TO LATEST DATABASE VERSION
......
@@ -7,7 +7,7 @@ Create Date: 2017-07-06 10:52:16.161118
 """
 from alembic import op
 import sqlalchemy as sa
-from gargantext.tools.alembic import ReplaceableObject
+from gargantext.util.alembic import ReplaceableObject
 # revision identifiers, used by Alembic.
......
"""Fix issue with Node.hyperdata index
Revision ID: bedce47c9e34
Revises: 08230100f262
Create Date: 2017-07-10 11:30:59.168190
"""
from alembic import op
import sqlalchemy as sa
import gargantext
# revision identifiers, used by Alembic.
revision = 'bedce47c9e34'
down_revision = '08230100f262'
branch_labels = None
depends_on = None
def upgrade():
op.drop_index('nodes_hyperdata_idx', table_name='nodes')
op.create_index('nodes_hyperdata_idx', 'nodes', ['hyperdata'], unique=False, postgresql_using="gin")
def downgrade():
# We won't unfix the bug when downgrading...
pass
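Note (not part of the patch): the revision above boils down to rebuilding the index as GIN. A rough manual equivalent, should it ever need to be applied outside Alembic, might look like this (the connection string is a placeholder; in practice "alembic upgrade head" does this for you):

from sqlalchemy import create_engine, text

engine = create_engine("postgresql:///gargandb")  # placeholder DSN, adjust to your setup

with engine.begin() as conn:
    # same effect as upgrade() above: drop the old index on the JSONB column...
    conn.execute(text("DROP INDEX IF EXISTS nodes_hyperdata_idx"))
    # ...and recreate it as a GIN index
    conn.execute(text("CREATE INDEX nodes_hyperdata_idx ON nodes USING gin (hyperdata)"))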
@@ -36,7 +36,7 @@ import os
 import re
 import importlib
 from gargantext.util.lists import *
-from gargantext.util.tools import datetime, convert_to_date
+from gargantext.util import datetime, convert_to_datetime
 from .settings import BASE_DIR
 # types & models (nodes, lists, hyperdata, resource) ---------------------------------------------
@@ -108,9 +108,9 @@ INDEXED_HYPERDATA = {
     'publication_date':
         { 'id'             : 2
-        , 'type'           : datetime.datetime
-        , 'convert_to_db'  : convert_to_date
-        , 'convert_from_db': datetime.datetime.fromtimestamp
+        , 'type'           : datetime
+        , 'convert_to_db'  : convert_to_datetime
+        , 'convert_from_db': convert_to_datetime
         },
     'title':
......
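Note (not part of the patch): a rough sketch of how an INDEXED_HYPERDATA entry is consumed. Only the dict layout and convert_to_datetime come from this commit; the lookup and values below are illustrative.

from gargantext.constants import INDEXED_HYPERDATA

entry = INDEXED_HYPERDATA['publication_date']
db_value = entry['convert_to_db']("2014-10-23 09:57:42")   # -> timezone-aware datetime (UTC)
py_value = entry['convert_from_db'](db_value)              # same converter in both directions now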
 from django.core.management.base import BaseCommand, CommandError
-from gargantext.tools.show_nodes import tree_show, nodes
+from gargantext.util.show_nodes import tree_show, nodes
 import colorama
......
from django.core.management.base import BaseCommand, CommandError
from gargantext.models import Node


class Command(BaseCommand):
    help = 'Something'

    def handle(self, *args, **options):
        self.stdout.write(self.style.SUCCESS('Oh yeah!'))
@@ -58,26 +58,26 @@ class Node(Base):
     __tablename__ = 'nodes'
     __table_args__ = (
         Index('nodes_user_id_typename_parent_id_idx', 'user_id', 'typename', 'parent_id'),
-        Index('nodes_hyperdata_idx', 'hyperdata'))
+        Index('nodes_hyperdata_idx', 'hyperdata', postgresql_using='gin'))
     # TODO
     # create INDEX full_text_idx on nodes using gin(to_tsvector('english', hyperdata ->> 'abstract' || 'title'));
     id = Column(Integer, primary_key=True)
     typename = Column(NodeType, index=True)
     __mapper_args__ = { 'polymorphic_on': typename }
     # foreign keys
     user_id = Column(Integer, ForeignKey(User.id, ondelete='CASCADE'))
     user = relationship(User)
     parent_id = Column(Integer, ForeignKey('nodes.id', ondelete='CASCADE'))
     parent = relationship('Node', remote_side=[id])
     name = Column(String(255))
     date = Column(DateTime(timezone=True), default=datetime.now)
     hyperdata = Column(JSONB, default=dict)
     # metadata (see https://bashelton.com/2014/03/updating-postgresql-json-fields-via-sqlalchemy/)
     # To make search possible uncomment the line below
......
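Note (not part of the patch): the point of declaring the index with postgresql_using='gin' is that JSONB containment queries can actually use it. An illustrative query (the session import path and filter value are assumptions, not part of this commit):

from gargantext.util.db import session   # assumed import path
from gargantext.models import Node

# @>-style containment filter that a GIN index on hyperdata can serve
docs = (session.query(Node)
               .filter(Node.hyperdata.contains({'publication_year': 2014}))
               .all())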
from .dates import datetime, convert_to_datetime, MINYEAR
import os
from gargantext.settings import MEDIA_ROOT
from datetime import MINYEAR

from django.utils.dateparse import parse_datetime
from django.utils.timezone import datetime as _datetime, utc as UTC, now as utcnow

__all__ = ['convert_to_datetime', 'datetime', 'MINYEAR']


class datetime(_datetime):
    @staticmethod
    def now():
        return utcnow()

    @staticmethod
    def utcfromtimestamp(ts):
        return _datetime.utcfromtimestamp(ts).replace(tzinfo=UTC)

    @staticmethod
    def parse(s):
        dt = parse_datetime(s)
        return dt.astimezone(UTC) if dt.tzinfo else dt.replace(tzinfo=UTC)


def convert_to_datetime(dt):
    if isinstance(dt, (int, float)):
        return datetime.utcfromtimestamp(dt)
    elif isinstance(dt, str):
        return datetime.parse(dt)
    elif isinstance(dt, _datetime):
        args = (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
        return datetime(*args, tzinfo=dt.tzinfo or UTC).astimezone(UTC)
    else:
        raise ValueError("Can't convert to datetime: %r" % dt)
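Note (not part of the patch): these helpers normalize everything to timezone-aware UTC datetimes. A minimal usage sketch, with illustrative values:

from gargantext.util import datetime, convert_to_datetime

convert_to_datetime(1499680800)              # int/float -> datetime.utcfromtimestamp, tzinfo=UTC
convert_to_datetime("2017-07-10 11:30:59")   # str       -> datetime.parse, coerced to UTC
convert_to_datetime(datetime.now())          # datetime  -> rebuilt and normalized to UTC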
@@ -29,6 +29,7 @@ class ModelCache(dict):
                 continue
             if formatted_key in self:
                 self[key] = self[formatted_key]
+                element = self[key]
             else:
                 element = session.query(self._model).filter(or_(*conditions)).first()
                 if element is None:
......
@@ -461,6 +461,7 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM,
                     raise ValueError('Wrong header "%s" on line %i (only possible headers are "label", "forms" and "status")' % (colname, n_read_lines))
             if 'label' not in columns:
                 raise ValueError('CSV must contain at least one column with the header "label"')
+            continue
         if not len(csv_row):
             continue
@@ -567,7 +568,8 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM,
         'map': UnweightedList(),
         'main': UnweightedList(),
         'stop': UnweightedList(),
-        'groupings' : Translations()
+        'groupings' : Translations(),
+        'new_ngram_count': n_added_ng,
         }
     for list_type in imported_nodes_ngrams:
@@ -663,12 +665,13 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
         for ng_id in new_lists[list_type].items:
             collect(ng_id)
-    from gargantext.util.toolchain.main import t
-    print("MERGE DEBUG: starting index_new_ngrams", t())
-    n_added = index_new_ngrams(all_possibly_new_ngram_ids, onto_corpus)
-    print("MERGE DEBUG: finished index_new_ngrams", t())
+    if new_lists.get('new_ngram_count', 0) > 0:
+        from gargantext.util.toolchain.main import t
+        print("MERGE DEBUG: starting index_new_ngrams", t())
+        n_added = index_new_ngrams(all_possibly_new_ngram_ids, onto_corpus)
+        print("MERGE DEBUG: finished index_new_ngrams", t())
     my_log.append("MERGE: added %i new ngram occurrences in docs" % n_added)
     # ======== Get the old lists =========
     old_lists = {}
@@ -827,7 +830,7 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
 @shared_task
-def import_and_merge_ngramlists(file_contents, onto_corpus_id):
+def import_and_merge_ngramlists(file_contents, onto_corpus_id, overwrite=False):
     """
     A single function to run import_ngramlists and merge_ngramlists together
     """
@@ -837,6 +840,7 @@ def import_and_merge_ngramlists(file_contents, onto_corpus_id):
     corpus_node = session.query(Node).filter(Node.id == onto_corpus_id).first()
     # merge the new_lists onto those of the target corpus
-    log_msg = merge_ngramlists(new_lists, onto_corpus=corpus_node)
+    del_originals = ['stop', 'main', 'map'] if overwrite else []
+    log_msg = merge_ngramlists(new_lists, onto_corpus=corpus_node, del_originals=del_originals)
     return log_msg
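Note (not part of the patch): with the new keyword, the call path looks roughly like this (corpus id and contents are illustrative; import_and_merge_ngramlists is the task defined just above):

# overwrite=True makes import_and_merge_ngramlists pass
# del_originals=['stop', 'main', 'map'] to merge_ngramlists, so the corpus'
# existing lists are replaced instead of merged into.
log = import_and_merge_ngramlists(csv_contents, onto_corpus_id=42, overwrite=True)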
@@ -18,30 +18,30 @@ class MultivacParser(Parser):
         '''
         contents = filebuf.read().decode("UTF-8")
         data = json.loads(contents)
         filebuf.close()
         json_docs = data
         hyperdata_list = []
         hyperdata_path = { "id"       : "id"
                          , "title"    : "title"
                          , "abstract" : "abstract"
                          , "type"     : "type"
                          }
         for json_doc in json_docs:
             hyperdata = {}
             doc = json_doc["_source"]
             for key, path in hyperdata_path.items():
                 hyperdata[key] = doc.get(path, "")
             hyperdata["source"] = doc.get("serial" , {})\
                                      .get("journaltitle", "REPEC Database")
             try:
                 hyperdata["url"] = doc.get("file", {})\
                                       .get("url" , "")
@@ -51,15 +51,15 @@ class MultivacParser(Parser):
             hyperdata["authors"] = ", ".join(
                     [ p.get("person", {})
                        .get("name"  , "")
                       for p in doc.get("hasauthor", [])
                     ]
                     )
             year = doc.get("serial"   , {})\
                       .get("issuedate", None)
             if year == "Invalide date":
                 year = doc.get("issuedate" , None)
@@ -73,10 +73,7 @@ class MultivacParser(Parser):
                 date = datetime.now()
             hyperdata["publication_date"] = date
-            hyperdata["publication_year"]  = str(date.year)
-            hyperdata["publication_month"] = str(date.month)
-            hyperdata["publication_day"]   = str(date.day)
             hyperdata_list.append(hyperdata)
         return hyperdata_list
-import datetime
 import dateutil.parser
 import zipfile
 import re
 import dateparser as date_parser
 from gargantext.util.languages import languages
+from gargantext.util import datetime, convert_to_datetime, MINYEAR
-DEFAULT_DATE = datetime.datetime(datetime.MINYEAR, 1, 1)
+DEFAULT_DATE = datetime(MINYEAR, 1, 1)
 class Parser:
@@ -34,29 +34,29 @@ class Parser:
     def format_hyperdata_dates(self, hyperdata):
         """Format the dates found in the hyperdata.
         Examples:
-            {"publication_date": "2014-10-23 09:57:42"}
-                -> {"publication_date": "2014-10-23 09:57:42", "publication_year": "2014", ...}
+            {"publication_date": "2014-10-23 09:57:42+00:00"}
+                -> {"publication_date": "2014-10-23 09:57:42+00:00", "publication_year": "2014", ...}
             {"publication_year": "2014"}
-                -> {"publication_date": "2014-01-01 00:00:00", "publication_year": "2014", ...}
+                -> {"publication_date": "2014-01-01 00:00:00+00:00", "publication_year": "2014", ...}
         """
         # First, check the split dates...
         # This part mainly deal with Zotero data but can be usefull for others
         # parts
-        date_string = hyperdata.get('publication_date_to_parse', None)
+        date_string = hyperdata.get('publication_date_to_parse')
         if date_string is not None:
             date_string = re.sub(r'\/\/+(\w*|\d*)', '', date_string)
             try:
-                hyperdata['publication' + "_date"] = dateutil.parser.parse(
+                hyperdata['publication_date'] = dateutil.parser.parse(
                     date_string,
                     default=DEFAULT_DATE
-                ).strftime("%Y-%m-%d %H:%M:%S")
+                )
             except Exception as error:
                 print(error, 'Date not parsed for:', date_string)
-                hyperdata['publication_date'] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        elif hyperdata.get('publication_year', None) is not None:
+                hyperdata['publication_date'] = datetime.now()
+        elif hyperdata.get('publication_year') is not None:
             prefixes = [key[:-5] for key in hyperdata.keys() if key[-5:] == "_year"]
             # eg prefixes : ['publication']
@@ -64,56 +64,45 @@ class Parser:
             for prefix in prefixes:
                 date_string = hyperdata[prefix + "_year"]
-                # FIXME: except for year is it necessary to test that key exists
-                # when we have a default value in .get(key, "01") ??
-                key = prefix + "_month"
-                if key in hyperdata:
-                    date_string += " " + hyperdata.get(key, "01")
-                key = prefix + "_day"
-                if key in hyperdata:
-                    date_string += " " + hyperdata.get(key, "01")
-                key = prefix + "_hour"
-                if key in hyperdata:
-                    date_string += " " + hyperdata.get(key, "01")
-                key = prefix + "_minute"
-                if key in hyperdata:
-                    date_string += ":" + hyperdata.get(key, "01")
-                key = prefix + "_second"
-                if key in hyperdata:
-                    date_string += ":" + hyperdata.get(key, "01")
+                for part in ('month', 'day', 'hour', 'minute', 'second'):
+                    key = prefix + '_' + part
+                    if key not in hyperdata:
+                        break
+                    sep = ":" if key in ('minute', 'second') else " "
+                    date_string += sep + hyperdata.get(key, '01')
                 try:
-                    hyperdata[prefix + "_date"] = dateutil.parser.parse(date_string).strftime("%Y-%m-%d %H:%M:%S")
+                    hyperdata[prefix + "_date"] = dateutil.parser.parse(date_string)
                 except Exception as error:
                     try:
                         print("_Parser: error in full date parse", error, date_string)
                         # Date format: 1994 NOV-DEC
-                        hyperdata[prefix + "_date"] = date_parser.parse(str(date_string)[:8]).strftime("%Y-%m-%d %H:%M:%S")
+                        hyperdata[prefix + "_date"] = date_parser.parse(str(date_string)[:8])
                     except Exception as error:
                         try:
                             print("_Parser: error in short date parse", error)
                             # FIXME Date format: 1994 SPR
                             # By default, we take the year only
-                            hyperdata[prefix + "_date"] = date_parser.parse(str(date_string)[:4]).strftime("%Y-%m-%d %H:%M:%S")
+                            hyperdata[prefix + "_date"] = date_parser.parse(str(date_string)[:4])
                         except Exception as error:
                             print("_Parser:", error)
         else:
             print("WARNING: Date unknown at _Parser level, using now()")
-            hyperdata['publication_date'] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+            hyperdata['publication_date'] = datetime.now()
         # ...then parse all the "date" fields, to parse it into separate elements
         prefixes = [key[:-5] for key in hyperdata.keys() if key[-5:] == "_date"]
         for prefix in prefixes:
-            date = dateutil.parser.parse(hyperdata[prefix + "_date"])
-            #print(date)
-            hyperdata[prefix + "_year"]   = date.strftime("%Y")
-            hyperdata[prefix + "_month"]  = date.strftime("%m")
-            hyperdata[prefix + "_day"]    = date.strftime("%d")
-            hyperdata[prefix + "_hour"]   = date.strftime("%H")
-            hyperdata[prefix + "_minute"] = date.strftime("%M")
-            hyperdata[prefix + "_second"] = date.strftime("%S")
+            name = prefix + "_date"
+            date = hyperdata[name]
+            hyperdata[name] = str(convert_to_datetime(date))
+            for part in ('year', 'month', 'day', 'hour', 'minute', 'second'):
+                hyperdata[prefix + '_' + part] = getattr(date, part)
         # print("line 116", hyperdata['publication_date'])
         # finally, return the transformed result!
         return hyperdata
......
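Note (not part of the patch): a worked example of what the rewritten year/month/day loop produces, with illustrative hyperdata:

hyperdata = {'publication_year': '2014', 'publication_month': '10', 'publication_day': '23'}
# date_string is built as "2014 10 23" (the loop breaks at the first missing
# part, here 'publication_hour'); dateutil turns it into datetime(2014, 10, 23),
# publication_date now stays a real datetime object, and the final pass fills
# publication_year/_month/_day/... back in with getattr(date, part) instead of
# strftime strings.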
 # Make this a standalone script...
-# Can be called this way: python3 gargantext/tools/show_nodes.py
+# Can be called this way: python3 gargantext/util/show_nodes.py
 import os
 import django
......
@@ -43,8 +43,7 @@ def _nodes_hyperdata_generator(corpus):
                         key['id'],
                         None,
                         None,
-                        value.strftime("%Y-%m-%d %H:%M:%S"),
-                        # FIXME check timestamp +%Z
+                        str(value),
                         None,
                         None,
                     )
......
@@ -9,7 +9,6 @@ from gargantext.util.db import get_engine
 from gargantext.util.db_cache import cache
 from gargantext.constants import DEFAULT_COOC_THRESHOLD, NODETYPES
 from gargantext.constants import INDEXED_HYPERDATA
-from gargantext.util.tools import datetime, convert_to_date
 def compute_coocs( corpus,
                    overwrite_id = None,
@@ -95,7 +94,7 @@ def compute_coocs( corpus,
     # 2b) stating the filters
     cooc_filter_sql = """
             WHERE
                 n.typename = {nodetype_id}
             AND n.parent_id = {corpus_id}
             GROUP BY 1,2
@@ -105,7 +104,7 @@ def compute_coocs( corpus,
         """.format( nodetype_id = NODETYPES.index('DOCUMENT')
                   , corpus_id = corpus.id
                   )
     # 3) taking the cooccurrences of ngram x2
     ngram_filter_A_sql += """
         -- STEP 1: X axis of the matrix
@@ -162,25 +161,25 @@ def compute_coocs( corpus,
     # 4) prepare the synonyms
     if groupings_id:
         ngram_filter_A_sql += """
            LEFT JOIN nodes_ngrams_ngrams
                   AS grA ON wlA.ngram_id = grA.ngram1_id
                  AND grA.node_id = {groupings_id}
            -- \--> adding (joining) ngrams that are grouped
            LEFT JOIN nodes_ngrams
                   AS wlAA ON grA.ngram2_id = wlAA.ngram_id
                  AND wlAA.node_id = wlA.node_id
            -- \--> adding (joining) ngrams that are not grouped
            --LEFT JOIN ngrams AS wlAA ON grA.ngram2_id = wlAA.id
            -- \--> for joining all synonyms even if they are not in the main list (white list)
            """.format(groupings_id = groupings_id)
        ngram_filter_B_sql += """
            LEFT JOIN nodes_ngrams_ngrams
                   AS grB ON wlB.ngram_id = grB.ngram1_id
                  AND grB.node_id = {groupings_id}
            -- \--> adding (joining) ngrams that are grouped
            LEFT JOIN nodes_ngrams
                   AS wlBB ON grB.ngram2_id = wlBB.ngram_id
                  AND wlBB.node_id = wlB.node_id
            -- \--> adding (joining) ngrams that are not grouped
......
import os
from gargantext.settings import MEDIA_ROOT

import datetime
import dateutil


def convert_to_date(date):
    if isinstance(date, (int, float)):
        return datetime.datetime.timestamp(date)
    else:
        return dateutil.parser.parse(date)


def ensure_dir(user):
    '''
    If user is new, folder does not exist yet, create it then
    '''
    dirpath = '%s/corpora/%s' % (MEDIA_ROOT, user.username)
    if not os.path.exists(dirpath):
        print("Creating folder %s" % dirpath)
        os.makedirs(dirpath)
@@ -90,10 +90,11 @@ class CSVLists(APIView):
             # import the csv
             # try:
             log_msg = "Async generation"
             corpus_node_id = corpus_node.id
-            scheduled(import_and_merge_ngramlists)(csv_contents, corpus_node_id)
+            scheduled(import_and_merge_ngramlists)(csv_contents, corpus_node_id,
+                                                   overwrite=bool(params.get('overwrite')))
             return JsonHttpResponse({
                     'log': log_msg,
                 }, 200)
@@ -153,7 +154,8 @@ class CSVLists(APIView):
             # attempt to merge and send response
             try:
                 # merge the source_lists onto those of the target corpus
-                log_msg = merge_ngramlists(source_lists, onto_corpus=corpus_node)
+                delete = todo_lists if bool(params.get('overwrite')) else []
+                log_msg = merge_ngramlists(source_lists, onto_corpus=corpus_node, del_originals=delete)
                 return JsonHttpResponse({
                         'log': log_msg,
                     }, 200)
......
@@ -250,6 +250,23 @@ em {
         <br/>
+        <div class="checkbox">
+          <label>
+            <input type="checkbox" id="importoverwrite"> Overwrite old lists
+            <script>
+              function updateSubmitLabel() {
+                  $('#importsubmit').val($(this).is(':checked') ? 'Overwrite current table' : 'Import and merge with current table');
+              }
+              $(function() {
+                  updateSubmitLabel.call($('#importoverwrite'));
+                  $('#importoverwrite').change(updateSubmitLabel);
+              });
+            </script>
+          </label>
+        </div>
+        <br/>
         <input type="submit" class="btn btn-xs btn-info" id="importsubmit" value="Import and merge with current table" />
     </form>
 </div>
@@ -372,6 +389,8 @@ function listmergeUpdate(aFormData){
     // all params are added in the url like a GET
     theUrl += "&from_corpus="+sourceCorpusId
     theUrl += "&todo="+todoLists.join(',')
+    if ($('#importoverwrite').is(':checked'))
+        theUrl += "&overwrite=1"
     // result url looks like this : /api/ngramlists/import?onto_corpus=2&from=13308&todo=map,stop
     // console.log(theUrl)
@@ -424,7 +443,7 @@ function listmergeCsvPost(theFile){
     //postCorpusFile
     $.ajax({
-        url: "{{importroute | safe}}",
+        url: "{{importroute | safe}}" + ($('#importoverwrite').is(':checked') ? '&overwrite=1' : ''),
         type: 'POST',
         async: true,
         contentType: false,
@@ -436,11 +455,11 @@ function listmergeCsvPost(theFile){
         success: function(response) {
            my_html = '<h3 style="color:green">File upload, you will receive a notification email</h3>'
            my_html += "<p class='note'>" + response['log'].replace(/\n/g, '<br/>') + "</p>"
-           my_html += "<p'>(this page will reload in 3s)</p>"
+           my_html += "<p'>(this page will reload in 30s)</p>"
            $('#formanswer').html(my_html);
            console.log(response) ;
            // reload after 3s
-           setTimeout("location.reload(true)", 3000);
+           setTimeout("location.reload(true)", 30000);
         },
         error: function(result, t) {
             if (t != 'timeout') {
......