Commit 6622956b authored by Alexandre Delanoë

Merge branch 'testing' into stable

parents cde03f55 2bf705d9
......@@ -18,7 +18,8 @@ from gargantext import settings, models
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
config.set_main_option("sqlalchemy.url", settings.DATABASES['default']['URL'])
# Interpret the config file for Python logging.
# This line sets up loggers basically.
......@@ -10,7 +10,7 @@ from sqlalchemy import delete
def get_engine():
from sqlalchemy import create_engine
return create_engine( settings.DATABASES['default']['URL']
return create_engine( settings.DATABASES['default']['SECRET_URL']
, use_native_hstore = True
, json_serializer = json_dumps
, pool_size=20, max_overflow=0
......@@ -16,7 +16,8 @@ class CSVParser(Parser):
def parse(self, fp=None):
fp = fp or self._file
df = pandas.read_csv(fp, dtype=object, skip_blank_lines=True, sep=None,
df = pandas.read_csv(fp, dtype=object, engine='python',
skip_blank_lines=True, sep=None,
na_values=[], keep_default_na=False)
# Return a generator of dictionaries with column labels as keys,
import re
from .RIS import RISParser
......@@ -17,3 +19,34 @@ class ISIParser(RISParser):
"AB": {"type": "hyperdata", "key": "abstract", "separator": " "},
"WC": {"type": "hyperdata", "key": "fields"},
# Matches a standalone four-digit number (used as a year).
_year = re.compile(r'\b\d{4}\b')
# Matches one of the season abbreviations SPR, SUM, FAL or WIN as a whole
# word, case-insensitively.
_season = re.compile(r'\b(SPR|SUM|FAL|WIN)\b', re.I)
# Matches two three-letter words joined by a hyphen (e.g. "JAN-FEB"),
# case-insensitively; group 1 captures the first word.
_month_interval = re.compile(r'\b([A-Z]{3})-([A-Z]{3})\b', re.I)
# Matches two one- or two-digit numbers joined by a hyphen (e.g. "12-14");
# group 1 captures the first number.
_day_interval = re.compile(r'\b(\d{1,2})-(\d{1,2})\b')
def _preprocess_PD(self, PD, PY):
    """Normalize a raw "PD" date string before it is parsed as a date.

    Args:
        PD: Raw publication date string (e.g. "JAN-FEB 12-14").
        PY: Publication year string, or a falsy value when unknown.

    Returns:
        The date string with the year prepended when applicable, any
        season abbreviation removed, and month/day intervals reduced to
        their first element.
    """
    # Add a year to date if applicable.
    # NOTE(review): this condition was truncated in the source
    # ("if PY and and not"); it is reconstructed as "PY is exactly a
    # four-digit year and PD does not already contain one" -- confirm
    # against the upstream repository.
    if PY and self._year.fullmatch(PY) and not self._year.search(PD):
        PD = PY + " " + PD

    # Drop season if any
    PD = self._season.sub('', PD).strip()

    # If a month interval is present, keep only the first month
    PD = self._month_interval.sub(r'\1', PD)

    # If a day interval is present, keep only the first day
    PD = self._day_interval.sub(r'\1', PD)

    return PD
def parse(self, file):
    """Yield the parent parser's entries with a preprocessed "PD" field.

    Each entry produced by ``super().parse(file)`` is yielded; when it
    contains the key configured for "PD", that value is first rewritten
    by ``_preprocess_PD`` using the entry's "PY" value.

    Args:
        file: Passed through unchanged to the parent parser.

    Yields:
        The entries from the parent parser, modified in place.
    """
    PD = self._parameters["PD"]["key"]
    PY = self._parameters["PY"]["key"]

    for entry in super().parse(file):
        if PD in entry:
            # An entry may carry a date without a year field; pass None
            # in that case (_preprocess_PD skips the year step for a
            # falsy PY) instead of raising KeyError.
            year = entry[PY] if PY in entry else None
            entry[PD] = self._preprocess_PD(entry[PD], year)

        yield entry
......@@ -86,6 +86,9 @@ class Parser:
print("WARNING: Date unknown at _Parser level, using now()")
hyperdata['publication_date'] =
# XXX Handling prefixes is most likely useless: there seems to be only
# one prefix, which is "publication" (as in "publication_date").
# ...then parse all the "date" fields, to parse it into separate elements
prefixes = [key[:-5] for key in hyperdata.keys() if key[-5:] == "_date"]
for prefix in prefixes:
......@@ -30,7 +30,7 @@
<a class="btn btn-success btn-lg" target="blank" href="" title="Fill the form to sign up">
<a class="btn btn-success btn-lg" target="blank" href="" title="Fill the form to sign up">
<span class="glyphicon glyphicon-hand-right" aria-hidden="true"></span>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment