Commit 2bf705d9 authored by Alexandre Delanoë

Merge remote-tracking branch 'origin/simon-dev' into dev

parents b25deb7b fbfb5d60
...@@ -18,7 +18,8 @@ from gargantext import settings, models ...@@ -18,7 +18,8 @@ from gargantext import settings, models
# this is the Alembic Config object, which provides # this is the Alembic Config object, which provides
# access to the values within the .ini file in use. # access to the values within the .ini file in use.
config = context.config config = context.config
config.set_main_option("sqlalchemy.url", settings.DATABASES['default']['URL']) config.set_main_option("sqlalchemy.url",
settings.DATABASES['default']['SECRET_URL'])
# Interpret the config file for Python logging. # Interpret the config file for Python logging.
# This line sets up loggers basically. # This line sets up loggers basically.
......
...@@ -10,7 +10,7 @@ from sqlalchemy import delete ...@@ -10,7 +10,7 @@ from sqlalchemy import delete
def get_engine(): def get_engine():
from sqlalchemy import create_engine from sqlalchemy import create_engine
return create_engine( settings.DATABASES['default']['URL'] return create_engine( settings.DATABASES['default']['SECRET_URL']
, use_native_hstore = True , use_native_hstore = True
, json_serializer = json_dumps , json_serializer = json_dumps
, pool_size=20, max_overflow=0 , pool_size=20, max_overflow=0
......
...@@ -16,7 +16,8 @@ class CSVParser(Parser): ...@@ -16,7 +16,8 @@ class CSVParser(Parser):
def parse(self, fp=None): def parse(self, fp=None):
fp = fp or self._file fp = fp or self._file
df = pandas.read_csv(fp, dtype=object, skip_blank_lines=True, sep=None, df = pandas.read_csv(fp, dtype=object, engine='python',
skip_blank_lines=True, sep=None,
na_values=[], keep_default_na=False) na_values=[], keep_default_na=False)
# Return a generator of dictionaries with column labels as keys, # Return a generator of dictionaries with column labels as keys,
......
import re
from .RIS import RISParser from .RIS import RISParser
...@@ -17,3 +19,34 @@ class ISIParser(RISParser): ...@@ -17,3 +19,34 @@ class ISIParser(RISParser):
"AB": {"type": "hyperdata", "key": "abstract", "separator": " "}, "AB": {"type": "hyperdata", "key": "abstract", "separator": " "},
"WC": {"type": "hyperdata", "key": "fields"}, "WC": {"type": "hyperdata", "key": "fields"},
} }
# Patterns used by _preprocess_PD to clean up the raw "PD" date value.
# A standalone run of exactly four digits, treated as a year.
_year = re.compile(r'\b\d{4}\b')
# Season abbreviations SPR/SUM/FAL/WIN as whole words, case-insensitive.
_season = re.compile(r'\b(SPR|SUM|FAL|WIN)\b', re.I)
# Two three-letter words joined by a hyphen (e.g. "SEP-OCT"), case-insensitive;
# group 1 captures the first one.
_month_interval = re.compile(r'\b([A-Z]{3})-([A-Z]{3})\b', re.I)
# Two one- or two-digit numbers joined by a hyphen (e.g. "12-15");
# group 1 captures the first one.
_day_interval = re.compile(r'\b(\d{1,2})-(\d{1,2})\b')
def _preprocess_PD(self, PD, PY):
    """Simplify a raw "PD" date string before it is parsed as a date.

    Steps, in order:
      1. If PY contains a four-digit year and PD does not, prefix PD with
         PY and a single space.
      2. Remove season abbreviations (SPR/SUM/FAL/WIN) and strip the
         surrounding whitespace.
      3. Reduce a month interval ("SEP-OCT") to its first month.
      4. Reduce a day interval ("12-15") to its first day.

    PD: the date string to clean up.
    PY: the year string, or a falsy value when no year is available.
    Returns the cleaned-up date string.
    """
    date = PD

    # Only borrow the year from PY when PD does not already carry one.
    if PY:
        year_in_py = self._year.search(PY) is not None
        if year_in_py and self._year.search(date) is None:
            date = " ".join((PY, date))

    # Seasons are dropped entirely.
    date = self._season.sub('', date).strip()

    # Intervals (months first, then days) collapse to their first element.
    for interval in (self._month_interval, self._day_interval):
        date = interval.sub(r'\1', date)

    return date
def parse(self, file):
    """Yield the entries produced by the parent parser, with "PD" cleaned up.

    For every entry that holds a value under the "PD" key (as configured in
    self._parameters), that value is replaced by the result of
    _preprocess_PD. Entries without a "PD" value are yielded unchanged.

    file: passed through untouched to the parent class's parse().
    """
    pd_key = self._parameters["PD"]["key"]
    py_key = self._parameters["PY"]["key"]

    for entry in super().parse(file):
        if pd_key in entry:
            # An entry may have a date but no year. _preprocess_PD already
            # copes with a falsy year, so look it up with .get() instead of
            # raising KeyError here.
            # NOTE(review): assumes entries are dict-like (support .get()).
            year = entry.get(py_key)
            entry[pd_key] = self._preprocess_PD(entry[pd_key], year)
        yield entry
...@@ -86,6 +86,9 @@ class Parser: ...@@ -86,6 +86,9 @@ class Parser:
print("WARNING: Date unknown at _Parser level, using now()") print("WARNING: Date unknown at _Parser level, using now()")
hyperdata['publication_date'] = datetime.now() hyperdata['publication_date'] = datetime.now()
# XXX Handling prefixes is most likely useless: there seems to be only
# one prefix, which is "publication" (as in "publication_date").
# ...then parse all the "date" fields, to parse it into separate elements # ...then parse all the "date" fields, to parse it into separate elements
prefixes = [key[:-5] for key in hyperdata.keys() if key[-5:] == "_date"] prefixes = [key[:-5] for key in hyperdata.keys() if key[-5:] == "_date"]
for prefix in prefixes: for prefix in prefixes:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment