Commit 54d85ec6 authored by PkSM3

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

parents 6b29957f cdac66e2
import os, sys
import linecache
from gargantext_web.settings import MEDIA_ROOT
def ensure_dir(user):
    '''
    Make sure the corpora folder of the given user exists.

    The folder is "<MEDIA_ROOT>/corpora/<user.username>". A new user does not
    have one yet, so it is created on demand (missing parents included).

    user: any object exposing a "username" attribute.
    '''
    dirpath = '%s/corpora/%s' % (MEDIA_ROOT, user.username)
    if not os.path.exists(dirpath):
        print("Creating folder %s" % dirpath)
        # exist_ok=True: a concurrent request for the same user may create
        # the folder between the check above and this call; that must not
        # surface as a FileExistsError.
        os.makedirs(dirpath, exist_ok=True)
def PrintException():
    '''
    Print a one-line report about the exception currently being handled.

    Meant to be called from an "except" block. The report contains:
    - the file
    - the line number (and the source text of that line)
    - the exception itself

    The file and line are those of the first frame of the traceback, i.e.
    the line of the function that caught the exception.

    If no exception is being handled, a short notice is printed instead of
    failing on the missing traceback.
    '''
    exc_type, exc_obj, tb = sys.exc_info()
    if tb is None:
        # sys.exc_info() yields (None, None, None) outside of an except block.
        print('PrintException called with no active exception')
        return
    frame = tb.tb_frame
    lineno = tb.tb_lineno
    filename = frame.f_code.co_filename
    # Drop stale cache entries in case the file changed on disk.
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, frame.f_globals)
    print('EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj))
...@@ -75,7 +75,9 @@ ALLOWED_HOSTS = ['localhost', ...@@ -75,7 +75,9 @@ ALLOWED_HOSTS = ['localhost',
'stable.gargantext.org', 'stable.gargantext.org',
'dev.gargantext.org', 'dev.gargantext.org',
'iscpif.gargantext.org', 'iscpif.gargantext.org',
'gargantext.iscpif.fr',
'mines.gargantext.org', 'mines.gargantext.org',
'pasteur.gargantext.org',
'beta.gargantext.org', 'beta.gargantext.org',
'garg-dev.iscpif.fr', 'garg-dev.iscpif.fr',
'garg-stable.iscpif.fr', 'garg-stable.iscpif.fr',
......
...@@ -26,6 +26,9 @@ from parsing.corpustools import add_resource, parse_resources, extract_ngrams, c ...@@ -26,6 +26,9 @@ from parsing.corpustools import add_resource, parse_resources, extract_ngrams, c
from gargantext_web.celery import apply_workflow from gargantext_web.celery import apply_workflow
from admin.utils import ensure_dir
def project(request, project_id): def project(request, project_id):
# do we have a valid project id? # do we have a valid project id?
...@@ -140,11 +143,7 @@ def project(request, project_id): ...@@ -140,11 +143,7 @@ def project(request, project_id):
session.commit() session.commit()
# If user is new, folder does not exist yet, create it then # If user is new, folder does not exist yet, create it then
dirpath = '%s/corpora/%s' % (MEDIA_ROOT, request.user.username) ensure_dir(request.user)
if not os.path.exists(dirpath):
print("Creating folder %s" % dirpath)
os.makedirs(dirpath)
# Save the uploaded file # Save the uploaded file
filepath = '%s/corpora/%s/%s' % (MEDIA_ROOT, request.user.username, thefile._name) filepath = '%s/corpora/%s/%s' % (MEDIA_ROOT, request.user.username, thefile._name)
......
...@@ -4,14 +4,21 @@ from .RisFileParser import RisFileParser ...@@ -4,14 +4,21 @@ from .RisFileParser import RisFileParser
class IsiFileParser(RisFileParser): class IsiFileParser(RisFileParser):
_parameters = { def __init__(self):
b"ER": {"type": "delimiter"},
b"TI": {"type": "metadata", "key": "title", "separator": " "}, super(RisFileParser, self).__init__()
b"AU": {"type": "metadata", "key": "authors", "separator": ", "},
b"DI": {"type": "metadata", "key": "doi"}, self._begin = 3
b"PY": {"type": "metadata", "key": "publication_year"},
b"PD": {"type": "metadata", "key": "publication_month"}, self._parameters = {
b"LA": {"type": "metadata", "key": "language_fullname"}, b"ER": {"type": "delimiter"},
b"AB": {"type": "metadata", "key": "abstract", "separator": " "}, b"TI": {"type": "metadata", "key": "title", "separator": " "},
b"WC": {"type": "metadata", "key": "fields"}, b"AU": {"type": "metadata", "key": "authors", "separator": ", "},
} b"DI": {"type": "metadata", "key": "doi"},
b"PY": {"type": "metadata", "key": "publication_year"},
b"PD": {"type": "metadata", "key": "publication_month"},
b"LA": {"type": "metadata", "key": "language_fullname"},
b"AB": {"type": "metadata", "key": "abstract", "separator": " "},
b"WC": {"type": "metadata", "key": "fields"},
}
...@@ -2,17 +2,22 @@ from .RisFileParser import RisFileParser ...@@ -2,17 +2,22 @@ from .RisFileParser import RisFileParser
class JstorFileParser(RisFileParser): class JstorFileParser(RisFileParser):
def __init__(self):
super(RisFileParser, self).__init__()
self._begin = 3
_parameters = { self._parameters = {
b"ER": {"type": "delimiter"}, b"ER": {"type": "delimiter"},
b"TI": {"type": "metadata", "key": "title", "separator": " "}, b"TI": {"type": "metadata", "key": "title", "separator": " "},
b"AU": {"type": "metadata", "key": "authors", "separator": ", "}, b"AU": {"type": "metadata", "key": "authors", "separator": ", "},
b"UR": {"type": "metadata", "key": "doi"}, b"UR": {"type": "metadata", "key": "doi"},
b"Y1": {"type": "metadata", "key": "publication_year"}, b"Y1": {"type": "metadata", "key": "publication_year"},
b"PD": {"type": "metadata", "key": "publication_month"}, b"PD": {"type": "metadata", "key": "publication_month"},
b"LA": {"type": "metadata", "key": "language_iso2"}, b"LA": {"type": "metadata", "key": "language_iso2"},
b"AB": {"type": "metadata", "key": "abstract", "separator": " "}, b"AB": {"type": "metadata", "key": "abstract", "separator": " "},
b"WC": {"type": "metadata", "key": "fields"}, b"WC": {"type": "metadata", "key": "fields"},
} }
from django.db import transaction from django.db import transaction
from .FileParser import FileParser from .FileParser import FileParser
from ..Caches import LanguagesCache
class RisFileParser(FileParser): class RisFileParser(FileParser):
_parameters = { def __init__(self, language_cache=None):
b"ER": {"type": "delimiter"},
b"TI": {"type": "metadata", "key": "title", "separator": " "}, super(FileParser, self).__init__()
b"AU": {"type": "metadata", "key": "authors", "separator": ", "}, self._languages_cache = LanguagesCache() if language_cache is None else language_cache
b"UR": {"type": "metadata", "key": "doi"},
b"PY": {"type": "metadata", "key": "publication_year"}, self._begin = 6
b"PD": {"type": "metadata", "key": "publication_month"},
b"LA": {"type": "metadata", "key": "language_iso2"}, self._parameters = {
b"AB": {"type": "metadata", "key": "abstract", "separator": " "}, b"ER": {"type": "delimiter"},
b"WC": {"type": "metadata", "key": "fields"}, b"TI": {"type": "metadata", "key": "title", "separator": " "},
} b"ST": {"type": "metadata", "key": "subtitle", "separator": " "},
b"AU": {"type": "metadata", "key": "authors", "separator": ", "},
b"UR": {"type": "metadata", "key": "doi"},
b"PY": {"type": "metadata", "key": "publication_year"},
b"PD": {"type": "metadata", "key": "publication_month"},
b"LA": {"type": "metadata", "key": "language_iso2"},
b"AB": {"type": "metadata", "key": "abstract", "separator": " "},
b"WC": {"type": "metadata", "key": "fields"},
}
def _parse(self, file): def _parse(self, file):
metadata = {} metadata = {}
...@@ -42,9 +52,10 @@ class RisFileParser(FileParser): ...@@ -42,9 +52,10 @@ class RisFileParser(FileParser):
last_key = parameter_key last_key = parameter_key
last_values = [] last_values = []
try: try:
last_values.append(line[3:-1].decode()) last_values.append(line[self._begin:-1].decode())
except Exception as error: except Exception as error:
print(error) print(error)
# if a metadata object is left in memory, yield it as well # if a metadata object is left in memory, yield it as well
if metadata: if metadata:
#print(metadata['title'])
yield metadata yield metadata
...@@ -4,7 +4,7 @@ parsers = { ...@@ -4,7 +4,7 @@ parsers = {
'Pubmed (xml format)' : PubmedFileParser, 'Pubmed (xml format)' : PubmedFileParser,
'Web of Science (ISI format)' : IsiFileParser, 'Web of Science (ISI format)' : IsiFileParser,
'Scopus (RIS format)' : RisFileParser, 'Scopus (RIS format)' : RisFileParser,
'Zotero (RIS format)' : RisFileParser, 'Zotero (RIS format)' : JstorFileParser,
'Jstor (RIS format)' : JstorFileParser, 'Jstor (RIS format)' : JstorFileParser,
#'Europress' : EuropressFileParser, #'Europress' : EuropressFileParser,
'Europress (French)' : EuropressFileParser, 'Europress (French)' : EuropressFileParser,
......
...@@ -37,6 +37,8 @@ from parsing.corpustools import add_resource, parse_resources, extract_ngrams, c ...@@ -37,6 +37,8 @@ from parsing.corpustools import add_resource, parse_resources, extract_ngrams, c
from gargantext_web.celery import apply_workflow from gargantext_web.celery import apply_workflow
from time import sleep from time import sleep
from admin.utils import ensure_dir
def getGlobalStats(request ): def getGlobalStats(request ):
print(request.method) print(request.method)
alist = ["bar","foo"] alist = ["bar","foo"]
...@@ -141,7 +143,9 @@ def doTheQuery(request , project_id): ...@@ -141,7 +143,9 @@ def doTheQuery(request , project_id):
# """ # """
ensure_dir(request.user)
tasks = MedlineFetcher() tasks = MedlineFetcher()
for i in range(8): for i in range(8):
t = threading.Thread(target=tasks.worker2) #thing to do t = threading.Thread(target=tasks.worker2) #thing to do
t.daemon = True # thread dies when main thread (only non-daemon thread) exits. t.daemon = True # thread dies when main thread (only non-daemon thread) exits.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment