Commit 6ec787fe authored by Administrator

[TYPOS]

parent 2c8ae069
...@@ -149,7 +149,7 @@ class ModelCache(dict): ...@@ -149,7 +149,7 @@ class ModelCache(dict):
key = getattr(element, column_name) key = getattr(element, column_name)
self[key] = element self[key] = element
class Cache: class Cache():
def __getattr__(self, key): def __getattr__(self, key):
try: try:
......
...@@ -81,7 +81,7 @@ def project(request, project_id): ...@@ -81,7 +81,7 @@ def project(request, project_id):
# deal with the form # deal with the form
if request.method == 'POST': if request.method == 'POST':
# fomr validation # form validation
form = CustomForm(request.POST, request.FILES) form = CustomForm(request.POST, request.FILES)
if form.is_valid(): if form.is_valid():
# extract information from the form # extract information from the form
...@@ -89,6 +89,7 @@ def project(request, project_id): ...@@ -89,6 +89,7 @@ def project(request, project_id):
thefile = form.cleaned_data['file'] thefile = form.cleaned_data['file']
print("thetype:",form.cleaned_data['type']) # <-- e.g: im receiving "isi" print("thetype:",form.cleaned_data['type']) # <-- e.g: im receiving "isi"
resourcetype = cache.ResourceType[form.cleaned_data['type']] # e.g: here it converts to "pubmed" idk why resourcetype = cache.ResourceType[form.cleaned_data['type']] # e.g: here it converts to "pubmed" idk why
print("resourcetype:", resourcetype)
# which default language shall be used? # which default language shall be used?
if resourcetype.name == "europress_french": if resourcetype.name == "europress_french":
language_id = cache.Language['fr'].id language_id = cache.Language['fr'].id
......
...@@ -94,10 +94,10 @@ class EuropressFileParser(FileParser): ...@@ -94,10 +94,10 @@ class EuropressFileParser(FileParser):
format_journal = re.compile('(.*), (.*)', re.UNICODE) format_journal = re.compile('(.*), (.*)', re.UNICODE)
test_journal = format_journal.match(name.text) test_journal = format_journal.match(name.text)
if test_journal is not None: if test_journal is not None:
metadata['source'] = test_journal.group(1) metadata['journal'] = test_journal.group(1)
metadata['volume'] = test_journal.group(2) metadata['volume'] = test_journal.group(2)
else: else:
metadata['source'] = name.text.encode(codif) metadata['journal'] = name.text.encode(codif)
for header in html_article.xpath(header_xpath): for header in html_article.xpath(header_xpath):
try: try:
...@@ -169,7 +169,7 @@ class EuropressFileParser(FileParser): ...@@ -169,7 +169,7 @@ class EuropressFileParser(FileParser):
metadata['page'] = test_page.group(1).encode(codif) metadata['page'] = test_page.group(1).encode(codif)
metadata['title'] = html_article.xpath(title_xpath).encode(codif) metadata['title'] = html_article.xpath(title_xpath).encode(codif)
metadata['text'] = html_article.xpath(text_xpath) metadata['abstract'] = html_article.xpath(text_xpath)
line = 0 line = 0
br_tag = 10 br_tag = 10
...@@ -215,25 +215,25 @@ class EuropressFileParser(FileParser): ...@@ -215,25 +215,25 @@ class EuropressFileParser(FileParser):
metadata['publication_year'] = metadata['publication_date'].strftime('%Y') metadata['publication_year'] = metadata['publication_date'].strftime('%Y')
metadata['publication_month'] = metadata['publication_date'].strftime('%m') metadata['publication_month'] = metadata['publication_date'].strftime('%m')
metadata['publication_day'] = metadata['publication_date'].strftime('%d') metadata['publication_day'] = metadata['publication_date'].strftime('%d')
metadata['publication_date'] = "" metadata.pop('publication_date')
if len(metadata['text'])>0 and format_europresse == 50: if len(metadata['abstract'])>0 and format_europresse == 50:
metadata['doi'] = str(metadata['text'][-9]) metadata['doi'] = str(metadata['abstract'][-9])
metadata['text'].pop() metadata['abstract'].pop()
# Here add separator for paragraphs # Here add separator for paragraphs
metadata['text'] = str(' '.join(metadata['text'])) metadata['abstract'] = str(' '.join(metadata['abstract']))
metadata['text'] = str(re.sub('Tous droits réservés.*$', '', metadata['text'])) metadata['abstract'] = str(re.sub('Tous droits réservés.*$', '', metadata['abstract']))
elif format_europresse == 1: elif format_europresse == 1:
metadata['doi'] = ' '.join(html_article.xpath(doi_xpath)) metadata['doi'] = ' '.join(html_article.xpath(doi_xpath))
metadata['text'] = metadata['text'][:-9] metadata['abstract'] = metadata['abstract'][:-9]
# Here add separator for paragraphs # Here add separator for paragraphs
metadata['text'] = str(' '.join(metadata['text'])) metadata['abstract'] = str(' '.join(metadata['abstract']))
else: else:
metadata['doi'] = "not found" metadata['doi'] = "not found"
metadata['length_words'] = len(metadata['text'].split(' ')) metadata['length_words'] = len(metadata['abstract'].split(' '))
metadata['length_letters'] = len(metadata['text']) metadata['length_letters'] = len(metadata['abstract'])
metadata['bdd'] = u'europresse' metadata['bdd'] = u'europresse'
metadata['url'] = u'' metadata['url'] = u''
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment