Commit 8ab84cb6 authored by PkSM3

[UPDATE] PubMed parser: take the PubDate tag into account when determining the publication date

parent 285f606c
...@@ -2,6 +2,7 @@ from django.db import transaction ...@@ -2,6 +2,7 @@ from django.db import transaction
from lxml import etree from lxml import etree
from .FileParser import FileParser from .FileParser import FileParser
from ..NgramsExtractors import * from ..NgramsExtractors import *
from datetime import datetime
class PubmedFileParser(FileParser): class PubmedFileParser(FileParser):
...@@ -21,7 +22,10 @@ class PubmedFileParser(FileParser): ...@@ -21,7 +22,10 @@ class PubmedFileParser(FileParser):
"title" : 'MedlineCitation/Article/ArticleTitle', "title" : 'MedlineCitation/Article/ArticleTitle',
"language_iso3" : 'MedlineCitation/Article/Language', "language_iso3" : 'MedlineCitation/Article/Language',
"doi" : 'PubmedData/ArticleIdList/ArticleId[@type=doi]', "doi" : 'PubmedData/ArticleIdList/ArticleId[@type=doi]',
"abstract" : 'MedlineCitation/Article/Abstract/AbstractText', "realdate_full_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate',
"realdate_year_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Year',
"realdate_month_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Month',
"realdate_day_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Day',
"publication_year" : 'MedlineCitation/DateCreated/Year', "publication_year" : 'MedlineCitation/DateCreated/Year',
"publication_month" : 'MedlineCitation/DateCreated/Month', "publication_month" : 'MedlineCitation/DateCreated/Month',
"publication_day" : 'MedlineCitation/DateCreated/Day', "publication_day" : 'MedlineCitation/DateCreated/Day',
...@@ -30,6 +34,7 @@ class PubmedFileParser(FileParser): ...@@ -30,6 +34,7 @@ class PubmedFileParser(FileParser):
for key, path in metadata_path.items(): for key, path in metadata_path.items():
try: try:
xml_node = xml_article.find(path) xml_node = xml_article.find(path)
# Authors tag
if key == 'authors': if key == 'authors':
metadata[key] = ', '.join([ metadata[key] = ', '.join([
xml_author.find('ForeName').text + ' ' + xml_author.find('LastName').text xml_author.find('ForeName').text + ' ' + xml_author.find('LastName').text
...@@ -37,8 +42,40 @@ class PubmedFileParser(FileParser): ...@@ -37,8 +42,40 @@ class PubmedFileParser(FileParser):
]) ])
else: else:
metadata[key] = xml_node.text metadata[key] = xml_node.text
except: except:
pass pass
# Date-Decision
# forge.iscpif.fr/issues/1418
RealDate = ""
if "realdate_full_" in metadata:
RealDate = metadata["realdate_full_"]
else:
if "realdate_year_" in metadata: RealDate+=metadata["realdate_year_"]
if "realdate_month_" in metadata: RealDate+=" "+metadata["realdate_month_"]
if "realdate_day_" in metadata: RealDate+=" "+metadata["realdate_day_"]
metadata["realdate_full_"] = RealDate
RealDate = RealDate.split("-")[0]
PubmedDate = ""
if "publication_year" in metadata: PubmedDate+=metadata["publication_year"]
if "publication_month" in metadata: PubmedDate+=" "+metadata["publication_month"]
if "publication_day" in metadata: PubmedDate+=" "+metadata["publication_day"]
if len(RealDate)>4:
if len(RealDate)>8: decision = datetime.strptime(RealDate, '%Y %b %d').date()
else: decision = datetime.strptime(RealDate, '%Y %b').date()
else: decision = datetime.strptime(PubmedDate, '%Y %m %d').date()
if "publication_year" in metadata: metadata["publication_year"] = str(decision.year)
if "publication_month" in metadata: metadata["publication_month"] = str(decision.month)
if "publication_day" in metadata: metadata["publication_day"] = str(decision.day)
if "realdate_year_" in metadata: metadata.pop("realdate_year_")
if "realdate_month_" in metadata: metadata.pop("realdate_month_")
if "realdate_day_" in metadata: metadata.pop("realdate_day_")
metadata_list.append(metadata) metadata_list.append(metadata)
# return the list of metadata # return the list of metadata
return metadata_list return metadata_list
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment