Commit 3c87b298 authored by c24b's avatar c24b

Referencing the Parser

parent b62ce7f5
import json
import xml.etree.ElementTree as ET
from datetime import datetime
from io import BytesIO

from ._Parser import Parser
class CernParser(Parser):
    """Parser that turns XML ``<record>`` elements into flat dictionaries.

    Each ``<record>`` is expected to contain ``<controlfield>`` and
    ``<datafield tag="...">`` elements, the latter holding
    ``<subfield code="...">`` children. The ``MARC21`` table below decides
    which (tag, code) pairs are kept and under which output key.
    """

    # Datafield tag -> {subfield code -> key used in the output dictionary}.
    # This is a class attribute: it is shared, read-only configuration.
    MARC21 = {
        "100": {
            "a": "author_name",
            "v": "author_affiliation",
            "w": "author_country",
            "m": "author_mail",
        },
        "700": {
            "a": "coauthor_name",
            "v": "coauthor_affiliation",
            "w": "coauthor_country",
        },
        "773": {
            "c": "pages",
            "n": "issue",
            "p": "journal",
            "v": "volume",
            "y": "year",
        },
        # Both subfields live under one entry: a second "024" key in the
        # literal would silently replace the first and drop the DOI mapping.
        "024": {"a": "DOI", "t": "Date"},
        "037": {"a": "ArXiv"},
        "022": {"a": "ISSN"},
        "245": {"a": "Title"},
        "520": {"a": "Abstract"},
        "260": {"b": "Publisher", "c": "Pubdate"},
        "540": {"a": "Licence"},
        "653": {"a": "keywords"},
        "856": {"u": "pdf_source"},
    }

    # Tag whose subfield values are accumulated in lists instead of being
    # stored as a single string.
    _REPEATABLE_TAG = "700"

    @staticmethod
    def _local_name(element):
        """Return the element's tag name without any ``{namespace}`` prefix."""
        tag = element.tag
        if not isinstance(tag, str):
            # Comments / processing instructions carry a non-string tag.
            return ""
        return tag.rsplit("}", 1)[-1]

    @staticmethod
    def _text(element):
        """Return all text contained in *element*, descendants included."""
        return "".join(element.itertext())

    def _parse_record(self, record):
        """Build the output dictionary for a single ``<record>`` element."""
        # Co-author fields always exist, as (possibly empty) lists.
        fields = {
            name: [] for name in self.MARC21[self._REPEATABLE_TAG].values()
        }

        # The uid is the text of the first controlfield of THIS record;
        # None when the record has no controlfield at all.
        control = next(
            (
                element
                for element in record.iter()
                if self._local_name(element) == "controlfield"
            ),
            None,
        )
        fields["uid"] = None if control is None else self._text(control)

        for data in record.iter():
            if self._local_name(data) != "datafield":
                continue
            tag = data.get("tag")
            mapping = self.MARC21.get(tag)
            if mapping is None:
                continue
            for sub in data.iter():
                if self._local_name(sub) != "subfield":
                    continue
                name = mapping.get(sub.get("code"))
                if name is None:
                    continue
                value = self._text(sub)
                if tag == self._REPEATABLE_TAG:
                    fields[name].append(value)
                else:
                    # NOTE(review): a repeated non-700 field keeps only its
                    # last value -- confirm this is wanted for e.g. "653".
                    fields[name] = value
        return fields

    def parse(self, filebuf):
        """Parse an XML document and return one dictionary per record.

        Args:
            filebuf: the XML source, either raw ``bytes`` or anything
                accepted by ``xml.etree.ElementTree.parse`` (an open file
                object or a file name).

        Returns:
            A list of dictionaries, one per ``<record>`` element, in
            document order. Each holds a ``"uid"`` key, the three
            co-author keys (as lists) and every mapped field found.

        Raises:
            xml.etree.ElementTree.ParseError: if the input is not
                well-formed XML.
        """
        # NOTE: ElementTree is not hardened against maliciously crafted
        # XML; only feed it data from trusted sources.
        if isinstance(filebuf, bytes):
            root = ET.fromstring(filebuf)
        else:
            root = ET.parse(filebuf).getroot()

        # root.iter() also yields the root itself, so a document whose
        # top-level element is a single <record> is handled as well.
        return [
            self._parse_record(record)
            for record in root.iter()
            if self._local_name(record) == "record"
        ]
...@@ -9,4 +9,4 @@ from .Europress import EuropressParser ...@@ -9,4 +9,4 @@ from .Europress import EuropressParser
from .ISTex import ISTexParser from .ISTex import ISTexParser
from .CSV import CSVParser from .CSV import CSVParser
from .CERN import CernParser from .Cern import CernParser
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment