Commit 3c87b298 authored by c24b

Referencing the Parser

parent b62ce7f5
from ._Parser import Parser
from datetime import datetime
from io import BytesIO
import json
class CernParser(Parser):
    """Parser turning MARC21 XML ``<record>`` elements into plain dicts.

    Each ``<datafield tag=...>`` / ``<subfield code=...>`` pair listed in
    ``MARC21`` is copied into the output record under the mapped key.
    """

    # MARC21 datafield tag -> {subfield code -> key in the output record}.
    # Defined on the class (not on ``self``) so it exists without an instance.
    MARC21 = {
        "100": {
            "a": "author_name",
            "v": "author_affiliation",
            "w": "author_country",
            "m": "author_mail",
        },
        "700": {
            "a": "coauthor_name",
            "v": "coauthor_affiliation",
            "w": "coauthor_country",
        },
        "773": {
            "c": "pages",
            "n": "issue",
            "p": "journal",
            "v": "volume",
            "y": "year",
        },
        # Tag "024" used to appear twice in this literal; the second entry
        # silently replaced the first, dropping the DOI mapping. Both
        # subfields now live under the single key.
        "024": {"a": "DOI", "t": "Date"},
        "037": {"a": "ArXiv"},
        "022": {"a": "ISSN"},
        "245": {"a": "Title"},
        "520": {"a": "Abstract"},
        "260": {"b": "Publisher", "c": "Pubdate"},
        "540": {"a": "Licence"},
        "653": {"a": "keywords"},
        "856": {"u": "pdf_source"},
    }

    def parse(self, filebuf):
        """Parse a MARC21 XML document and return one dict per record.

        Args:
            filebuf: The XML document, as a string/bytes or an open
                file-like object (both are accepted by BeautifulSoup).

        Returns:
            A list of dicts. Every dict has a ``"uid"`` entry (text of the
            record's first ``<controlfield>``, or ``None`` if absent), a
            list for each key mapped under tag ``"700"`` (possibly empty),
            and a string for every other mapped subfield that was found.
        """
        # The module never imported BeautifulSoup although the code relies
        # on it; import it here so the method is self-sufficient.
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(filebuf, "lxml")
        records = []
        for record in soup.find_all("record"):
            # Tag "700" fields are repeatable, so they accumulate in lists.
            r = {key: [] for key in self.MARC21["700"].values()}

            # Look inside the current record only, not the whole document.
            controlfield = record.find("controlfield")
            r["uid"] = controlfield.text if controlfield is not None else None

            for data in record.find_all("datafield"):
                tag = data.get("tag")
                mapping = self.MARC21.get(tag)
                if mapping is None:
                    continue
                for sub in data.find_all("subfield"):
                    key = mapping.get(sub.get("code"))
                    if key is None:
                        continue
                    if tag == "700":
                        r[key].append(sub.text)
                    else:
                        r[key] = sub.text
            records.append(r)
        return records
......@@ -9,4 +9,4 @@ from .Europress import EuropressParser
from .ISTex import ISTexParser
from .CSV import CSVParser
from .CERN import CernParser
from .Cern import CernParser
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment