Commit d19c6877 authored by c24b

Referencing the Parser

parent 0950a1ac
from ._Parser import Parser
from datetime import datetime
from io import BytesIO
import json
class CernParser(Parser):
    """Parser for bibliographic records exported as MARC21 XML.

    Each ``<record>`` element of the input is turned into a flat dict whose
    keys are taken from the :attr:`MARC21` mapping table below.
    """

    # MARC21 datafield tag -> {subfield code -> key used in the parsed record}.
    MARC21 = {
        "100": {
            "a": "author_name",
            "v": "author_affiliation",
            "w": "author_country",
            "m": "author_mail",
        },
        "700": {
            "a": "coauthor_name",
            "v": "coauthor_affiliation",
            "w": "coauthor_country",
        },
        "773": {
            "c": "pages",
            "n": "issue",
            "p": "journal",
            "v": "volume",
            "y": "year",
        },
        # Tag "024" carries both the DOI and the date.  A dict literal cannot
        # hold the same key twice (the later entry silently replaces the
        # earlier one), so both subfields are declared in a single entry.
        "024": {"a": "DOI", "t": "Date"},
        "037": {"a": "ArXiv"},
        "022": {"a": "ISSN"},
        "245": {"a": "Title"},
        "520": {"a": "Abstract"},
        "260": {"b": "Publisher", "c": "Pubdate"},
        "540": {"a": "Licence"},
        "653": {"a": "keywords"},
        "856": {"u": "pdf_source"},
    }

    def parse(self, filebuf):
        """Extract one dict per ``<record>`` found in *filebuf*.

        Args:
            filebuf: a file-like object exposing ``read()``, or the markup
                itself as ``str``/``bytes``.

        Returns:
            A list of dicts.  Every dict has a ``"uid"`` key (text of the
            record's first ``<controlfield>``, or ``None`` when there is
            none), the three ``coauthor_*`` keys as lists (possibly empty),
            and one key per other mapped subfield present in the record.
            When a non-co-author subfield occurs several times in a record,
            the last occurrence wins.
        """
        # Local import: BeautifulSoup is required here but is not imported at
        # the top of this module.
        from bs4 import BeautifulSoup

        markup = filebuf.read() if hasattr(filebuf, "read") else filebuf
        soup = BeautifulSoup(markup, "lxml")

        records = []
        for record in soup.find_all("record"):
            # Tag "700" (co-authors) is accumulated, so its keys start out
            # as empty lists.
            parsed = {key: [] for key in self.MARC21["700"].values()}

            # Look inside the current record only; searching the whole
            # document would hand every record the first record's uid.
            controlfield = record.find("controlfield")
            parsed["uid"] = controlfield.text if controlfield is not None else None

            for datafield in record.find_all("datafield"):
                tag = datafield.get("tag")
                mapping = self.MARC21.get(tag)
                if mapping is None:
                    continue  # datafield we do not index
                for subfield in datafield.find_all("subfield"):
                    key = mapping.get(subfield.get("code"))
                    if key is None:
                        continue  # subfield code we do not index
                    if tag == "700":
                        parsed[key].append(subfield.text)
                    else:
                        parsed[key] = subfield.text

            records.append(parsed)
        return records
...@@ -9,4 +9,4 @@ from .Europress import EuropressParser ...@@ -9,4 +9,4 @@ from .Europress import EuropressParser
from .ISTex import ISTexParser from .ISTex import ISTexParser
from .CSV import CSVParser from .CSV import CSVParser
from .CERN import CernParser from .Cern import CernParser
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment