Commit b11e1f9a authored by c24b

Adding constants + parsers CERN

parent 2e8dd028
...@@ -195,7 +195,7 @@ RESOURCETYPES = [ ...@@ -195,7 +195,7 @@ RESOURCETYPES = [
}, },
# type 10 # type 10
{ "name": 'CERN', { "name": 'CERN',
"parser": CERNParser, "parser": CernParser,
"default_language": "en", "default_language": "en",
}, },
] ]
......
...@@ -5,7 +5,7 @@ import json ...@@ -5,7 +5,7 @@ import json
class CernParser(Parser): class CernParser(Parser):
self.MARC21 = { MARC21 = {
"100":{"a": "author_name", "100":{"a": "author_name",
"v": "author_affiliation", "v": "author_affiliation",
"w": "author_country", "w": "author_country",
...@@ -32,12 +32,13 @@ class CernParser(Parser): ...@@ -32,12 +32,13 @@ class CernParser(Parser):
"653": {"a":"keywords"}, "653": {"a":"keywords"},
"856": {"u":"pdf_source"}, "856": {"u":"pdf_source"},
} }
def parse(self, filebuf): def parse(self, filebuf):
tree = etree.tostring(filebuf) tree = etree.tostring(filebuf)
#root = tree.getroot() #root = tree.getroot()
hyperdata_list =[]
soup = BeautifulSoup(tree, "lxml") soup = BeautifulSoup(tree, "lxml")
for record in soupr.find_all("record"): for record in soup.find_all("record"):
r = {v:[] for v in self.MARC21["700"].values()} r = {v:[] for v in self.MARC21["700"].values()}
r["uid"] = soup.find("controlfield").text r["uid"] = soup.find("controlfield").text
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment