Commit d49877ff authored by delanoe

[FEAT][HAL CRAWLER] OK ready for tests.

parent 5de00550
...@@ -32,7 +32,8 @@ class HalParser(Parser): ...@@ -32,7 +32,8 @@ class HalParser(Parser):
, "authors" : "authFullName_s" , "authors" : "authFullName_s"
} }
uris = set()
for doc in json_docs: for doc in json_docs:
hyperdata = {} hyperdata = {}
...@@ -45,6 +46,10 @@ class HalParser(Parser): ...@@ -45,6 +46,10 @@ class HalParser(Parser):
else: else:
hyperdata[key] = field hyperdata[key] = field
if hyperdata["url"] in uris:
print("Document already parsed")
else:
uris.add(hyperdata["url"])
# hyperdata["authors"] = ", ".join( # hyperdata["authors"] = ", ".join(
# [ p.get("person", {}) # [ p.get("person", {})
# .get("name" , "") # .get("name" , "")
...@@ -53,18 +58,18 @@ class HalParser(Parser): ...@@ -53,18 +58,18 @@ class HalParser(Parser):
# ] # ]
# ) # )
# #
maybeDate = doc.get("submittedDate_s", None) maybeDate = doc.get("submittedDate_s", None)
if maybeDate is not None: if maybeDate is not None:
date = datetime.strptime(maybeDate, "%Y-%m-%d %H:%M:%S") date = datetime.strptime(maybeDate, "%Y-%m-%d %H:%M:%S")
else: else:
date = datetime.now() date = datetime.now()
hyperdata["publication_date"] = date hyperdata["publication_date"] = date
hyperdata["publication_year"] = str(date.year) hyperdata["publication_year"] = str(date.year)
hyperdata["publication_month"] = str(date.month) hyperdata["publication_month"] = str(date.month)
hyperdata["publication_day"] = str(date.day) hyperdata["publication_day"] = str(date.day)
hyperdata_list.append(hyperdata) hyperdata_list.append(hyperdata)
return hyperdata_list return hyperdata_list
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment