Commit b94aefad authored by Alexandre Delanoë

[HAL] merge parser changes on server.

parent e24efe96
......@@ -31,7 +31,8 @@ class HalCrawler(Crawler):
#search_field="title_t"
search_field="abstract_t"
return (search_field + ":" + "(" + query + ")")
#return (search_field + ":" + "(" + query + ")")
return query # (search_field + ":" + "(" + query + ")")
def _get(self, query, fromPage=1, count=10, lang=None):
......@@ -45,8 +46,15 @@ class HalCrawler(Crawler):
, uri_s
, isbn_s
, issue_s
, journalTitle_s
, language_s
, doiId_s
, authId_i
, instStructId_i
, deptStructId_i
, labStructId_i
, rteamStructId_i
, docType_s
, journalPublisher_s
"""
#, authUrl_s
#, type_s
......
......@@ -15,12 +15,22 @@ class HalParser(Parser):
hyperdata_list = []
hyperdata_path = { "id" : "isbn_s"
, "title" : "title_s"
, "abstract" : "abstract_s"
, "source" : "journalPublisher_s"
, "url" : "uri_s"
, "authors" : "authFullName_s"
hyperdata_path = { "id" : "isbn_s"
, "title" : "title_s"
, "abstract" : "abstract_s"
, "source" : "journalTitle_s"
, "url" : "uri_s"
, "authors" : "authFullName_s"
, "isbn_s" : "isbn_s"
, "issue_s" : "issue_s"
, "language_s" : "language_s"
, "doiId_s" : "doiId_s"
, "authId_i" : "authId_i"
, "instStructId_i" : "instStructId_i"
, "deptStructId_i" : "deptStructId_i"
, "labStructId_i" : "labStructId_i"
, "rteamStructId_i" : "rteamStructId_i"
, "docType_s" : "docType_s"
}
uris = set()
......@@ -33,9 +43,9 @@ class HalParser(Parser):
field = doc.get(path, "NOT FOUND")
if isinstance(field, list):
hyperdata[key] = ", ".join(field)
hyperdata[key] = ", ".join(map(lambda x: str(x), field))
else:
hyperdata[key] = field
hyperdata[key] = str(field)
if hyperdata["url"] in uris:
print("Document already parsed")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment