Commit b94aefad authored by Alexandre Delanoë

[HAL] merge parser changes on server.

parent e24efe96
...@@ -31,7 +31,8 @@ class HalCrawler(Crawler): ...@@ -31,7 +31,8 @@ class HalCrawler(Crawler):
#search_field="title_t" #search_field="title_t"
search_field="abstract_t" search_field="abstract_t"
return (search_field + ":" + "(" + query + ")") #return (search_field + ":" + "(" + query + ")")
return query # (search_field + ":" + "(" + query + ")")
def _get(self, query, fromPage=1, count=10, lang=None): def _get(self, query, fromPage=1, count=10, lang=None):
...@@ -45,8 +46,15 @@ class HalCrawler(Crawler): ...@@ -45,8 +46,15 @@ class HalCrawler(Crawler):
, uri_s , uri_s
, isbn_s , isbn_s
, issue_s , issue_s
, journalTitle_s
, language_s
, doiId_s
, authId_i
, instStructId_i
, deptStructId_i
, labStructId_i
, rteamStructId_i
, docType_s , docType_s
, journalPublisher_s
""" """
#, authUrl_s #, authUrl_s
#, type_s #, type_s
......
...@@ -18,9 +18,19 @@ class HalParser(Parser): ...@@ -18,9 +18,19 @@ class HalParser(Parser):
hyperdata_path = { "id" : "isbn_s" hyperdata_path = { "id" : "isbn_s"
, "title" : "title_s" , "title" : "title_s"
, "abstract" : "abstract_s" , "abstract" : "abstract_s"
, "source" : "journalPublisher_s" , "source" : "journalTitle_s"
, "url" : "uri_s" , "url" : "uri_s"
, "authors" : "authFullName_s" , "authors" : "authFullName_s"
, "isbn_s" : "isbn_s"
, "issue_s" : "issue_s"
, "language_s" : "language_s"
, "doiId_s" : "doiId_s"
, "authId_i" : "authId_i"
, "instStructId_i" : "instStructId_i"
, "deptStructId_i" : "deptStructId_i"
, "labStructId_i" : "labStructId_i"
, "rteamStructId_i" : "rteamStructId_i"
, "docType_s" : "docType_s"
} }
uris = set() uris = set()
...@@ -33,9 +43,9 @@ class HalParser(Parser): ...@@ -33,9 +43,9 @@ class HalParser(Parser):
field = doc.get(path, "NOT FOUND") field = doc.get(path, "NOT FOUND")
if isinstance(field, list): if isinstance(field, list):
hyperdata[key] = ", ".join(field) hyperdata[key] = ", ".join(map(lambda x: str(x), field))
else: else:
hyperdata[key] = field hyperdata[key] = str(field)
if hyperdata["url"] in uris: if hyperdata["url"] in uris:
print("Document already parsed") print("Document already parsed")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment