Commit 1b813be6 authored by delanoe

[FIX] IsTex parser ok.

parent 2fa892a8
...@@ -7,7 +7,7 @@ from io import BytesIO ...@@ -7,7 +7,7 @@ from io import BytesIO
import json import json
class ISTex(FileParser): class ISTex(FileParser):
def _parse(self, thefile): def _parse(self, thefile):
json_data=open(thefile,"r") json_data=open(thefile,"r")
data = json.load(json_data) data = json.load(json_data)
...@@ -22,14 +22,14 @@ class ISTex(FileParser): ...@@ -22,14 +22,14 @@ class ISTex(FileParser):
"language_iso3" : 'language', "language_iso3" : 'language',
"doi" : 'doi', "doi" : 'doi',
"host" : 'host', "host" : 'host',
"publication_date" : 'pubdate', "publication_date" : 'publicationDate',
# "authors" : 'author', # "authors" : 'author',
"authorsRAW" : 'author', "authorsRAW" : 'author',
"keywords" : "keywords" "keywords" : "keywords"
} }
suma = 0 suma = 0
for json_doc in json_docs: for json_doc in json_docs:
hyperdata = {} hyperdata = {}
...@@ -42,9 +42,9 @@ class ISTex(FileParser): ...@@ -42,9 +42,9 @@ class ISTex(FileParser):
# print("|",hyperdata["language_iso3"]) # print("|",hyperdata["language_iso3"])
if "doi" in hyperdata: if "doi" in hyperdata:
hyperdata["doi"] = hyperdata["doi"][0] hyperdata["doi"] = hyperdata["doi"][0]
keywords = [] keywords = []
if "keywords" in hyperdata: if "keywords" in hyperdata:
for keyw in hyperdata["keywords"]: for keyw in hyperdata["keywords"]:
...@@ -72,7 +72,7 @@ class ISTex(FileParser): ...@@ -72,7 +72,7 @@ class ISTex(FileParser):
authors=False authors=False
if "authorsRAW" in hyperdata: if "authorsRAW" in hyperdata:
names = [] names = []
for author in hyperdata["authorsRAW"]: for author in hyperdata["authorsRAW"]:
names.append(author["name"]) names.append(author["name"])
hyperdata["authors"] = ", ".join(names) hyperdata["authors"] = ", ".join(names)
...@@ -88,7 +88,7 @@ class ISTex(FileParser): ...@@ -88,7 +88,7 @@ class ISTex(FileParser):
if "publication_date" in hyperdata: if "publication_date" in hyperdata:
RealDate = hyperdata["publication_date"] RealDate = hyperdata["publication_date"]
if "publication_date" in hyperdata: if "publication_date" in hyperdata:
hyperdata.pop("publication_date") hyperdata.pop("publication_date")
if isinstance(RealDate, list): if isinstance(RealDate, list):
...@@ -99,18 +99,18 @@ class ISTex(FileParser): ...@@ -99,18 +99,18 @@ class ISTex(FileParser):
if len(RealDate)>4: if len(RealDate)>4:
if len(RealDate)>8: if len(RealDate)>8:
try: Decision = datetime.strptime(RealDate, '%Y-%b-%d').date() try: Decision = datetime.strptime(RealDate, '%Y-%b-%d').date()
except: except:
try: Decision = datetime.strptime(RealDate, '%Y-%m-%d').date() try: Decision = datetime.strptime(RealDate, '%Y-%m-%d').date()
except: Decision=False except: Decision=False
else: else:
try: Decision = datetime.strptime(RealDate, '%Y-%b').date() try: Decision = datetime.strptime(RealDate, '%Y-%b').date()
except: except:
try: Decision = datetime.strptime(RealDate, '%Y-%m').date() try: Decision = datetime.strptime(RealDate, '%Y-%m').date()
except: Decision=False except: Decision=False
else: else:
try: Decision = datetime.strptime(RealDate, '%Y').date() try: Decision = datetime.strptime(RealDate, '%Y').date()
except: Decision=False except: Decision=False
if Decision!=False: if Decision!=False:
hyperdata["publication_year"] = str(Decision.year) hyperdata["publication_year"] = str(Decision.year)
hyperdata["publication_month"] = str(Decision.month) hyperdata["publication_month"] = str(Decision.month)
...@@ -119,11 +119,11 @@ class ISTex(FileParser): ...@@ -119,11 +119,11 @@ class ISTex(FileParser):
# print("\t||",hyperdata["title"]) # print("\t||",hyperdata["title"])
# print("\t\t",Decision) # print("\t\t",Decision)
# print("=============================") # print("=============================")
# else: # else:
# suma+=1 # suma+=1
# if "pubdate" in json_doc: # if "pubdate" in json_doc:
# print ("\tfail pubdate:",json_doc["pubdate"]) # print ("\tfail pubdate:",json_doc["pubdate"])
# print ("nb_hits:",len(json_docs)) # print ("nb_hits:",len(json_docs))
# print("\t - nb_fails:",suma) # print("\t - nb_fails:",suma)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment