Commit 38a39f84 authored by Mathieu Rodic

[CODE] Replaced lists with iterators in FileParser subclasses

https://forge.iscpif.fr/issues/1422
parent e4531939
...@@ -48,11 +48,9 @@ class EuropressFileParser(FileParser): ...@@ -48,11 +48,9 @@ class EuropressFileParser(FileParser):
print(error) print(error)
except: except Exception as error:
return [] print(error)
# initialize the list of metadata
metadata_list = []
# parse all the articles, one by one # parse all the articles, one by one
try: try:
for html_article in html_articles: for html_article in html_articles:
...@@ -201,16 +199,9 @@ class EuropressFileParser(FileParser): ...@@ -201,16 +199,9 @@ class EuropressFileParser(FileParser):
#metadata_str = {} #metadata_str = {}
for key, value in metadata.items(): for key, value in metadata.items():
metadata[key] = value.decode() if isinstance(value, bytes) else value metadata[key] = value.decode() if isinstance(value, bytes) else value
metadata_list.append(metadata) yield metadata
count += 1 count += 1
except Exception as error: except Exception as error:
print(error) print(error)
pass pass
# from pprint import pprint
# pprint(metadata_list)
# return []
return metadata_list
...@@ -109,9 +109,11 @@ class FileParser: ...@@ -109,9 +109,11 @@ class FileParser:
# ...otherwise, let's parse it directly! # ...otherwise, let's parse it directly!
else: else:
try: try:
metadata_list += self._parse(file) for metadata in self._parse(file):
metadata_list.append(self.format_metadata(metadata))
except Exception as error: except Exception as error:
print(error) print(error)
# return the list of formatted metadata # return the list of formatted metadata
return map(self.format_metadata, metadata_list) return metadata_list
...@@ -10,8 +10,6 @@ class PubmedFileParser(FileParser): ...@@ -10,8 +10,6 @@ class PubmedFileParser(FileParser):
xml_parser = etree.XMLParser(resolve_entities=False, recover=True) xml_parser = etree.XMLParser(resolve_entities=False, recover=True)
xml = etree.parse(file, parser=xml_parser) xml = etree.parse(file, parser=xml_parser)
xml_articles = xml.findall('PubmedArticle') xml_articles = xml.findall('PubmedArticle')
# initialize the list of metadata
metadata_list = []
# parse all the articles, one by one # parse all the articles, one by one
for xml_article in xml_articles: for xml_article in xml_articles:
# extract data from the document # extract data from the document
...@@ -39,6 +37,4 @@ class PubmedFileParser(FileParser): ...@@ -39,6 +37,4 @@ class PubmedFileParser(FileParser):
metadata[key] = xml_node.text metadata[key] = xml_node.text
except: except:
pass pass
metadata_list.append(metadata) yield metadata
# return the list of metadata
return metadata_list
...@@ -17,14 +17,12 @@ class RisFileParser(FileParser): ...@@ -17,14 +17,12 @@ class RisFileParser(FileParser):
} }
def _parse(self, file): def _parse(self, file):
metadata_list = []
metadata = {} metadata = {}
last_key = None last_key = None
last_values = [] last_values = []
for line in file: for line in file:
if len(line) > 2: if len(line) > 2:
parameter_key = line[:2] parameter_key = line[:2]
# print(parameter_key)
if parameter_key != b' ' and parameter_key != last_key: if parameter_key != b' ' and parameter_key != last_key:
if last_key in self._parameters: if last_key in self._parameters:
parameter = self._parameters[last_key] parameter = self._parameters[last_key]
...@@ -32,17 +30,13 @@ class RisFileParser(FileParser): ...@@ -32,17 +30,13 @@ class RisFileParser(FileParser):
separator = parameter["separator"] if "separator" in parameter else "" separator = parameter["separator"] if "separator" in parameter else ""
metadata[parameter["key"]] = separator.join(last_values) metadata[parameter["key"]] = separator.join(last_values)
elif parameter["type"] == "delimiter": elif parameter["type"] == "delimiter":
#language = self._languages_fullname[metadata["language"].lower()]
#print(metadata)
try: try:
#print("append")
if 'language_fullname' not in metadata.keys(): if 'language_fullname' not in metadata.keys():
if 'language_iso3' not in metadata.keys(): if 'language_iso3' not in metadata.keys():
if 'language_iso2' not in metadata.keys(): if 'language_iso2' not in metadata.keys():
metadata['language_iso2'] = 'en' metadata['language_iso2'] = 'en'
metadata_list.append(metadata) yield metadata
metadata = {} metadata = {}
#print("append succeeded")
except: except:
pass pass
last_key = parameter_key last_key = parameter_key
...@@ -52,7 +46,3 @@ class RisFileParser(FileParser): ...@@ -52,7 +46,3 @@ class RisFileParser(FileParser):
except Exception as error: except Exception as error:
print(error) print(error)
pass pass
#print(len(metadata_list))
#print(metadata_list)
return metadata_list
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment