Commit a8532659 authored by Mathieu Rodic

[FEATURE] - ISI files parser - done!

Next: create a common parent class so that RIS files can be supported as well.
Also, integrate more fields.
parent 0720146f
This diff is collapsed.
...@@ -48,6 +48,7 @@ class FileParser: ...@@ -48,6 +48,7 @@ class FileParser:
self._extractors = dict() self._extractors = dict()
self._document_nodetype = NodeType.objects.get(name='Document') self._document_nodetype = NodeType.objects.get(name='Document')
languages = Language.objects.all() languages = Language.objects.all()
self._languages_fullname = {language.fullname.lower(): language for language in languages}
self._languages_iso2 = {language.iso2.lower(): language for language in languages} self._languages_iso2 = {language.iso2.lower(): language for language in languages}
self._languages_iso3 = {language.iso3.lower(): language for language in languages} self._languages_iso3 = {language.iso3.lower(): language for language in languages}
#self.parse() #self.parse()
......
...@@ -6,9 +6,11 @@ class IsiFileParser(FileParser): ...@@ -6,9 +6,11 @@ class IsiFileParser(FileParser):
_parameters = { _parameters = {
b"ER": {"type": "delimiter"}, b"ER": {"type": "delimiter"},
b"TI": {"type": "metadata", "key": "title", "concatenate": b" "}, b"TI": {"type": "metadata", "key": "title", "separator": b" "},
b"AU": {"type": "metadata", "key": "authors", "concatenate": b", "}, b"AU": {"type": "metadata", "key": "authors", "separator": b", "},
b"AB": {"type": "metadata", "key": "abstract", "concatenate": b" "}, b"DI": {"type": "metadata", "key": "doi"},
b"LA": {"type": "metadata", "key": "language"},
b"AB": {"type": "metadata", "key": "abstract", "separator": b" "},
} }
def parse(self, parentNode=None, tag=True): def parse(self, parentNode=None, tag=True):
...@@ -22,12 +24,23 @@ class IsiFileParser(FileParser): ...@@ -22,12 +24,23 @@ class IsiFileParser(FileParser):
if last_key in self._parameters: if last_key in self._parameters:
parameter = self._parameters[last_key] parameter = self._parameters[last_key]
if parameter["type"] == "metadata": if parameter["type"] == "metadata":
metadata[parameter["key"]] = parameter["concatenate"].join(last_values) separator = parameter["separator"] if "separator" in parameter else b""
metadata[parameter["key"]] = separator.join(last_values)
elif parameter["type"] == "delimiter": elif parameter["type"] == "delimiter":
language = self._languages_fullname[metadata["language"].lower().decode()]
# self.create_document(
# parentNode = parentNode,
# title = metadata["title"],
# contents = metadata["abstract"],
# language = language,
# metadata = metadata,
# guid = metadata["guid"]
# )
print(metadata) print(metadata)
print()
metadata = {} metadata = {}
break
last_key = parameter_key last_key = parameter_key
last_values = [] last_values = []
last_values.append(line[3:-1]) last_values.append(line[3:-1])
self.file.close() self._file.close()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment