From 852f71b624d05d30018147566f8d197b113ebdc5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexandre=20Delano=C3=AB?= <alexandre@delanoe.org>
Date: Tue, 12 May 2015 17:00:11 +0100
Subject: [PATCH] [FIX] Adding zotero parser

---
 parsing/FileParsers/FileParser.py       | 26 +++++++++++++------------
 parsing/FileParsers/RisFileParser.py    | 16 +++++++++++----
 parsing/FileParsers/ZoteroFileParser.py | 23 ++++++++++++++++++++++
 parsing/FileParsers/__init__.py         |  1 +
 parsing/parsers_config.py               |  4 ++--
 5 files changed, 52 insertions(+), 18 deletions(-)
 create mode 100644 parsing/FileParsers/ZoteroFileParser.py

diff --git a/parsing/FileParsers/FileParser.py b/parsing/FileParsers/FileParser.py
index eb5bc723..80cce5c0 100644
--- a/parsing/FileParsers/FileParser.py
+++ b/parsing/FileParsers/FileParser.py
@@ -4,21 +4,21 @@ import zipfile
 import chardet
 
 from ..Caches import LanguagesCache
-    
+
 
 class FileParser:
     """Base class for performing files parsing depending on their type.
     """
     def __init__(self, language_cache=None):
         self._languages_cache = LanguagesCache() if language_cache is None else language_cache
-    
+
     def detect_encoding(self, string):
         """Useful method to detect the document encoding.
         """
         encoding = chardet.detect(string)
         return encoding.get('encoding', 'UTF-8')
-    
-    
+
+
     def format_hyperdata_dates(self, hyperdata):
         """Format the dates found in the hyperdata.
         Examples:
@@ -27,7 +27,7 @@ class FileParser:
             {"publication_year": "2014"}
             -> {"publication_date": "2014-01-01 00:00:00", "publication_year": "2014", ...}
         """
-        
+
         # First, check the split dates...
         prefixes = [key[:-5] for key in hyperdata.keys() if key[-5:] == "_year"]
         for prefix in prefixes:
@@ -51,21 +51,23 @@ class FileParser:
                 hyperdata[prefix + "_date"] = dateutil.parser.parse(date_string).strftime("%Y-%m-%d %H:%M:%S")
             except:
                 pass
-        
+
         # ...then parse all the "date" fields, to parse it into separate elements
         prefixes = [key[:-5] for key in hyperdata.keys() if key[-5:] == "_date"]
         for prefix in prefixes:
             date = dateutil.parser.parse(hyperdata[prefix + "_date"])
+
+            # store each component of the parsed date under its own "<prefix>_*" key
             hyperdata[prefix + "_year"]      = date.strftime("%Y")
             hyperdata[prefix + "_month"]     = date.strftime("%m")
             hyperdata[prefix + "_day"]       = date.strftime("%d")
             hyperdata[prefix + "_hour"]      = date.strftime("%H")
             hyperdata[prefix + "_minute"]    = date.strftime("%M")
             hyperdata[prefix + "_second"]    = date.strftime("%S")
-                
+
         # finally, return the transformed result!
         return hyperdata
-        
+
     def format_hyperdata_languages(self, hyperdata):
         """format the languages found in the hyperdata."""
         language = None
@@ -81,18 +83,18 @@ class FileParser:
             hyperdata["language_iso3"]       = language.iso3
             hyperdata["language_fullname"]   = language.fullname
         return hyperdata
-        
+
     def format_hyperdata(self, hyperdata):
         """Format the hyperdata."""
         hyperdata = self.format_hyperdata_dates(hyperdata)
         hyperdata = self.format_hyperdata_languages(hyperdata)
         return hyperdata
-    
-    
+
+
     def _parse(self, file):
         """This method shall be overriden by inherited classes."""
         return list()
-        
+
     def parse(self, file):
         """Parse the file, and its children files found in the file.
         """
diff --git a/parsing/FileParsers/RisFileParser.py b/parsing/FileParsers/RisFileParser.py
index d05caf51..888f4bc9 100644
--- a/parsing/FileParsers/RisFileParser.py
+++ b/parsing/FileParsers/RisFileParser.py
@@ -3,15 +3,17 @@ from .FileParser import FileParser
 
 from ..Caches import LanguagesCache
 
+from admin.utils import PrintException
+
 class RisFileParser(FileParser):
 
     def __init__(self, language_cache=None):
-        
+
         super(FileParser, self).__init__()
         self._languages_cache = LanguagesCache() if language_cache is None else language_cache
-        
+
         self._begin = 6
-        
+
         self._parameters = {
             b"ER":  {"type": "delimiter"},
             b"TI":  {"type": "hyperdata", "key": "title", "separator": " "},
@@ -24,7 +26,7 @@ class RisFileParser(FileParser):
             b"AB":  {"type": "hyperdata", "key": "abstract", "separator": " "},
             b"WC":  {"type": "hyperdata", "key": "fields"},
         }
-        
+
 
     def _parse(self, file):
         hyperdata = {}
@@ -57,5 +59,11 @@ class RisFileParser(FileParser):
                     print(error)
         # if a hyperdata object is left in memory, yield it as well
         if hyperdata:
+#            try:
+#                if hyperdata['date_to_parse']:
+#                    print(hyperdata['date_to_parse'])
+#            except:
+#                pass
+#
             #print(hyperdata['title'])
             yield hyperdata
diff --git a/parsing/FileParsers/ZoteroFileParser.py b/parsing/FileParsers/ZoteroFileParser.py
new file mode 100644
index 00000000..3d886af7
--- /dev/null
+++ b/parsing/FileParsers/ZoteroFileParser.py
@@ -0,0 +1,23 @@
+from .RisFileParser import RisFileParser
+
+from ..Caches import LanguagesCache
+
+class ZoteroFileParser(RisFileParser):
+    """RIS parser for Zotero exports: inherits _parse, overrides the tag table."""
+
+    def __init__(self, language_cache=None):
+        # Accept and forward the optional cache like FileParser/RisFileParser do.
+        super(ZoteroFileParser, self).__init__(language_cache)
+        self._begin = 6  # same value RisFileParser.__init__ assigns
+        self._parameters = {
+            b"ER":  {"type": "delimiter"},
+            b"TI":  {"type": "hyperdata", "key": "title", "separator": " "},
+            b"AU":  {"type": "hyperdata", "key": "authors", "separator": ", "},
+            b"UR":  {"type": "hyperdata", "key": "doi"},  # NOTE(review): UR is RIS's URL tag - confirm "doi"
+            b"DA":  {"type": "hyperdata", "key": "publication_date"},
+            b"PY":  {"type": "hyperdata", "key": "publication_year"},
+            b"PD":  {"type": "hyperdata", "key": "publication_month"},
+            b"LA":  {"type": "hyperdata", "key": "language_iso2"},
+            b"AB":  {"type": "hyperdata", "key": "abstract", "separator": " "},
+            b"WC":  {"type": "hyperdata", "key": "fields"},
+        }
diff --git a/parsing/FileParsers/__init__.py b/parsing/FileParsers/__init__.py
index 09871c6a..e642ec9d 100644
--- a/parsing/FileParsers/__init__.py
+++ b/parsing/FileParsers/__init__.py
@@ -1,6 +1,7 @@
 from .RisFileParser import RisFileParser
 from .IsiFileParser import IsiFileParser
 from .JstorFileParser import JstorFileParser
+from .ZoteroFileParser import ZoteroFileParser
 from .PubmedFileParser import PubmedFileParser
 from .EuropressFileParser import EuropressFileParser
 from .ISText import ISText
diff --git a/parsing/parsers_config.py b/parsing/parsers_config.py
index d2b772e7..e1765e53 100644
--- a/parsing/parsers_config.py
+++ b/parsing/parsers_config.py
@@ -4,11 +4,11 @@ parsers = {
         'Pubmed (xml format)'               : PubmedFileParser,
         'Web of Science (ISI format)'       : IsiFileParser,
         'Scopus (RIS format)'               : RisFileParser,
-        'Zotero (RIS format)'               : JstorFileParser,
+        'Zotero (RIS format)'               : ZoteroFileParser,
         'Jstor (RIS format)'                : JstorFileParser,
         #'Europress'                        : EuropressFileParser,
         'Europress (French)'                : EuropressFileParser,
         'Europress (English)'               : EuropressFileParser,
-        
+
     }
 
-- 
2.21.0