import collections
import datetime
import zipfile

import chardet
import dateutil.parser

from ..Caches import LanguagesCache
    

class FileParser:
    """Base class for parsing files depending on their type.

    Subclasses override `_parse` to extract raw metadata dictionaries from a
    file; `parse` takes care of ZIP archives, of normalizing the dates and
    languages found in each metadata dictionary, and of closing the file.
    """

    # Fallback used by dateutil for the components missing from a date
    # string. Without it dateutil fills the gaps with *today's* month and
    # day, so a bare year would not map to January 1st.
    _DEFAULT_DATE = datetime.datetime(1900, 1, 1)

    # Optional split-date fields, in the order they are appended to the
    # year, each with the separator placed in front of it.
    _DATE_PARTS = (
        ("_month", " "),
        ("_day", " "),
        ("_hour", " "),
        ("_minute", ":"),
        ("_second", ":"),
    )

    # Split-date fields written back from a parsed date, with their format.
    _DATE_FIELDS = (
        ("_year", "%Y"),
        ("_month", "%m"),
        ("_day", "%d"),
        ("_hour", "%H"),
        ("_minute", "%M"),
        ("_second", "%S"),
    )

    def __init__(self, language_cache=None):
        """Create a parser.

        `language_cache` is the object used to look languages up by symbol;
        a new `LanguagesCache` is created when it is not given.
        """
        self._languages_cache = LanguagesCache() if language_cache is None else language_cache

    def detect_encoding(self, string):
        """Useful method to detect the document encoding.

        `string` is handed to `chardet.detect` as is. Returns the detected
        encoding name, or 'UTF-8' when chardet could not determine one.
        """
        # chardet reports an undetectable input as {'encoding': None}, so the
        # key is present and a plain `.get(..., default)` would return None.
        return chardet.detect(string).get('encoding') or 'UTF-8'

    def format_metadata_dates(self, metadata):
        """Format the dates found in the metadata.

        The dictionary is modified in place and returned. Components that
        are missing from a date default to January 1st, 00:00:00. Dates that
        cannot be parsed are left untouched.

        Examples:
            {"publication_date": "2014-10-23 09:57:42"}
            -> {"publication_date": "2014-10-23 09:57:42", "publication_year": "2014", ...}
            {"publication_year": "2014"}
            -> {"publication_date": "2014-01-01 00:00:00", "publication_year": "2014", ...}
        """
        # First, rebuild a full "<prefix>_date" from the split fields. The
        # prefixes are collected up front because the loop adds keys.
        prefixes = [key[:-5] for key in metadata if key.endswith("_year")]
        for prefix in prefixes:
            pieces = [str(metadata[prefix + "_year"])]
            # A component is only used when all the coarser ones are present.
            for suffix, separator in self._DATE_PARTS:
                key = prefix + suffix
                if key not in metadata:
                    break
                pieces.append(separator + str(metadata[key]))
            try:
                date = dateutil.parser.parse("".join(pieces), default=self._DEFAULT_DATE)
            except (ValueError, OverflowError, TypeError):
                # best effort: an unparsable date is simply not converted
                continue
            metadata[prefix + "_date"] = date.strftime("%Y-%m-%d %H:%M:%S")

        # ...then split every "<prefix>_date" field into separate elements.
        prefixes = [key[:-5] for key in metadata if key.endswith("_date")]
        for prefix in prefixes:
            try:
                date = dateutil.parser.parse(metadata[prefix + "_date"], default=self._DEFAULT_DATE)
            except (ValueError, OverflowError, TypeError):
                # keep the raw value rather than failing the whole record
                continue
            for suffix, date_format in self._DATE_FIELDS:
                metadata[prefix + suffix] = date.strftime(date_format)

        # finally, return the transformed result!
        return metadata

    def format_metadata_languages(self, metadata):
        """Format the languages found in the metadata.

        The first of "language_fullname", "language_iso3" and
        "language_iso2" whose value is known to the languages cache is used
        to fill in all three keys. The dictionary is modified in place and
        returned; it is left unchanged when no language is recognized.
        """
        language = None
        for key in ("fullname", "iso3", "iso2"):
            language_key = "language_" + key
            if language_key in metadata:
                language = self._languages_cache[metadata[language_key]]
                if language:
                    break
        if language:
            metadata["language_iso2"] = language.iso2
            metadata["language_iso3"] = language.iso3
            metadata["language_fullname"] = language.fullname
        return metadata

    def format_metadata(self, metadata):
        """Format the metadata: dates first, then languages."""
        metadata = self.format_metadata_dates(metadata)
        metadata = self.format_metadata_languages(metadata)
        return metadata

    def _parse(self, file):
        """This method shall be overridden by inherited classes.

        It receives the file to parse and returns an iterable of metadata
        dictionaries. The base implementation finds nothing.
        """
        return []

    @staticmethod
    def _is_zip_archive(file):
        """Tell whether `file` is a ZIP archive, preserving its read position."""
        # zipfile.is_zipfile() seeks inside file-like objects and leaves them
        # near their end; remember where we were so the parser that runs
        # afterwards still sees the whole content.
        try:
            position = file.tell()
        except (AttributeError, OSError, ValueError):
            # a path, or a stream that cannot report its position
            position = None
        is_zip = zipfile.is_zipfile(file)
        if position is not None:
            try:
                file.seek(position)
            except (OSError, ValueError):
                pass
        return is_zip

    def parse(self, file):
        """Parse the file, and its children files found in the file.

        `file` is a path or a file-like object. A ZIP archive is opened and
        each of its members is parsed recursively; any other file is handed
        to `_parse` and then closed when it has a `close` method. Errors are
        printed and the corresponding file is skipped.

        Returns the list of formatted metadata dictionaries.
        """
        # initialize the list of metadata
        metadata_list = []
        # if the file is a ZIP archive, recurse on each of its files...
        if self._is_zip_archive(file):
            with zipfile.ZipFile(file) as archive:
                for filename in archive.namelist():
                    try:
                        with archive.open(filename, 'r') as member:
                            metadata_list += self.parse(member)
                    except Exception as error:
                        print(error)
        # ...otherwise, let's parse it directly!
        else:
            try:
                try:
                    for metadata in self._parse(file):
                        metadata_list.append(self.format_metadata(metadata))
                finally:
                    # close the file even when parsing failed half-way
                    if hasattr(file, 'close'):
                        file.close()
            except Exception as error:
                print(error)
        # return the list of formatted metadata
        return metadata_list