"""
File containing all functions generally used for a csv file

Authors:
    Marie FU
"""
from io import StringIO
import typing

from streamlit.runtime.uploaded_file_manager import UploadedFile
import streamlit as st

from src.gerneralFileOP import *


def _readHeaderColumns(file : UploadedFile|str, encoding : str, separator : str) -> list[str]:
    """
    Read the first line of the given file and split it into column names

    Args:
        file (UploadedFile or String) : uploaded file object or path to a file
        encoding (String) : file encoding
        separator (String) : Used separator of the file

    Returns:
        (List[String]) : raw column names of the first line, or an empty list
                         when the file is neither an UploadedFile nor a path
    """
    if isinstance(file, UploadedFile):
        # getvalue() leaves the read cursor of the uploaded file untouched.
        # The decoded text keeps "\r\n" line endings, so the trailing "\r"
        # is removed to avoid polluting the last column name.
        firstLine = file.getvalue().decode(encoding).split("\n", 1)[0]
    elif isinstance(file, str):
        # Only the first line is needed: do not load the whole file
        with open(file, "r", encoding=encoding) as f:
            firstLine = f.readline()
    else:
        return []
    return firstLine.rstrip("\r\n").split(separator)


def getSeparator(file : UploadedFile|str, encoding : str) -> str|None:
    """
    Get the separator from the given file

    Only the first line is inspected. The candidates are ',', ';' and tab.
    A tab is returned by default when neither ',' nor ';' is found.

    Args:
        file (UploadedFile or String) : target file (uploaded file or path)
        encoding (String) : file encoding

    Returns:
        (String or None) : The separator, or None when several candidate
                           separators appear on the first line

    Authors:
        Nicolas Atrax
        Marie FU
    """
    line = ""
    if isinstance(file, UploadedFile):
        # NOTE: readline() advances the read cursor of the uploaded file
        line = file.readline().decode(encoding)
    elif isinstance(file, str):
        with open(file, "r", encoding=encoding) as f:
            line = f.readline()

    hasTab = '\t' in line
    hasSemicolon = ';' in line

    if ',' in line:
        # A comma mixed with another candidate is ambiguous
        return None if (hasTab or hasSemicolon) else ','
    if hasSemicolon:
        return None if hasTab else ';'
    return '\t'


def checkColumnNames(name : str, registeredNames : list[str], otherColumns : list[str], expectedColumns : list[str]) -> typing.Tuple[list[str], list[str]]:
    """
    Check if the column name is wanted

    The name is appended in place to registeredNames when it matches one of
    the expected columns, and to otherColumns otherwise. The comparison is
    done between lowerName(name) (helper imported from src.gerneralFileOP)
    and the lower-cased expected column names.

    Args:
        name (String) : Column name to check
        registeredNames (List[String]) : List of already checked column names
        otherColumns (List[String]) : Current unmatched column name list
        expectedColumns (List[String]) : List of expected columns name

    Returns:
        registeredNames (List[String]) : Updated list of already checked column names
        otherColumns (List[String]) : Updated unmatched column name list

    Authors:
        Nicolas Atrax
        Marie FU
    """
    expectedLower = [x.lower() for x in expectedColumns]
    if lowerName(name) in expectedLower:
        registeredNames.append(name)
    else:
        otherColumns.append(name)
    return registeredNames, otherColumns


def persoMap(initial_option : str, columnMap : typing.Dict[str,str], mapLength : int) -> bool:
    """
    Check if the dictionary is correct

    The mapping is rejected (and a message is written on the page) when it
    holds fewer entries than expected, or when the default option is still
    one of its keys.

    Args:
        initial_option (String) : The default option displayed
        columnMap (Dict[String,String]) : mapping to check
        mapLength (Int) : mapping expected size

    Returns:
        (Boolean) : Success state

    Authors:
        Marie FU
    """
    if len(columnMap) < mapLength:
        # Keys are the selected options: fewer keys than expected columns
        # means the same option was selected more than once
        st.write(st.session_state.general_text_dict['option_duplicate'])
        return False
    if initial_option in columnMap:
        st.write(f"{initial_option} " + st.session_state.general_text_dict['init_option_in_choice'])
        return False
    return True


def checkColumnExistence(registeredNames : list[str], expectedColumns : list[str]) -> bool:
    """
    Check if there are any missing column name

    When the number of registered names differs from the number of expected
    columns, the missing column names are appended to
    st.session_state.errMessageLog. registeredNames is left unmodified.

    Args:
        registeredNames (List[String]) : Already present and checked column name
        expectedColumns (List[String]) : List of expected columns name

    Returns:
        (Boolean) : True if no missing column name, False otherwise

    Authors:
        Nicolas Atrax
        Marie FU
    """
    if len(registeredNames) == len(expectedColumns):
        return True

    # Work on a local copy so the caller's list is not altered
    presentNames = {lowerName(name) for name in registeredNames}
    missingNames = [name for name in expectedColumns if name.lower() not in presentNames]

    message = st.session_state.general_text_dict['missing_column']
    message += "".join(f" {name}" for name in missingNames)
    st.session_state.errMessageLog += message + "\n\n"
    return False


def infoForm(expectedColumns : list[str], registeredNames : list[str], otherColumns : list[str]) -> typing.Dict[str,str]:
    """
    Form asking for columns mapping when possible

    One select box is displayed per expected column. The selected option is
    used as the key of the mapping and the expected column as its value.
    On submission the mapping is validated with persoMap: the form is
    removed when it is valid, otherwise an empty mapping is returned.
    Before any submission, the mapping built from the current selections
    is returned as is.

    Args:
        expectedColumns (List[String]) : List of expected columns name
        registeredNames (List[String]) : Already present and checked column name
        otherColumns (List[String]) : Current unmatched column name list

    Returns:
        columnMap (Dict[String,String]) : mapping

    Authors:
        Marie FU
    """
    columnMap = {}
    placeholder = st.empty()
    with placeholder.form(key="personalMap"):
        initial_option = st.session_state.general_text_dict['init_option']
        option_list = [initial_option] + registeredNames + otherColumns
        for expected in expectedColumns:
            selected = st.selectbox(expected, option_list, placeholder=initial_option, index=0)
            columnMap[selected] = expected
        submit = st.form_submit_button()

    if submit:
        if persoMap(initial_option, columnMap, len(expectedColumns)):
            placeholder.empty()
        else:
            columnMap = {}
    return columnMap


def getColumnsNames(file : UploadedFile|str, encoding : str, separator : str, expectedColumns : list[str]) -> typing.Dict[str,str]:
    """
    Get the columns present in the given file

    The first line of the file is split with the separator. Depending on
    how many names match the expected columns, the mapping is either built
    automatically, asked to the user through infoForm, or left empty.

    Args:
        file (UploadedFile or String) : target file (uploaded file or path)
        encoding (String) : file encoding
        separator (String) : Used separator of the file
        expectedColumns (List[String]) : List of expected columns name

    Returns:
        columnMap (Dict[String,String]) : mapping from the column name found
                                          in the file to the expected column
                                          name, empty when it cannot be built

    Authors:
        Marie FU
    """
    registeredNames = []
    otherColumns = []
    columnMap = {}

    header = _readHeaderColumns(file, encoding, separator)

    for name in header:
        if name in registeredNames:
            errDisplay("duplicate")
            return {}
        registeredNames, otherColumns = checkColumnNames(name, registeredNames, otherColumns, expectedColumns)

    nbRegistered = len(registeredNames)
    nbExpected = len(expectedColumns)

    # A manual mapping is needed when some expected columns are unmatched but
    # enough other columns exist to fill the gap, or when more columns than
    # expected were matched
    enoughToMap = nbRegistered < nbExpected and nbRegistered + len(otherColumns) >= nbExpected
    needsManualMapping = enoughToMap or nbRegistered > nbExpected

    if needsManualMapping:
        # No form is displayed when a "zip" entry exists in the session state
        if "zip" in st.session_state.keys():
            return {}
        columnMap = infoForm(expectedColumns, registeredNames, otherColumns)
    elif checkColumnExistence(registeredNames, expectedColumns):
        for rawName in header:
            # Collapse any run of whitespace so the key is a clean name
            cleanName = " ".join(rawName.split())
            for expected in expectedColumns:
                if lowerName(expected) == lowerName(cleanName):
                    columnMap[cleanName] = expected
    return columnMap


def addColumnsNamestoTSV(file : UploadedFile|str, encoding : str, separator : str, columnMap : typing.Dict[str,str]) -> str:
    """
    Formate the line containing column name to a TSV format

    Only the first line of the file is read. Each column name found in the
    mapping is replaced by its mapped name; names absent from the mapping
    are skipped.

    Args:
        file (UploadedFile or String) : target file (uploaded file or path)
        encoding (String) : file encoding
        separator (String) : Used separator of the file
        columnMap (Dict[String,String]) : mapping from the column name found
                                          in the file to the wanted name

    Returns:
        (String) : mapped column names separated by tabs, ended by a newline

    Authors:
        Marie FU
    """
    mappedNames = []
    for name in _readHeaderColumns(file, encoding, separator):
        if name in columnMap:
            mappedNames.append(f"{columnMap[name]}")
            continue
        # getColumnsNames stores whitespace-normalised names as keys, so the
        # raw name may only match once normalised the same way
        cleanName = " ".join(name.split())
        if cleanName in columnMap:
            mappedNames.append(f"{columnMap[cleanName]}")
    return "\t".join(mappedNames) + "\n"