Update pages

ee5f4a3e · Atrax Nicolas · ff89fe15 · ee5f4a3e · ee5f4a3e · ee5f4a3e
Commit ee5f4a3e authored Sep 05, 2023 by Atrax Nicolas
18 changed files
--- a/Streamlit/.streamlit/pages.toml
+++ b/Streamlit/.streamlit/pages.toml
@@ -4,7 +4,7 @@ name = "Home"
 icon = ":house:"

 [[pages]]
-name = "API"
+name = "API Tools"
 icon = ":globe_with_meridians:"
 is_section = true

@@ -21,7 +21,15 @@ path = "pages/Zotero_To_GarganText.py"
 name = "Zotero To GarganText"

 [[pages]]
-name = "Convert"
+path = "pages/TSV_Translator.py"
+name = "TSV Translator"
+
+[[pages]]
+path = "pages/YTB_to_TSV.py"
+name = "YTB To TSV"
+
+[[pages]]
+name = "Convert Tools"
 icon = ":twisted_rightwards_arrows:"
 is_section = true

@@ -70,12 +78,12 @@ name = "PDF To TXT"
 path = "pages/TXT_to_TSV.py"
 name = "TXT To TSV"

+[[pages]]
+name = "Other Tools"
+icon = ":twisted_rightwards_arrows:"
+is_section = true

 [[pages]]
 path = "pages/Merge_Term_GarganText.py"
-name = "Merge Term GarganText"
-in_section = false
+name = "Merge GarganText Terms"

-[[pages]]
-path = "pages/TSV_Translator.py"
-name = "TSV Translator"
--- a/Streamlit/lang/text_HALToGarganText.csv
+++ b/Streamlit/lang/text_HALToGarganText.csv
 locale,key,value
-fr,title,"**HAL vers GarganText**"
-en,title,"**HAL To GarganText**"
+fr,title,"# HAL vers GarganText"
+en,title,"# HAL To GarganText"

 fr,text,"HAL est une base de document scientifique en ligne et libre d'accès contenant plus d'un million de document."
 en,text,"HAL is an online and free access scientific document database containing more than a million documents"

--- a/Streamlit/lang/text_IsidoreToGarganText.csv
+++ b/Streamlit/lang/text_IsidoreToGarganText.csv
 locale,key,value
-fr,title,"**Isidore vers GarganText**"
-en,title,"**Isidore To GarganText**"
+fr,title,"# Isidore vers GarganText"
+en,title,"# Isidore To GarganText"

 fr,keyword,"Mots clés"
 en,keyword,"Key word"

--- a/Streamlit/lang/text_MergeTermGarganText.csv
+++ b/Streamlit/lang/text_MergeTermGarganText.csv
 locale,key,value
-fr,title,"Fusionne Deux Liste de Terme de GarganText"
-en,title,"Input Two Term File From GarganText"

-fr,file,"Choisir un fichier"
-en,file,"Choose a file"
+fr,title,"# Merge GarganText Terms"
+en,title,"# Merge GarganText Terms"

-fr,new_file,"Télécharge ton fichier fusionner:"
-en,new_file,"Download your merge file:"
\ No newline at end of file
+fr,text,"Fusionne 2 fichiers de termes de GarganText."
+en,text,"Merge 2 term files from GarganText."
+
+fr,file," Choisir un fichier "
+en,file," Choose a file "
+
+fr,new_file," Télécharge ton fichier fusionné "
+en,new_file," Download your merged file "
\ No newline at end of file
--- a/Streamlit/lang/text_PDFtoTSV.csv
+++ b/Streamlit/lang/text_PDFtoTSV.csv
@@ -33,7 +33,7 @@ fr,globalWarning, "Attention ! Plusieurs langues ont été détectées entre vos
 en,globalWarning,"Warning ! Multiple languages have been detected for your pdfs file !\nThe following languages have been detected : "


-fr,advice,"Cela pourrait affecter massivement l'analyse de GarganText.\nVous pouvez régler ça en traduisant avec l'outil TsvTranslator."
-en,advice,"This could massively affect the analysis of Gargantext.\nYou can correct this by translation with the TsvTranslator tool."
+fr,advice,"Cela pourrait affecter massivement l'analyse de GarganText. Vous pouvez régler ça en traduisant avec l'outil TsvTranslator."
+en,advice,"This could massively affect the analysis of GarganText. You can correct this by translating with the TsvTranslator tool."


--- a/Streamlit/lang/text_TsvTranslator.csv
+++ b/Streamlit/lang/text_TsvTranslator.csv
@@ -25,3 +25,15 @@ en,translate2," To "

 fr,detected,"Langues détectées : "
 en,detected,"Detected languages : "
+
+fr,loading,"Progression de la traduction : "
+en,loading,"Translation progress : "
+
+fr,loadingLanguages," Analyse des langues du fichier "
+en,loadingLanguages," File languages analysis "
+
+fr,sameLanguages,"Une seule langue a été détectée au sein du fichier : "
+en,sameLanguages,"Only one language has been detected inside this file : "
+
+fr,anotherFile," Traduire un autre fichier "
+en,anotherFile," Translate another file "
--- a/Streamlit/lang/text_Welcome.csv
+++ b/Streamlit/lang/text_Welcome.csv
 locale,key,value

-fr,title,"# Bienvenue sur GanganText Tools"
-en,title,"# Welcome to GanganText Tools"
+fr,title,"# Bienvenue sur GarganTools"
+en,title,"# Welcome to GarganTools"

 fr,welcome,"Bienvenue sur ces pages rassemblant des outils développés par des utilisateurs de GarganText pour des utilisateurs de GarganText."
 en,welcome,"Welcome to these pages featuring tools developed by GarganText’ users for GarganText’ users."

--- a/Streamlit/lang/text_YTBtoTSV.csv
+++ b/Streamlit/lang/text_YTBtoTSV.csv
 locale,key,value
-fr,title,"# YTB To TSV"
-en,title,"# YTB To TSV"
+fr,title,"# Youtube To TSV"
+en,title,"# Youtube To TSV"

-fr,text,"Inspecte un fichier CSV pour vérifier s'il est compatible avec Gargantext"
-en,text,"Inspect a CSV file to check if it is compatible with GarganText"
+fr,text,"Effectue une recherche Youtube à l'aide de mots clés (thème, titre de vidéo, lien de vidéo,...) pour créer un fichier TSV à partir des sous-titres de vidéos."
+en,text,"Run a Youtube search with keywords (topic, video title, video link,...) to create a TSV file based on the subtitles of the videos."

 fr,file,"Choisir un fichier"
 en,file,"Choose a file"
@@ -20,5 +20,14 @@ en,fill,"Only manual subtitles (longer waiting time)"
 fr,submit," Soumettre "
 en,submit," Submit "

+fr,loadingID," Recherche de vidéos "
+en,loadingID," Searching videos "
+
+fr,loading,"Traitement des vidéos : "
+en,loading,"Processing videos : "
+
+fr,quantity," sur "
+en,quantity," out of "
+
 fr,new_file,"Télécharge ton fichier TSV :"
 en,new_file,"Download your TSV file :"
--- a/Streamlit/lang/text_ZoteroToGarganText.csv
+++ b/Streamlit/lang/text_ZoteroToGarganText.csv
 locale,key,value
-fr,title,"**Zotero vers GarganText**"
-en,title,"**Zotero vers GarganText**"
+fr,title,"# Zotero vers GarganText"
+en,title,"# Zotero To GarganText"

 fr,data,"Type de donnée"
 en,data,"Type of data"

--- a/Streamlit/pages/Clean_CSV_to_TSV.py
+++ b/Streamlit/pages/Clean_CSV_to_TSV.py
@@ -245,11 +245,10 @@ def getContent(file, separator, data, success, fill, errorMessage):

 # Code End

-st.write(st.session_state.general_text_dict['title'])
 st.write(st.session_state.general_text_dict['text'])
 st.session_state.fill = st.checkbox(st.session_state.general_text_dict['fill'])
 file = st.file_uploader(
-    st.session_state.general_text_dict['file'], type=["csv"], key='file')
+    st.session_state.general_text_dict['file'], type=["tsv", "csv"], key='file')

 if file:
    name = file.name.split('.')[0] + '.tsv'

--- a/Streamlit/pages/HAL_To_GarganText.py
+++ b/Streamlit/pages/HAL_To_GarganText.py
@@ -13,8 +13,11 @@ tmp.base("HALToGarganText")
 limit = 500
 limitItems = 10000

+
 def loadApiHALNbFile(search, lang):
-    url = 'http://api.archives-ouvertes.fr/search/?q=' + search + '&rows=5&fl=title_s,' + lang + '_title_s,source_s,publicationDate_s,authFullName_s,' + lang + '_abstract_s,abstract_s&fq=language_s:' + lang
+    url = 'http://api.archives-ouvertes.fr/search/?q=' + search + '&rows=5&fl=title_s,' + lang + \
+        '_title_s,source_s,publicationDate_s,authFullName_s,' + \
+        lang + '_abstract_s,abstract_s&fq=language_s:' + lang
    resp = req.get(url)
    print(url)
    try:
@@ -26,7 +29,8 @@ def loadApiHALNbFile(search, lang):


 def loadApiHAL(search, lang, page, nbvalue):
-    url = 'http://api.archives-ouvertes.fr/search/?q=' + search + '&start=' + str(page * limit) + '&rows=' + str(nbvalue) + '&fl=title_s,' + lang + '_title_s,source_s,publicationDate_s,authFullName_s,' + lang + '_abstract_s,abstract_s&fq=language_s:' + lang
+    url = 'http://api.archives-ouvertes.fr/search/?q=' + search + '&start=' + str(page * limit) + '&rows=' + str(
+        nbvalue) + '&fl=title_s,' + lang + '_title_s,source_s,publicationDate_s,authFullName_s,' + lang + '_abstract_s,abstract_s&fq=language_s:' + lang
    resp = req.get(url)
    print(url)
    try:
@@ -81,41 +85,50 @@ def getParamFromDoc(docs):
        if 'publicationDate_s' in doc.keys():
            split = doc['publicationDate_s'].split('-')
            if len(split) == 3:
-                pdate = datetime.strptime(doc['publicationDate_s'], '%Y-%m-%d').strftime('%Y\t%m\t%d')
+                pdate = datetime.strptime(
+                    doc['publicationDate_s'], '%Y-%m-%d').strftime('%Y\t%m\t%d')
            elif len(split) == 2:
-                pdate = datetime.strptime(doc['publicationDate_s'], '%Y-%m').strftime('%Y\t%m\t1')
+                pdate = datetime.strptime(
+                    doc['publicationDate_s'], '%Y-%m').strftime('%Y\t%m\t1')
            else:
                pdate = doc['publicationDate_s'] + '\t1\t1'
        else:
            pdate = '1900\t1\t1'

-        abstract = abstract.encode(encoding = 'UTF-8', errors = 'ignore').decode("utf-8").replace('\t', '').replace('"', '').replace('\n', '')
-        title = title.encode(encoding = 'UTF-8', errors = 'ignore').decode("utf-8").replace('\t', '').replace('"', '').replace('\n', '')
-        source = source.encode(encoding = 'UTF-8', errors = 'ignore').decode("utf-8").replace('\t', '').replace('"', '').replace('\n', '').replace('\n', '')
+        abstract = abstract.encode(encoding='UTF-8', errors='ignore').decode(
+            "utf-8").replace('\t', '').replace('"', '').replace('\n', '')
+        title = title.encode(encoding='UTF-8', errors='ignore').decode(
+            "utf-8").replace('\t', '').replace('"', '').replace('\n', '')
+        source = source.encode(encoding='UTF-8', errors='ignore').decode(
+            "utf-8").replace('\t', '').replace('"', '').replace('\n', '').replace('\n', '')

        # Output
-        output += str(title) + "\t" + source + "\t" + str(pdate) + "\t" + abstract + "\t" + authors + "\t" + str(1) + "\n"
+        output += str(title) + "\t" + source + "\t" + str(pdate) + \
+            "\t" + abstract + "\t" + authors + "\t" + str(1) + "\n"
    return output

+
 def create_output(search, lang, nb_value):
    output = "title\tsource\tpublication_year\tpublication_month\tpublication_day\tabstract\tauthors\tweight\n"
    for i in range(0, nb_value//limit):
        response = loadApiHAL(search, lang, i, limit)
        output += getParamFromDoc(response['response']['docs'])
-    if (nb_value%limit != 0):
-        response = loadApiHAL(search, lang, nb_value//limit, nb_value%limit)
+    if (nb_value % limit != 0):
+        response = loadApiHAL(search, lang, nb_value//limit, nb_value % limit)
        output += getParamFromDoc(response['response']['docs'])
    return output

+
 lang = {
-    'Français' : 'fr',
-    'Anglais' : 'en',
+    'Français': 'fr',
+    'Anglais': 'en',
 }

 if 'stage_isidore' not in st.session_state:
    st.session_state.stage_isidore = 0
    st.session_state.nb_wanted = 1

+
 def set_stage(stage):
    st.session_state.stage_isidore = stage
    st.session_state.output = ''
@@ -127,19 +140,20 @@ st.write(st.session_state.general_text_dict['text'])

 # Form
 form = st.form('api')
-form.write(st.session_state.general_text_dict['title'])

 search = form.text_input(st.session_state.general_text_dict['keyword'])
-language = form.selectbox(st.session_state.general_text_dict['lang'], lang.keys())
+language = form.selectbox(
+    st.session_state.general_text_dict['lang'], lang.keys())

-form.form_submit_button(st.session_state.general_text_dict['submit'], on_click=set_stage, args=(1,))
+form.form_submit_button(
+    st.session_state.general_text_dict['submit'], on_click=set_stage, args=(1,))

 # API and Slider
 if st.session_state.stage_isidore > 0:

    # Only call first time and after
    if 'search' not in st.session_state or 'language' not in st.session_state or search != st.session_state.search or language != st.session_state.language:
-        with st.spinner(st.session_state.general_text_dict['load_api'] ):
+        with st.spinner(st.session_state.general_text_dict['load_api']):
            nb_doc = int(loadApiHALNbFile(search, lang[language]))
        st.session_state.nb_doc = nb_doc
        if nb_doc != 0:
@@ -150,27 +164,31 @@ if st.session_state.stage_isidore > 0:

        # Form with slider
        form2 = st.form('my_form2')
-        form2.write(st.session_state.general_text_dict['nb_doc'] + str(st.session_state.nb_doc))
+        form2.write(
+            st.session_state.general_text_dict['nb_doc'] + str(st.session_state.nb_doc))

        if st.session_state.nb_doc > limitItems:
-            form2.write(st.session_state.general_text_dict['perform1'] + str(limitItems) + st.session_state.general_text_dict['perform2'])
-            st.session_state.nb_wanted = form2.slider(st.session_state.general_text_dict['nb_taken'], 1, limitItems)
+            form2.write(st.session_state.general_text_dict['perform1'] + str(
+                limitItems) + st.session_state.general_text_dict['perform2'])
+            st.session_state.nb_wanted = form2.slider(
+                st.session_state.general_text_dict['nb_taken'], 1, limitItems)
        else:
-            st.session_state.nb_wanted = form2.slider(st.session_state.general_text_dict['nb_taken'], 1, int(st.session_state.nb_doc))
-        form2.form_submit_button(st.session_state.general_text_dict['submit'], on_click=set_stage, args=(2,))
+            st.session_state.nb_wanted = form2.slider(
+                st.session_state.general_text_dict['nb_taken'], 1, int(st.session_state.nb_doc))
+        form2.form_submit_button(
+            st.session_state.general_text_dict['submit'], on_click=set_stage, args=(2,))

    else:
-        st.write(st.session_state.general_text_dict['overload_api'] )
+        st.write(st.session_state.general_text_dict['overload_api'])


 # Download
 if st.session_state.stage_isidore > 1:

-    with st.spinner(st.session_state.general_text_dict['createTSV'] ):
+    with st.spinner(st.session_state.general_text_dict['createTSV']):
        if st.session_state.output == '':
            print(st.session_state.nb_wanted)
-            st.session_state.output = create_output(st.session_state.search, lang[st.session_state.language], st.session_state.nb_wanted)
+            st.session_state.output = create_output(
+                st.session_state.search, lang[st.session_state.language], st.session_state.nb_wanted)

    st.download_button('Download TSV', st.session_state.output, 'output.csv')
-
-
--- a/Streamlit/pages/Isidore_To_GarganText.py
+++ b/Streamlit/pages/Isidore_To_GarganText.py
@@ -17,9 +17,9 @@ limitItems = 5000 # Can't be superior of 10 times numberReplies
 retryTime = 2


-
 def loadApiIsidoreNumberFile(search, language):
-    url = 'https://api.isidore.science/resource/search?q=' + search + '&output=json&replies=10&language=http://lexvo.org/id/iso639-3/' + language
+    url = 'https://api.isidore.science/resource/search?q=' + search + \
+        '&output=json&replies=10&language=http://lexvo.org/id/iso639-3/' + language
    resp = req.get(url)
    print(url)
    if resp.ok:
@@ -30,8 +30,11 @@ def loadApiIsidoreNumberFile(search, language):

    return docs

+
 def loadApiIsidorePage(search, language, page):
-    url = 'https://api.isidore.science/resource/search?q=' + search + '&output=json&replies=' + str(numberReplies) + '&page=' + str(page) + '&language=http://lexvo.org/id/iso639-3/' + language
+    url = 'https://api.isidore.science/resource/search?q=' + search + '&output=json&replies=' + \
+        str(numberReplies) + '&page=' + str(page) + \
+        '&language=http://lexvo.org/id/iso639-3/' + language
    resp = req.get(url)
    print(url)
    try:
@@ -47,22 +50,23 @@ def create_output(search, language, nb_doc):
    output = "title\tsource\tpublication_year\tpublication_month\tpublication_day\tabstract\tauthors\tweight\n"
    nb = 0
    for i in range(1, nb_doc//numberReplies + 1):
-        while(True):
+        while (True):
            txt = loadApiIsidorePage(search, language, i)
            if txt != 0:
                break
            time.sleep(retryTime)
            print('Retry')
-        tmp, nb_tmp = createFile(txt, nb_doc%numberReplies, language)
+        tmp, nb_tmp = createFile(txt, nb_doc % numberReplies, language)
        output += tmp
        nb += nb_tmp
-    if nb_doc%numberReplies != 0:
+    if nb_doc % numberReplies != 0:
        txt = loadApiIsidorePage(search, language, nb_doc//numberReplies + 1)
-        tmp, nb_tmp = createFile(txt, nb_doc%numberReplies, language)
+        tmp, nb_tmp = createFile(txt, nb_doc % numberReplies, language)
        output += tmp
        nb += nb_tmp
    return output, nb

+
 def createFile(docs, limit, language):

    # Output text
@@ -72,11 +76,11 @@ def createFile(docs, limit, language):
    for doc in docs:
        if (i == limit):
            break
-        i+=1
+        i += 1
        # Title
        title = doc["isidore"]["title"]
        if (type(title) != str):
-            if(type(title) == list):
+            if (type(title) == list):
                tmp = ''
                for lang in title:
                    if type(lang) != str and lang['@xml:lang'] == language[:2]:
@@ -93,22 +97,22 @@ def createFile(docs, limit, language):
                title = title['$']

        # Source
-        source =doc["isidore"]["source_info"]["sourceName"]["$"]
+        source = doc["isidore"]["source_info"]["sourceName"]["$"]

        # Author
        if doc['isidore']['enrichedCreators'] != []:
            list_author = doc["isidore"]["enrichedCreators"]["creator"]
            authors = []
-            if(type(list_author) == list):
+            if (type(list_author) == list):
                for author in list_author:
-                    authors.append(author["@origin"].replace('"',''))
+                    authors.append(author["@origin"].replace('"', ''))
                authors = ';'.join(authors)
            else:
-                authors = list_author["@origin"].replace('"','')
+                authors = list_author["@origin"].replace('"', '')
        else:
            authors = ''

-        #Abstract
+        # Abstract
        if 'abstract' in doc['isidore'].keys() and doc["isidore"]["abstract"] != []:
            abstract = doc["isidore"]["abstract"]
        else:
@@ -126,7 +130,7 @@ def createFile(docs, limit, language):
                        abstract = abstract[0]['$']
                else:
                    abstract = tmp
-            else :
+            else:
                abstract = abstract['$']

        if 'types' in doc['isidore'].keys():
@@ -134,24 +138,27 @@ def createFile(docs, limit, language):
                nb += 1
            elif type(doc['isidore']['types']['type'] == dict) and doc['isidore']['types']['type'][1] in ['Books', 'text']:
                nb += 1
-        else :
+        else:
            print(title)

        # Publication Date
        pdate = getGoodTime(doc["isidore"]["date"]["@origin"])

-        abstract = abstract.encode(encoding = 'UTF-8', errors = 'ignore').decode("utf-8").replace('\t', '').replace('"', '')
-        title = title.encode(encoding = 'UTF-8', errors = 'ignore').decode("utf-8").replace('\t', '').replace('"', '')
-        source = source.encode(encoding = 'UTF-8', errors = 'ignore').decode("utf-8").replace('\t', '').replace('"', '').replace('\n', '')
+        abstract = abstract.encode(
+            encoding='UTF-8', errors='ignore').decode("utf-8").replace('\t', '').replace('"', '')
+        title = title.encode(
+            encoding='UTF-8', errors='ignore').decode("utf-8").replace('\t', '').replace('"', '')
+        source = source.encode(encoding='UTF-8', errors='ignore').decode(
+            "utf-8").replace('\t', '').replace('"', '').replace('\n', '')

        # Output
-        row = str(title) + "\t" + source + "\t" + str(pdate) + "\t" + abstract + "\t" + authors + "\t" + str(1) + "\n"
+        row = str(title) + "\t" + source + "\t" + str(pdate) + \
+            "\t" + abstract + "\t" + authors + "\t" + str(1) + "\n"
        output += row

    return output, nb


-
 # Need modification when Isidore upgrade his api
 def getGoodTime(time):
    time = time.replace('?', '0').replace('.', '0')
@@ -160,43 +167,44 @@ def getGoodTime(time):
    if (len(tiret) == 1 and len(tiret[0]) == 4):
        if time != '0001':
            return tiret[0] + '\t1\t1'
-    elif (len(tiret)>1):
-        if (len(tiret) == 2 and len(tiret[0])==4 and len(tiret[1]) <= 2):
+    elif (len(tiret) > 1):
+        if (len(tiret) == 2 and len(tiret[0]) == 4 and len(tiret[1]) <= 2):
            return tiret[0] + '\t' + tiret[1] + '\t1'
-        elif (len(tiret) == 3 and len(tiret[0])==4 and len(tiret[1]) <= 2 and len(tiret[2].split('T')[0]) <= 2):
+        elif (len(tiret) == 3 and len(tiret[0]) == 4 and len(tiret[1]) <= 2 and len(tiret[2].split('T')[0]) <= 2):
            return tiret[0] + '\t' + tiret[1] + '\t' + tiret[2].split('T')[0]
-    elif(len(slash)==1 and len(time) == 4):
+    elif (len(slash) == 1 and len(time) == 4):
        return slash[0] + '\t1\t1'
    elif (len(slash) > 1):
-        if (len(slash) == 2 and len(slash[1])==4 and len(slash[0]) <= 2):
+        if (len(slash) == 2 and len(slash[1]) == 4 and len(slash[0]) <= 2):
            return slash[1] + '\t' + slash[0] + '\t1'
-        elif (len(slash) == 3 and len(slash[2])==4 and len(slash[1]) <= 2 and len(slash[0]) <= 2):
+        elif (len(slash) == 3 and len(slash[2]) == 4 and len(slash[1]) <= 2 and len(slash[0]) <= 2):
            return slash[2] + '\t' + slash[1] + '\t' + slash[0].split('T')[0]
    elif len(time) == 8:
        return time[:4] + '\t' + time[4:6] + '\t' + time[6:8]
    return '1900\t1\t1'


-###Streamlit
+# Streamlit


 lang = {
-    'Français' : 'fra',
-    'Anglais' : 'eng',
-    'Espagnol' : 'spa',
-    'Italien' : 'ita',
-    'Allemand' : 'deu',
+    'Français': 'fra',
+    'Anglais': 'eng',
+    'Espagnol': 'spa',
+    'Italien': 'ita',
+    'Allemand': 'deu',
 }

 # Can be added but low result
-#'Polonais' : 'nld',
-#'Portugais' : 'por', 
-#'Russe' : 'rus' 
+# 'Polonais' : 'nld',
+# 'Portugais' : 'por',
+# 'Russe' : 'rus'

 if 'stage_isidore' not in st.session_state:
    st.session_state.stage_isidore = 0
    st.session_state.nb_wanted = 1

+
 def set_stage(stage):
    st.session_state.stage_isidore = stage
    st.session_state.output = ''
@@ -212,19 +220,20 @@ with col2:

 # Form
 form = st.form('api')
-form.write(st.session_state.general_text_dict['title'])

 search = form.text_input(st.session_state.general_text_dict['keyword'])
-language = form.selectbox(st.session_state.general_text_dict['lang'], lang.keys())
+language = form.selectbox(
+    st.session_state.general_text_dict['lang'], lang.keys())

-form.form_submit_button(st.session_state.general_text_dict['submit'], on_click=set_stage, args=(1,))
+form.form_submit_button(
+    st.session_state.general_text_dict['submit'], on_click=set_stage, args=(1,))

 # API and Slider
 if st.session_state.stage_isidore > 0:

    # Only call first time and after
    if 'search' not in st.session_state or 'language' not in st.session_state or search != st.session_state.search or language != st.session_state.language:
-        with st.spinner(st.session_state.general_text_dict['load_api'] ):
+        with st.spinner(st.session_state.general_text_dict['load_api']):
            nb_doc = int(loadApiIsidoreNumberFile(search, lang[language]))
        st.session_state.nb_doc = nb_doc
        if nb_doc != 0:
@@ -235,28 +244,32 @@ if st.session_state.stage_isidore > 0:

        # Form with slider
        form2 = st.form('my_form2')
-        form2.write(st.session_state.general_text_dict['nb_doc'] + str(st.session_state.nb_doc))
+        form2.write(
+            st.session_state.general_text_dict['nb_doc'] + str(st.session_state.nb_doc))

        if st.session_state.nb_doc > limitItems:
-            form2.write(st.session_state.general_text_dict['perform1'] + str(limitItems) + st.session_state.general_text_dict['perform2'])
-            st.session_state.nb_wanted = form2.slider(st.session_state.general_text_dict['nb_taken'], 1, limitItems)
+            form2.write(st.session_state.general_text_dict['perform1'] + str(
+                limitItems) + st.session_state.general_text_dict['perform2'])
+            st.session_state.nb_wanted = form2.slider(
+                st.session_state.general_text_dict['nb_taken'], 1, limitItems)
        else:
-            st.session_state.nb_wanted = form2.slider(st.session_state.general_text_dict['nb_taken'], 1, int(st.session_state.nb_doc))
-        form2.form_submit_button(st.session_state.general_text_dict['submit'], on_click=set_stage, args=(2,))
+            st.session_state.nb_wanted = form2.slider(
+                st.session_state.general_text_dict['nb_taken'], 1, int(st.session_state.nb_doc))
+        form2.form_submit_button(
+            st.session_state.general_text_dict['submit'], on_click=set_stage, args=(2,))

    else:
-        st.write(st.session_state.general_text_dict['overload_api'] )
+        st.write(st.session_state.general_text_dict['overload_api'])


 # Download
 if st.session_state.stage_isidore > 1:

-    with st.spinner(st.session_state.general_text_dict['createTSV'] ):
+    with st.spinner(st.session_state.general_text_dict['createTSV']):
        if st.session_state.output == '':
-            print(st.session_state.nb_wanted)
-            st.session_state.output, st.session_state.nb_bad_file = create_output(st.session_state.search, lang[st.session_state.language], st.session_state.nb_wanted)
+            st.session_state.output, st.session_state.nb_bad_file = create_output(
+                st.session_state.search, lang[st.session_state.language], st.session_state.nb_wanted)

-    st.write(st.session_state.general_text_dict['doc_abstract1'] + str(st.session_state.nb_bad_file) + st.session_state.general_text_dict['doc_abstract2'])
+    st.write(st.session_state.general_text_dict['doc_abstract1'] + str(
+        st.session_state.nb_bad_file) + st.session_state.general_text_dict['doc_abstract2'])
    st.download_button('Download TSV', st.session_state.output, 'output.csv')
-
-
--- a/Streamlit/pages/Merge_Term_GarganText.py
+++ b/Streamlit/pages/Merge_Term_GarganText.py
@@ -21,73 +21,71 @@ def tmp(file1, file2):
    roots = []
    leafs = []

-
    # find the roots of list 1
-	for root in ngrams1.keys() :
+    for root in ngrams1.keys():

-		if ngrams1[root]['list'] == "MapTerm" :
+        if ngrams1[root]['list'] == "MapTerm":

            roots.append(root)
            leafs = leafs + ngrams1[root]['children']

    # merge list 2 in list 1
-	for root in ngrams2.keys() :
+    for root in ngrams2.keys():

-		if root in roots :
+        if root in roots:

-			ngrams1[root]['children'] = list(set(ngrams1[root]['children'] + ngrams2[root]['children']))
+            ngrams1[root]['children'] = list(
+                set(ngrams1[root]['children'] + ngrams2[root]['children']))

-		else :
+        else:

-			if root not in leafs :
+            if root not in leafs:

                ngrams1[root] = ngrams2[root]
                children = ngrams1[root]['children']
                ngrams1[root]['children'] = []
-				for child in children :
-					if child not in root or child not in leafs :
+                for child in children:
+                    if child not in root or child not in leafs:
                        ngrams1[root]['children'].append(child)

    # clean the merged list
-	for root in ngrams1.keys() :
+    for root in ngrams1.keys():

-		if ngrams1[root]['list'] == "MapTerm" :
+        if ngrams1[root]['list'] == "MapTerm":

-			if len(ngrams1[root]['children']) == 0 :
+            if len(ngrams1[root]['children']) == 0:

                ngrams1[root]['children'] = []

            merged[root] = ngrams1[root]

-
    file1['NgramsTerms']['data'] = merged
    file1['Authors']['data'] = {}
    file1['Institutes']['data'] = {}
    file1['Sources']['data'] = {}

-	tmp = file1.to_json(orient='columns',indent=4)
+    tmp = file1.to_json(orient='columns', indent=4)
    return tmp


-
-
-
-st.subheader(st.session_state.general_text_dict['title'])
+st.write(st.session_state.general_text_dict['text'])
 col1, col2 = st.columns(2)
 with col1:
-    file1 = st.file_uploader(st.session_state.general_text_dict['file'],type=["json"],key='file1')
+    file1 = st.file_uploader(st.session_state.general_text_dict['file'], type=[
+                             "json"], key='file1')
 with col2:
-    file2 = st.file_uploader(st.session_state.general_text_dict['file'],type=["json"],key='file2')
+    file2 = st.file_uploader(st.session_state.general_text_dict['file'], type=[
+                             "json"], key='file2')


 if (file1 and file2):
    try:
        df1 = pd.read_json(file1)
        df2 = pd.read_json(file2)
-		st.write(st.session_state.general_text_dict['new_file'])
-		time = datetime.strftime(datetime.now(),"%d-%m-%Y/%H:%M:%S")
-		name = 'output-' +time+ '.json'
-		st.download_button('Download File', tmp(df1, df2), name)
+        time = datetime.strftime(datetime.now(), "%d-%m-%Y/%H:%M:%S")
+        name = 'output-' + time + '.json'
+        st.download_button(
+            st.session_state.general_text_dict['new_file'], tmp(df1, df2), name)
    except Exception as e:
        st.write("Error : one of the file isn't valid")


--- a/Streamlit/pages/PDF_to_TSV.py
+++ b/Streamlit/pages/PDF_to_TSV.py
@@ -97,9 +97,9 @@ def detectMultiplePdfLanguages():
    languages = []
    for l in st.session_state.pdfLanguages.values():
        if l not in languages and len(languages) == 1:
-            st.error(st.session_state.general_text_dict['globalWarning'])
-            st.error(str(st.session_state.pdfLanguages))
-            st.error(st.session_state.general_text_dict['advice'])
+            st.info(st.session_state.general_text_dict['globalWarning'])
+            st.info(str(st.session_state.pdfLanguages))
+            st.info(st.session_state.general_text_dict['advice'])
            return
        if len(languages) == 0:
            languages.append(l)
@@ -290,7 +290,7 @@ if st.session_state.page == 3:
                        "/PDFCompilation", 'zip', st.session_state.pdfDir.name)
    with open(st.session_state.zipDir.name + "/PDFCompilation.zip", 'rb') as zip:
        if st.session_state.warning != "":
-            st.error(st.session_state.warning)
+            st.info(st.session_state.warning)
        detectMultiplePdfLanguages()
        st.write(st.session_state.general_text_dict['new_file'])
        st.download_button("PDFCompilation.zip",

--- a/Streamlit/pages/TSV_Translator.py
+++ b/Streamlit/pages/TSV_Translator.py
@@ -114,12 +114,14 @@ def getContent(file, data, total, separator):
    reader = csv.DictReader(codecs.iterdecode(
        file, 'utf-8'), delimiter=separator)
    count = 1
-    bar = st.progress(0, "Translation progress : 0%")
+    bar = st.progress(0,
+                      st.session_state.general_text_dict['loading'] + "0%")
    for row in reader:
        tmp = ""
        first = True
        loading = int(count / total * 100)
-        bar.progress(loading, "Translation progress : " + str(loading) + "%")
+        bar.progress(loading,
+                     st.session_state.general_text_dict['loading'] + str(loading) + "%")
        for name, value in row.items():
            if not first:
                tmp += "\t"
@@ -207,9 +209,15 @@ def askTranslateLanguages(file):
 if st.session_state.page == 0:
    if st.session_state.detect:
        if st.session_state.file != None:
-            st.session_state.separator = getSeparator(st.session_state.file)
+            with st.spinner(st.session_state.general_text_dict['loadingLanguages']):
+                st.session_state.separator = getSeparator(
+                    st.session_state.file)
                st.session_state.languages = inspectLanguages(
                    st.session_state.file)
+            if len(st.session_state.languages) == 1:
+                st.session_state.page = 3
+                st.session_state.detect = False
+            else:
                st.session_state.page = 1
                st.session_state.detect = False
                st.session_state.tmpFile = st.session_state.file
@@ -220,7 +228,7 @@ if st.session_state.page == 0:


 if st.session_state.page == 1:
-    if st.session_state.submit:
+    if st.session_state.submit and st.session_state.srcLang != st.session_state.destLang:
        st.session_state.page = 2
        st.session_state.submit = False
    else:
@@ -232,3 +240,10 @@ if st.session_state.page == 2:
    name = st.session_state.tmpFile.name
    st.download_button(name,
                       tsv, name, on_click=resetPage())
+
+if st.session_state.page == 3:
+    st.write(
+        st.session_state.general_text_dict['sameLanguages'] + list(st.session_state.languages.keys())[0])
+    st.session_state.languages = {}
+    st.button(
+        st.session_state.general_text_dict['anotherFile'], on_click=resetPage())
--- a/Streamlit/pages/TXT_to_TSV.py
+++ b/Streamlit/pages/TXT_to_TSV.py
@@ -197,5 +197,5 @@ if st.session_state.page == 1:
    st.write(st.session_state.general_text_dict['new_file'])
    st.session_state.submit = False
    if st.session_state.warning != "":
-        st.error(st.session_state.warning)
+        st.info(st.session_state.warning)
    st.download_button(name, txt, name, on_click=setPage())
--- a/Streamlit/pages/YTB_to_TSV.py
+++ b/Streamlit/pages/YTB_to_TSV.py
@@ -28,7 +28,6 @@ def ytbSearch(search, n):
    result = videosSearch.result()["result"]
    videos = []
    while len(videos) < n:
-        print(len(videos))
        for video in result:
            id = video["id"]
            title = video["title"]
@@ -168,17 +167,19 @@ def correctTranscript(transcript):

 def transcriptToTsv(search, nbVideos):
    tsv = "authors\tsource\tpublication_year\tpublication_month\tpublication_day\ttitle\tabstract\n"
+    dict = st.session_state.general_text_dict
+    with st.spinner(dict['loadingID']):
        if st.session_state.manualOnly:
            videos = ytbSearch(search, nbVideos * 20)
        else:
            videos = ytbSearch(search, nbVideos * 4)
    count = 0
    countManual = 0
-    bar = st.progress(count / nbVideos, "Search videos : " +
-                      str(count) + " out of " + str(nbVideos))
+    bar = st.progress(count / nbVideos, dict['loading'] +
+                      str(count) + dict['quantity'] + str(nbVideos))
    for video in videos:
-        bar.progress(count / nbVideos, "Search videos : " +
-                     str(count) + " out of " + str(nbVideos))
+        bar.progress(count / nbVideos, dict['loading'] +
+                     str(count) + dict['quantity'] + str(nbVideos))
        if count == nbVideos:
            break
        id, author, title = video[0], video[1], video[2]
@@ -237,6 +238,7 @@ def askVideos():


 if st.session_state.page == 0:
+    st.write(st.session_state.general_text_dict['text'])
    if st.session_state.submit:
        st.session_state.submit = False
        if st.session_state.keywords != "":

--- a/Streamlit/pages/Zotero_To_GarganText.py
+++ b/Streamlit/pages/Zotero_To_GarganText.py
@@ -29,9 +29,9 @@ def getAllItems(id):
        return 0


-
 def loadApiItems(id, page):
-    url = 'https://api.zotero.org/users/'+ str(id) +'/items/top?limit=' + str(limit) + '&start=' + str(page * limit) + '&direction=asc&sort=title'
+    url = 'https://api.zotero.org/users/' + str(id) + '/items/top?limit=' + str(
+        limit) + '&start=' + str(page * limit) + '&direction=asc&sort=title'
    resp = req.get(url)
    if resp.ok:
        jsontxt = json.loads(resp.content)
@@ -40,10 +40,9 @@ def loadApiItems(id, page):
        return 0, 0


-
 def loadApiCollections(id):

-    url = 'https://api.zotero.org/users/'+ str(id) +'/collections'
+    url = 'https://api.zotero.org/users/' + str(id) + '/collections'
    resp = req.get(url)
    if resp.ok:
        jsontxt = json.loads(resp.content)
@@ -51,9 +50,11 @@ def loadApiCollections(id):
    else:
        return 0

+
 def loadApiItemsByCollections(id, key):

-    url = 'https://api.zotero.org/users/'+ str(id) +'/collections/' + str(key) + '/items/top'
+    url = 'https://api.zotero.org/users/' + \
+        str(id) + '/collections/' + str(key) + '/items/top'
    resp = req.get(url)
    if resp.ok:
        jsontxt = json.loads(resp.content)
@@ -62,7 +63,6 @@ def loadApiItemsByCollections(id, key):
        return 0


-
 def getAllCollections(docs):
    output = {}
    for doc in docs:
@@ -104,15 +104,17 @@ def getParamFromDoc(doc):
    else:
        pdate = str(date.today().year) + '\t1\t1'

-    abstract = abstract.encode(encoding = 'UTF-8', errors = 'ignore').decode("utf-8").replace('\t', '').replace('"', '').replace('\n', '')
-    title = title.encode(encoding = 'UTF-8', errors = 'ignore').decode("utf-8").replace('\t', '').replace('"', '').replace('\n', '')
-    source = source.encode(encoding = 'UTF-8', errors = 'ignore').decode("utf-8").replace('\t', '').replace('"', '').replace('\n', '').replace('\n', '')
+    abstract = abstract.encode(encoding='UTF-8', errors='ignore').decode(
+        "utf-8").replace('\t', '').replace('"', '').replace('\n', '')
+    title = title.encode(encoding='UTF-8', errors='ignore').decode(
+        "utf-8").replace('\t', '').replace('"', '').replace('\n', '')
+    source = source.encode(encoding='UTF-8', errors='ignore').decode(
+        "utf-8").replace('\t', '').replace('"', '').replace('\n', '').replace('\n', '')

    # Output
    return str(title) + "\t" + source + "\t" + str(pdate) + "\t" + abstract + "\t" + authors + "\t" + str(1) + "\n"


-
 def createTSV(docs):
    # Output text
    output = "title\tsource\tpublication_year\tpublication_month\tpublication_day\tabstract\tauthors\tweight\n"
@@ -123,6 +125,7 @@ def createTSV(docs):

    return output

+
 def createTSVfromCollections():
    # Output text
    output = "title\tsource\tpublication_year\tpublication_month\tpublication_day\tabstract\tauthors\tweight\n"
@@ -141,6 +144,7 @@ if 'stage' not in st.session_state:
    st.session_state.id = ''
    st.session_state.format = 'items'

+
 def set_stage(stage):
    st.session_state.stage = stage
    st.session_state.id = st.session_state.idForm
@@ -152,11 +156,14 @@ def set_stage(stage):
        st.session_state.docsByKey = {}
        st.session_state.select = False

+
 def set_stage_collections(stage):
    st.session_state.stage = stage
    st.session_state.collectionsKey = []
    for elem in st.session_state.collectionsForm:
-        st.session_state.collectionsKey.append(st.session_state.collections[elem])
+        st.session_state.collectionsKey.append(
+            st.session_state.collections[elem])
+

 def set_stage_items(stage):
    st.session_state.stage = stage
@@ -166,13 +173,17 @@ def set_stage_items(stage):
        if elem[1]:
            st.session_state.result.append(elem[0])

+
 def set_stage_minus():
    st.session_state.stage -= 1

+
 def saveKey():
    for elem in st.session_state.keys():
        if 'itemskey-' in elem:
-            st.session_state.docsByKey[elem.split('-')[1]][1] = st.session_state[elem]
+            st.session_state.docsByKey[elem.split(
+                '-')[1]][1] = st.session_state[elem]
+

 def fill_docs():
    st.session_state.docs = []
@@ -180,10 +191,12 @@ def fill_docs():
        if st.session_state.zotero_search in elem[0]['data']['title']:
            st.session_state.docs.append(elem[0])

+
 def selectAll():
    st.session_state.select = not st.session_state.select
    for elem in st.session_state.docs:
-        st.session_state.docsByKey[elem['data']['key']] = [elem, st.session_state.select]
+        st.session_state.docsByKey[elem['data']['key']] = [
+            elem, st.session_state.select]


 def clear_docs():
@@ -208,24 +221,22 @@ def switch_page(value):
    fill_docs()


-
-
 # Begin page
 if st.session_state.stage == 0:

    # Form
    form = st.form('api')
-    form.write(st.session_state.general_text_dict['title'])
-
    lst = ['items', 'collections']
-    st.session_state.id = form.text_input('ID',st.session_state.id, key='idForm', help=st.session_state.general_text_dict['help'])
-    st.session_state.format = form.selectbox(st.session_state.general_text_dict['data'], lst,lst.index(st.session_state.format), key='formatForm')
-
-    form.form_submit_button(st.session_state.general_text_dict['submit'], on_click=set_stage, args=(1,))
+    st.session_state.id = form.text_input(
+        'ID', st.session_state.id, key='idForm', help=st.session_state.general_text_dict['help'])
+    st.session_state.format = form.selectbox(
+        st.session_state.general_text_dict['data'], lst, lst.index(st.session_state.format), key='formatForm')

+    form.form_submit_button(
+        st.session_state.general_text_dict['submit'], on_click=set_stage, args=(1,))


-#page for select items
+# page for select items
 if st.session_state.stage == 1 and st.session_state.format == 'items':
    if (st.session_state.docsByKey == {}):
        st.session_state.docs = getAllItems(st.session_state.id)
@@ -234,36 +245,41 @@ if st.session_state.stage == 1 and st.session_state.format == 'items':
                st.session_state.docsByKey[doc['data']['key']] = [doc, False]
            clear_docs()

-
    if st.session_state.docs == 0:
        st.write(st.session_state.general_text_dict['denied'])
    else:
        st.write(st.session_state.general_text_dict['add_doc'])
-        st.checkbox(st.session_state.general_text_dict['select_all'],st.session_state.select, on_change=selectAll)
-        st.text_input(st.session_state.general_text_dict['search'], key='zotero_search', on_change=clear_docs)
+        st.checkbox(
+            st.session_state.general_text_dict['select_all'], st.session_state.select, on_change=selectAll)
+        st.text_input(
+            st.session_state.general_text_dict['search'], key='zotero_search', on_change=clear_docs)
        min = st.session_state.page * sizepage
        max = st.session_state.page * sizepage + sizepage
        for doc in st.session_state.docs[min:max]:
-            st.checkbox(doc['data']['title'],st.session_state.docsByKey[doc['data']['key']][1], key='itemskey-'+doc['data']['key'])
+            st.checkbox(doc['data']['title'], st.session_state.docsByKey[doc['data']
+                        ['key']][1], key='itemskey-'+doc['data']['key'])
        col1, col2 = st.columns(2)
        with col1:
-            st.button(st.session_state.general_text_dict['submit'], on_click=set_stage_items, args=(2,))
+            st.button(
+                st.session_state.general_text_dict['submit'], on_click=set_stage_items, args=(2,))
        with col2:
            nb = int(st.session_state.nbdoc)//sizepage
-            if int(st.session_state.nbdoc)%sizepage == 0: # Fix the problem where the page end on the last file but there is still a page left
+            # Fix the problem where the page end on the last file but there is still a page left
+            if int(st.session_state.nbdoc) % sizepage == 0:
                nb -= 1
            if nb != 0:
                col3, col4 = st.columns(2)
                with col3:
                    if st.session_state.page != 0:
-                        st.button(st.session_state.general_text_dict['p_page'], on_click=switch_page, args=('down',))
+                        st.button(
+                            st.session_state.general_text_dict['p_page'], on_click=switch_page, args=('down',))
                with col4:
                    if st.session_state.page != nb:
-                        st.button(st.session_state.general_text_dict['n_page'], on_click=switch_page, args=('up',))
+                        st.button(
+                            st.session_state.general_text_dict['n_page'], on_click=switch_page, args=('up',))


-
-#page for select collections
+# page for select collections
 if st.session_state.stage == 1 and st.session_state.format == 'collections':
    docs = loadApiCollections(st.session_state.id)

@@ -274,22 +290,26 @@ if st.session_state.stage == 1 and st.session_state.format == 'collections':
        st.session_state.collections = collections
        form = st.form('collection')
        form.write(st.session_state.general_text_dict['add_collect'])
-        form.multiselect(st.session_state.general_text_dict['chose_collect'], collections.keys(), key='collectionsForm')
-        form.form_submit_button(st.session_state.general_text_dict['submit'], on_click=set_stage_collections, args=(2,))
+        form.multiselect(st.session_state.general_text_dict['chose_collect'], collections.keys(
+        ), key='collectionsForm')
+        form.form_submit_button(
+            st.session_state.general_text_dict['submit'], on_click=set_stage_collections, args=(2,))

-#page for TSV items
+# page for TSV items
 if st.session_state.stage == 2 and st.session_state.format == 'items':
-    st.write(st.session_state.general_text_dict['fileTSV1'] + str(len(st.session_state.result)) + st.session_state.general_text_dict['fileTSV2'])
+    st.write(st.session_state.general_text_dict['fileTSV1'] + str(
+        len(st.session_state.result)) + st.session_state.general_text_dict['fileTSV2'])
    output = createTSV(st.session_state.result)
    st.download_button('Download TSV', output, 'output.csv')

-#page for TSV collections
+# page for TSV collections
 if st.session_state.stage == 2 and st.session_state.format == 'collections':
    output = createTSVfromCollections()
-    st.write(st.session_state.general_text_dict['fileTSV1'] + str(len(output.split('\n'))-2) + st.session_state.general_text_dict['fileTSV2'])
+    st.write(st.session_state.general_text_dict['fileTSV1'] + str(
+        len(output.split('\n'))-2) + st.session_state.general_text_dict['fileTSV2'])
    st.download_button('Download TSV', output, 'output.csv')


 if st.session_state.stage > 0:
-    st.button(st.session_state.general_text_dict['back'], on_click=set_stage_minus)
-
+    st.button(
+        st.session_state.general_text_dict['back'], on_click=set_stage_minus)