Commit 65c88912 authored by Loïc Chapron's avatar Loïc Chapron

Fix Bug and stuff

parent 41e1220d
import streamlit as st
import pandas as pd
# Configure the browser tab title of the Welcome page.
st.set_page_config(
page_title="Hello"
)
# NOTE(review): this heading ends after "Welcome to " and a complete welcome
# heading is written again further down the script - confirm whether this
# line is still wanted.
st.write("# Welcome to ")
def load_bundle(lang):
    """Load the Welcome page texts for one locale.

    Reads ``lang/text_Welcome.csv`` (columns ``locale``, ``key``, ``value``)
    and keeps the rows whose ``locale`` equals ``lang``.

    Args:
        lang: Locale code to select, e.g. ``'fr'`` or ``'en'``.

    Returns:
        A dict mapping every text key to its value for that locale; empty
        when no row matches. If a key appears twice, the last row wins.
    """
    df = pd.read_csv("lang/text_Welcome.csv")
    # Filter with a boolean mask instead of a query string built from
    # ``lang``: a quote inside ``lang`` can no longer break the expression.
    rows = df[df["locale"] == lang]
    # Convert each column once instead of once per row.
    return dict(zip(rows["key"].tolist(), rows["value"].tolist()))
def update_lang():
    """Reload the Welcome page texts after the language selector changes.

    Registered as the ``on_change`` callback of the ``general_language``
    selectbox: the selected label is translated to its locale code through
    ``st.session_state.general_lang_dict`` and the matching bundle is stored
    in ``st.session_state.general_text_dict``.
    """
    locale_code = st.session_state.general_lang_dict[st.session_state.general_language]
    st.session_state.general_text_dict = load_bundle(locale_code)
# First run in this session: install the shared language settings (French by
# default) and record that the Welcome page is the one being displayed.
if 'general_session_page' not in st.session_state:
    st.session_state.general_lang_dict = {'Français': 'fr', 'English': 'en'}
    st.session_state.general_text_dict = load_bundle('fr')
    st.session_state.general_language = 'Français'
    st.session_state.general_session_page = 'Welcome'
# Another page was displayed last: load the Welcome bundle for the language
# that is already selected.
elif st.session_state.general_session_page != 'Welcome':
    st.session_state.general_text_dict = load_bundle(
        st.session_state.general_lang_dict[st.session_state.general_language]
    )
    st.session_state.general_session_page = 'Welcome'

# Language selector, pre-positioned on the current language.
st.selectbox(
    'Langue',
    list(st.session_state.general_lang_dict.keys()),
    list(st.session_state.general_lang_dict.keys()).index(st.session_state.general_language),
    key='general_language',
    on_change=update_lang,
)

# Heading (product name spelled "GarganText", as everywhere else in the
# project) followed by the four translated paragraphs.
st.write("# Welcome to GarganText Tools")
for text_key in ('welcome', 'tools', 'code', 'help'):
    st.write(st.session_state.general_text_dict[text_key])
locale,key,value
fr,title,"**Isidore vers GarganText**"
en,title,"**Isidore To GarganText**"
fr,keyword,"Mots clés"
en,keyword,"Key word"
fr,lang,"Langue"
en,lang,"Language"
fr,submit,"Soumettre"
en,submit,"Submit"
fr,load_api,"Chargement de l'api..."
en,load_api,"Loading API..."
fr,overload_api,"L'API est surchargée, relancez la requête dans quelques secondes"
en,overload_api,"The API is overloaded, please retry the request in a few seconds"
fr,nb_doc,"Nombre de documents : "
en,nb_doc,"Number of documents : "
fr,perform1,"Pour des raisons de performance, on limite à "
fr,perform2," le nombre maximum de documents"
en,perform1,"For performance reasons, we limit to "
en,perform2," the maximum number of documents"
fr,nb_taken,"Nombre de documents à prendre"
en,nb_taken,"Number of documents to take"
fr,createTSV,"Création du fichier TSV (Cela peut prendre quelques minutes)"
en,createTSV,"Creation of the TSV file (It may take a while)"
fr,doc_abstract1,"Il y a "
fr,doc_abstract2," documents qui peuvent ne pas avoir de description."
en,doc_abstract1,"There are "
en,doc_abstract2," documents that may not have an abstract"
\ No newline at end of file
locale,key,value
fr,title,"Fusionner deux listes de termes de GarganText"
en,title,"Input Two Term File From GarganText"
fr,file,"Choisir un fichier"
en,file,"Choose a file"
fr,new_file,"Télécharge ton fichier fusionné :"
en,new_file,"Download your merged file:"
\ No newline at end of file
locale,key,value
fr,welcome,"Bienvenue sur ces pages rassemblant des outils développés par des utilisateurs de GarganText pour des utilisateurs de GarganText."
en,welcome,"Welcome to these pages featuring tools developed by GarganText users for GarganText users."
fr,tools,"Les outils proposés ici ne demandent pas de ressources de calcul mais permettent de transformer des données pour faciliter une création de corpus dans un format adapté à GarganText ou au contraire d’exploiter des fichiers traités préalablement dans GarganText."
en,tools,"The tools offered here do not require computational resources, but can be used to transform data to facilitate the creation of a corpus in a format suitable for GarganText, or to exploit files previously processed in GarganText."
fr,code,"Le code de ces pages, ainsi que des outils utilisés ici sont disponibles en « open source » dans un [Gitlab dédié hébergé par l’ISC-PIF](https://gitlab.iscpif.fr/athomas/gargantexternal-tools)."
en,code,"The code for these pages, as well as the tools used here, are available as open source in a dedicated [Gitlab hosted by ISC-PIF](https://gitlab.iscpif.fr/athomas/gargantexternal-tools)."
fr,help,"N’hésitez pas à contribuer, à permettre à ces outils de s’enrichir et d’offrir de nouvelles possibilités."
en,help,"Don't hesitate to contribute, to help these tools grow and offer new possibilities."
\ No newline at end of file
locale,key,value
fr,title,"**Zotero vers GarganText**"
en,title,"**Zotero To GarganText**"
fr,data,"Type de donnée"
en,data,"Type of data"
fr,submit,"Suivant"
en,submit,"Submit"
fr,denied,"L'accès au compte n'est pas public, pour le rendre public : https://www.zotero.org/settings/privacy"
en,denied,"Account access is not public, to make it public: https://www.zotero.org/settings/privacy"
fr,add_doc,"*Ajouter les documents que vous voulez mettre dans le TSV*"
en,add_doc,"*Add the documents that you want in the TSV*"
fr,select_all,"Tout sélectionner"
en,select_all,"Select All"
fr,search,"Recherche"
en,search,"Search"
fr,p_page,"Page Précédente"
en,p_page,"Previous Page"
fr,n_page,"Page Suivante"
en,n_page,"Next Page"
fr,add_collect,"**Sélectionner une collection** vous pouvez en choisir plusieurs"
en,add_collect,"**Choose a collection** you can choose several"
fr,chose_collect,"Choisir une collection"
en,chose_collect,"Choose a collection"
fr,fileTSV1,"Le TSV contient "
fr,fileTSV2," documents"
en,fileTSV1,"The TSV file contains "
en,fileTSV2," documents"
fr,back,"Retour"
en,back,"Back"
......@@ -3,16 +3,66 @@ Streamlit Application
Loïc Chapron
"""
#streamlit run test.py
#streamlit run Isidore_To_GarganText.py
import streamlit as st
import requests as req
import json
from datetime import date
import pandas as pd
import time
keys = ['search','language','stage_isidore','output','nb_doc','nb_wanted', 'nb_bad_file']
def loadApiIsidore(search, language):
url = 'https://api.isidore.science/resource/search?q=' + search + '&output=json&replies=1000&language=http://lexvo.org/id/iso639-3/' + language
# Drop session entries left behind by other pages: only the names listed in
# ``keys`` and any entry whose name contains 'general_' are kept.
# Iterate over a snapshot of the key names because entries are deleted inside
# the loop.
for key in list(st.session_state.keys()):
    if key not in keys and 'general_' not in key:
        del st.session_state[key]
def load_bundle(lang):
    """Load the Isidore page texts for one locale.

    Reads ``lang/text_IsidoreToGarganText.csv`` (columns ``locale``, ``key``,
    ``value``) and keeps the rows whose ``locale`` equals ``lang``.

    Args:
        lang: Locale code to select, e.g. ``'fr'`` or ``'en'``.

    Returns:
        A dict mapping every text key to its value for that locale; empty
        when no row matches. If a key appears twice, the last row wins.
    """
    df = pd.read_csv("lang/text_IsidoreToGarganText.csv")
    # Filter with a boolean mask instead of a query string built from
    # ``lang``: a quote inside ``lang`` can no longer break the expression.
    rows = df[df["locale"] == lang]
    # Convert each column once instead of once per row.
    return dict(zip(rows["key"].tolist(), rows["value"].tolist()))
def update_lang():
    """Swap in the Isidore page texts for the language just selected.

    Callback of the ``general_language`` selectbox: resolves the selected
    label to a locale code and stores the matching bundle in the session.
    """
    state = st.session_state
    locale_code = state.general_lang_dict[state.general_language]
    state.general_text_dict = load_bundle(locale_code)
# Shared language state: created on the very first run, refreshed when the
# user arrives here from a different page.
if 'general_session_page' not in st.session_state:
    st.session_state.general_language = 'Français'
    st.session_state.general_lang_dict = {'Français': 'fr', 'English': 'en'}
    st.session_state.general_text_dict = load_bundle('fr')
    st.session_state.general_session_page = 'IsidoreToGarganText'
elif st.session_state.general_session_page != 'IsidoreToGarganText':
    st.session_state.general_session_page = 'IsidoreToGarganText'
    st.session_state.general_text_dict = load_bundle(
        st.session_state.general_lang_dict[st.session_state.general_language]
    )

# Language selector, positioned on the language currently in use.
st.selectbox(
    'Langue',
    options=list(st.session_state.general_lang_dict),
    index=list(st.session_state.general_lang_dict).index(st.session_state.general_language),
    key='general_language',
    on_change=update_lang,
)
numberReplies = 500 # Documents requested per API page; do not exceed 1000
limitItems = 5000 # Upper bound offered on the slider; cannot be more than 10 times numberReplies
retryTime = 2 # Seconds slept before a failed page request is retried
def loadApiIsidoreNumberFile(search, language):
    """Ask the Isidore API how many documents match a search.

    Args:
        search: Free-text query typed by the user.
        language: Language code appended to the lexvo.org language URI.

    Returns:
        The ``@items`` value found in the response metadata, or ``0`` when
        the request fails or the answer does not have the expected shape.
    """
    from urllib.parse import quote_plus

    # Encode the user text so characters such as '&', '#' or '+' cannot
    # corrupt the query string.
    url = ('https://api.isidore.science/resource/search?q=' + quote_plus(search)
           + '&output=json&replies=10&language=http://lexvo.org/id/iso639-3/' + language)
    print(url)
    try:
        # A timeout keeps the Streamlit run from hanging on a stalled API.
        resp = req.get(url, timeout=30)
    except req.RequestException:
        return 0
    if not resp.ok:
        return 0
    try:
        return json.loads(resp.content)["response"]["replies"]["meta"]["@items"]
    except (ValueError, KeyError, TypeError):
        # Invalid JSON or an unexpected layout is reported like any other
        # failed request.
        return 0
def loadApiIsidorePage(search, language, page):
url = 'https://api.isidore.science/resource/search?q=' + search + '&output=json&replies=' + str(numberReplies) + '&page=' + str(page) + '&language=http://lexvo.org/id/iso639-3/' + language
resp = req.get(url)
print(url)
try:
......@@ -24,13 +74,34 @@ def loadApiIsidore(search, language):
return docs
def createFile(docs):
def create_output(search, language, nb_doc):
    """Build the TSV export for the first ``nb_doc`` documents of a search.

    Documents are fetched page by page (``numberReplies`` per page). A page
    for which ``loadApiIsidorePage`` returns ``0`` is requested again every
    ``retryTime`` seconds until it succeeds.

    Args:
        search: Query text forwarded to ``loadApiIsidorePage``.
        language: Language code forwarded to the API and to ``createFile``.
        nb_doc: Total number of documents wanted.

    Returns:
        A ``(output, nb)`` tuple: the TSV text including its header line, and
        the sum of the counters returned by ``createFile`` for every page.
    """
    header = "title\tsource\tpublication_year\tpublication_month\tpublication_day\tabstract\tauthors\tweight\n"
    full_pages, remainder = divmod(nb_doc, numberReplies)
    # (page number, number of documents to keep from that page)
    pages = [(page, numberReplies) for page in range(1, full_pages + 1)]
    if remainder:
        pages.append((full_pages + 1, remainder))

    chunks = [header]
    nb = 0
    for page, limit in pages:
        docs = loadApiIsidorePage(search, language, page)
        # NOTE(review): retries are unbounded, as before - confirm that is wanted.
        while docs == 0:
            time.sleep(retryTime)
            print('Retry')
            docs = loadApiIsidorePage(search, language, page)
        # createFile returns (rows, counter); both are accumulated for every
        # page, full or partial.
        rows, count = createFile(docs, limit, language)
        chunks.append(rows)
        nb += count
    return "".join(chunks), nb
def createFile(docs, limit, language):
# Output text
output = "title\tsource\tpublication_year\tpublication_month\tpublication_day\tabstract\tauthors\tweight\n"
output = ''
i = 0
nb = 0
for doc in docs:
if (i == limit):
break
i+=1
# Title
title = doc["isidore"]["title"]
if (type(title) != str):
......@@ -86,6 +157,14 @@ def createFile(docs):
abstract = tmp
else :
abstract = abstract['$']
if 'types' in doc['isidore'].keys():
if type(doc['isidore']['types']['type'] == str) and doc['isidore']['types']['type'] in ['Books', 'text']:
nb += 1
elif type(doc['isidore']['types']['type'] == dict) and doc['isidore']['types']['type'][1] in ['Books', 'text']:
nb += 1
else :
print(title)
# Publication Date
pdate = getGoodTime(doc["isidore"]["date"]["@origin"])
......@@ -98,9 +177,11 @@ def createFile(docs):
row = str(title) + "\t" + source + "\t" + str(pdate) + "\t" + abstract + "\t" + authors + "\t" + str(1) + "\n"
output += row
return output
return output, nb
# Need modification when Isidore upgrade his api
def getGoodTime(time):
time = time.replace('?', '0').replace('.', '0')
tiret = time.replace('[', '').replace(']', '').split('-')
......@@ -122,7 +203,7 @@ def getGoodTime(time):
return slash[2] + '\t' + slash[1] + '\t' + slash[0].split('T')[0]
elif len(time) == 8:
return time[:4] + '\t' + time[4:6] + '\t' + time[6:8]
return str(date.today().year) + '\t1\t1'
return '1900\t1\t1'
###Streamlit
......@@ -134,16 +215,20 @@ lang = {
'Espagnol' : 'spa',
'Italien' : 'ita',
'Allemand' : 'deu',
'Polonais' : 'nld',
'Portugais' : 'por',
'Russe' : 'rus'
}
if 'stage' not in st.session_state:
st.session_state.stage = 0
# Can be added but low result
#'Polonais' : 'nld',
#'Portugais' : 'por',
#'Russe' : 'rus'
if 'stage_isidore' not in st.session_state:
st.session_state.stage_isidore = 0
st.session_state.nb_wanted = 1
def set_stage(stage):
st.session_state.stage = stage
st.session_state.stage_isidore = stage
st.session_state.output = ''
# Image
......@@ -151,46 +236,56 @@ col1, col2 = st.columns(2)
with col1:
st.image('img/isidore_logo.png')
with col2:
st.image('img/gargantext_logo.png')
st.image('img/gargantext_logo.jpg')
# Form
form = st.form('api')
form.write('**Isidore vers GarganText**')
form.write(st.session_state.general_text_dict['title'])
search = form.text_input('Mots clés')
language = form.selectbox('Langue', lang.keys())
search = form.text_input(st.session_state.general_text_dict['keyword'])
language = form.selectbox(st.session_state.general_text_dict['lang'], lang.keys())
form.form_submit_button('Submit', on_click=set_stage, args=(1,))
form.form_submit_button(st.session_state.general_text_dict['submit'], on_click=set_stage, args=(1,))
# API and Slider
if st.session_state.stage > 0:
if st.session_state.stage_isidore > 0:
# Only call first time and after
if 'search' not in st.session_state or 'language' not in st.session_state or search != st.session_state.search or language != st.session_state.language:
with st.spinner("Chargement de l'api.."):
docs = loadApiIsidore(search, lang[language])
st.session_state.txt = docs
if docs != 0:
with st.spinner(st.session_state.general_text_dict['load_api'] ):
nb_doc = int(loadApiIsidoreNumberFile(search, lang[language]))
st.session_state.nb_doc = nb_doc
if nb_doc != 0:
st.session_state.search = search
st.session_state.language = language
if st.session_state.txt != 0:
nb_doc = len(st.session_state.txt)
if st.session_state.nb_doc != 0:
# Form with slider
form2 = st.form('my_form2')
form2.write('Nombres de documents : ' + str(nb_doc))
nb_wanted = form2.slider('Nombres de documents à prendre', 1, nb_doc, key="slider")
form2.write(st.session_state.general_text_dict['nb_doc'] + str(st.session_state.nb_doc))
form2.form_submit_button('Submit', on_click=set_stage, args=(2,))
if st.session_state.nb_doc > limitItems:
form2.write(st.session_state.general_text_dict['perform1'] + str(limitItems) + st.session_state.general_text_dict['perform2'])
st.session_state.nb_wanted = form2.slider(st.session_state.general_text_dict['nb_taken'], 1, limitItems)
else:
st.session_state.nb_wanted = form2.slider(st.session_state.general_text_dict['nb_taken'], 1, int(st.session_state.nb_doc))
form2.form_submit_button(st.session_state.general_text_dict['submit'], on_click=set_stage, args=(2,))
else:
st.write("L'API est surchargé, relancer la requête dans quelques secondes")
st.write(st.session_state.general_text_dict['overload_api'] )
# Download
if st.session_state.stage > 1:
output = createFile(st.session_state.txt[:st.session_state.slider])
st.download_button('Download TSV', output, 'output.csv')
if st.session_state.stage_isidore > 1:
with st.spinner(st.session_state.general_text_dict['createTSV'] ):
if st.session_state.output == '':
print(st.session_state.nb_wanted)
st.session_state.output, st.session_state.nb_bad_file = create_output(st.session_state.search, lang[st.session_state.language], st.session_state.nb_wanted)
st.write(st.session_state.general_text_dict['doc_abstract1'] + str(st.session_state.nb_bad_file) + st.session_state.general_text_dict['doc_abstract2'])
st.download_button('Download TSV', st.session_state.output, 'output.csv')
import sys
import json
import streamlit as st
from datetime import datetime
import pandas as pd
# Drop session entries left behind by other pages: only entries whose name
# contains 'general_' are kept.
# Iterate over a snapshot of the key names because entries are deleted inside
# the loop.
for key in list(st.session_state.keys()):
    if 'general_' not in key:
        del st.session_state[key]
def load_bundle(lang):
    """Load the merge page texts for one locale.

    Reads ``lang/text_MergeTermGarganText.csv`` (columns ``locale``, ``key``,
    ``value``) and keeps the rows whose ``locale`` equals ``lang``.

    Args:
        lang: Locale code to select, e.g. ``'fr'`` or ``'en'``.

    Returns:
        A dict mapping every text key to its value for that locale; empty
        when no row matches. If a key appears twice, the last row wins.
    """
    df = pd.read_csv("lang/text_MergeTermGarganText.csv")
    # Filter with a boolean mask instead of a query string built from
    # ``lang``: a quote inside ``lang`` can no longer break the expression.
    rows = df[df["locale"] == lang]
    # Convert each column once instead of once per row.
    return dict(zip(rows["key"].tolist(), rows["value"].tolist()))
def update_lang():
    """Swap in the merge page texts for the language just selected.

    Callback of the ``general_language`` selectbox: resolves the selected
    label to a locale code and stores the matching bundle in the session.
    """
    state = st.session_state
    locale_code = state.general_lang_dict[state.general_language]
    state.general_text_dict = load_bundle(locale_code)
# Shared language state: created on the very first run, refreshed when the
# user arrives here from a different page.
if 'general_session_page' not in st.session_state:
    st.session_state.general_language = 'Français'
    st.session_state.general_lang_dict = {'Français': 'fr', 'English': 'en'}
    st.session_state.general_text_dict = load_bundle('fr')
    st.session_state.general_session_page = 'MergeTermGarganText'
elif st.session_state.general_session_page != 'MergeTermGarganText':
    st.session_state.general_session_page = 'MergeTermGarganText'
    st.session_state.general_text_dict = load_bundle(
        st.session_state.general_lang_dict[st.session_state.general_language]
    )

# Language selector, positioned on the language currently in use.
st.selectbox(
    'Langue',
    options=list(st.session_state.general_lang_dict),
    index=list(st.session_state.general_lang_dict).index(st.session_state.general_language),
    key='general_language',
    on_change=update_lang,
)
def tmp(file1, file2):
......@@ -62,14 +93,14 @@ def tmp(file1, file2):
return json.dumps(listJson1, sort_keys=False, indent=4)
st.subheader('Input Two Term File From GarganText')
st.subheader(st.session_state.general_text_dict['title'])
col1, col2 = st.columns(2)
with col1:
file1 = st.file_uploader("Choose a file", key='file1')
file1 = st.file_uploader(st.session_state.general_text_dict['file'],'json',key='file1')
with col2:
file2 = st.file_uploader("Choose a file", key='file2')
file2 = st.file_uploader(st.session_state.general_text_dict['file'],'json',key='file2')
if (file1 and file2):
st.write('You can download your merge file:')
st.write(st.session_state.general_text_dict['new_file'])
time = datetime.strftime(datetime.now(),"%d-%m-%Y/%H:%M:%S")
name = 'output-' +time+ '.json'
st.download_button('Download File', tmp(file1, file2), name)
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment