Commit 333eb348 authored by Loïc Chapron's avatar Loïc Chapron

add page from conversion

parent 4463cc66
...@@ -66,7 +66,7 @@ with open(path, 'r') as corpus : ...@@ -66,7 +66,7 @@ with open(path, 'r') as corpus :
day = tmp[2] day = tmp[2]
else: else:
if doc.__contains__('year'): if doc.__contains__('year'):
year = doc['year'] year = doc['year'].replace('/','').replace('.','')
else: else:
year = str(date.today().year) year = str(date.today().year)
......
...@@ -3,7 +3,7 @@ fr,title,"# Json Vers TSV" ...@@ -3,7 +3,7 @@ fr,title,"# Json Vers TSV"
en,title,"# Json To TSV" en,title,"# Json To TSV"
fr,text,"Transforme un corpus Json venant de Gargantext en TSV pour GarganText" fr,text,"Transforme un corpus Json venant de Gargantext en TSV pour GarganText"
en,text,"Transform a Json corpus fron GarganText to a TSV file for GarganText" en,text,"Transform a Json corpus from GarganText to a TSV file for GarganText"
fr,file,"Choisir un fichier" fr,file,"Choisir un fichier"
en,file,"Choose a file" en,file,"Choose a file"
......
locale,key,value
fr,title,"# Pubmed Vers GarganText"
en,title,"# Pubmed To GarganText"
fr,text,"Transforme un corpus pubmed en TSV pour GarganText"
en,text,"Transform a pubmed corpus to a TSV file for GarganText"
fr,file,"Choisir un fichier"
en,file,"Choose a file"
fr,new_file,"Télécharge ton fichier TSV :"
en,new_file,"Download your TSV file:"
fr,error,"Erreur : le fichier n'est pas valide"
en,error,"Error: the file isn't valid"
\ No newline at end of file
locale,key,value
fr,title,"# Ris Vers GarganText"
en,title,"# Ris To GarganText"
fr,text,"Transforme un corpus ris en TSV pour GarganText"
en,text,"Transform a ris corpus to a TSV file for GarganText"
fr,file,"Choisir un fichier"
en,file,"Choose a file"
fr,new_file,"Télécharge ton fichier TSV :"
en,new_file,"Download your TSV file:"
fr,error,"Erreur : le fichier n'est pas valide"
en,error,"Error: the file isn't valid"
\ No newline at end of file
"""
Streamlit Application
Loïc Chapron
"""
import streamlit as st
import pandas as pd
import nbib
import re
import calendar
# Banner image shown at the top of the page.
st.image('img/gargantool_banner.jpg')
# Inject a <style> rule that overrides the padding of `.block-container`.
# unsafe_allow_html=True is passed so the raw HTML is handed to the page
# as-is rather than rendered as plain markdown text.
st.markdown("""
<style>
.block-container {
padding-top: 2rem;
padding-bottom: 0rem;
padding-left: 1rem;
padding-right: 1rem;
}
</style>
""", unsafe_allow_html=True)
def load_bundle(lang):
    """Load this page's translated UI strings for one locale.

    Reads ``lang/text_PubMedToGarganText.csv`` (columns ``locale``, ``key``
    and ``value``) and keeps the rows whose ``locale`` equals *lang*.

    Args:
        lang: Locale code as stored in the ``locale`` column, e.g. ``'fr'``.

    Returns:
        A dict mapping every ``key`` of that locale to its ``value``; an
        empty dict when no row matches.
    """
    df = pd.read_csv("lang/text_PubMedToGarganText.csv")
    # A boolean mask avoids building a query expression by string
    # interpolation, so a locale containing a quote cannot break the filter.
    rows = df[df["locale"] == lang]
    # Convert each column to a list once instead of once per row.
    return dict(zip(rows["key"].to_list(), rows["value"].to_list()))
def update_lang():
    """Reload the text bundle for the language currently selected.

    Looks up the locale code of ``general_language`` in ``general_lang_dict``
    and stores the matching bundle in ``general_text_dict`` (all three live
    in ``st.session_state``).
    """
    state = st.session_state
    locale = state.general_lang_dict[state.general_language]
    state.general_text_dict = load_bundle(locale)
# Session bootstrap: keys prefixed with 'general_' are shared between pages,
# every other key is treated as belonging to a single page.
if 'general_session_page' not in st.session_state:
    # First page opened in this session: start in French.
    st.session_state.general_lang_dict = {'Français': 'fr', 'English': 'en'}
    st.session_state.general_text_dict = load_bundle('fr')
    st.session_state.general_language = 'Français'
    st.session_state.general_session_page = 'PubMedToGarganText'
elif st.session_state.general_session_page != 'PubMedToGarganText':
    # Arriving from another page: load this page's texts in the language
    # already chosen, then drop the state left behind by the previous page.
    st.session_state.general_text_dict = load_bundle(
        st.session_state.general_lang_dict[st.session_state.general_language]
    )
    st.session_state.general_session_page = 'PubMedToGarganText'
    # Iterate over a snapshot of the keys: entries are deleted in the loop,
    # and mutating a mapping while walking its live key view is unsafe.
    for key in list(st.session_state.keys()):
        if 'general_' not in key:
            del st.session_state[key]

# Language selector; its value is stored under 'general_language' and a
# change triggers update_lang to reload the texts.
languages = list(st.session_state.general_lang_dict.keys())
st.selectbox(
    'Langue',
    languages,
    index=languages.index(st.session_state.general_language),
    key='general_language',
    on_change=update_lang,
)
def read_file(file):
    """Convert an uploaded PubMed export into GarganText TSV text.

    The upload is decoded as UTF-8 and parsed with ``nbib.read``. Records
    lacking any of ``title``, ``publication_date`` or ``authors`` are
    skipped. ``publication_date`` is split on whitespace into year, month
    abbreviation and day; a missing or unrecognised month, or a missing
    day, defaults to ``'1'``.

    Args:
        file: Binary file-like object whose ``read()`` returns the bytes of
            the export (here, the object returned by ``st.file_uploader``).

    Returns:
        The TSV document as one string: a header line followed by one line
        per kept record, with a constant weight of ``1``.

    Raises:
        UnicodeDecodeError: If the upload is not valid UTF-8.
    """
    docs = nbib.read(file.read().decode('utf-8'))

    # Double quotes are dropped and tab / line-break characters become
    # spaces, so a field can never split a TSV row or cell.
    cleanup = str.maketrans({'"': None, '\t': ' ', '\n': ' ', '\r': ' '})
    # Month abbreviation -> month number; the empty placeholder at index 0
    # of calendar.month_abbr is excluded so it can never resolve to month 0.
    month_numbers = {
        abbr: str(number)
        for number, abbr in enumerate(calendar.month_abbr)
        if abbr
    }
    required = {'title', 'publication_date', 'authors'}

    lines = ["title\tsource\tpublication_year\tpublication_month\tpublication_day\tabstract\tauthors\tweight\n"]
    for doc in docs:
        if not required.issubset(doc.keys()):
            continue

        title = str(doc['title']).translate(cleanup)
        # The journal name is the record's source; it used to be computed
        # and then replaced by a hard-coded "scopus" in the output row.
        source = str(doc.get('journal', '')).translate(cleanup)
        abstract = str(doc.get('abstract', '')).translate(cleanup)

        # split() without argument ignores repeated spaces, so no empty
        # token can end up as the month or the day.
        date_parts = doc['publication_date'].split()
        year = date_parts[0] if date_parts else ''
        month = month_numbers.get(date_parts[1], '1') if len(date_parts) > 1 else '1'
        day = date_parts[2] if len(date_parts) > 2 else '1'

        # Each author entry is a mapping with an 'author' name; commas are
        # removed from names because the comma is the list separator.
        authors = ','.join(
            str(author['author']).replace(',', '').translate(cleanup)
            for author in doc['authors']
        )

        lines.append('\t'.join([title, source, year, month, day, abstract, authors, '1']) + '\n')

    return ''.join(lines)
# Page body: title, description, then the upload widget.
st.write(st.session_state.general_text_dict['title'])
st.write(st.session_state.general_text_dict['text'])
file = st.file_uploader(st.session_state.general_text_dict['file'], type=["txt"], key='file')

if file:
    try:
        # Strip only the last extension so dotted names keep their full stem
        # ("corpus.v2.txt" -> "corpus.v2.csv").
        name = file.name.rsplit('.', 1)[0] + '.csv'
        # Convert before writing anything, so the download caption is not
        # shown above the error message when the conversion fails.
        content = read_file(file)
        st.write(st.session_state.general_text_dict['new_file'])
        st.download_button(name, content, name)
    except Exception as e:
        # Page-level boundary: report any failure as an invalid file and
        # keep the detail on the server console.
        st.write(st.session_state.general_text_dict['error'])
        print(e)
    finally:
        file.close()
"""
Streamlit Application
Loïc Chapron
"""
import streamlit as st
import pandas as pd
import rispy
from datetime import date
# Banner image shown at the top of the page.
st.image('img/gargantool_banner.jpg')
# Inject a <style> rule that overrides the padding of `.block-container`.
# unsafe_allow_html=True is passed so the raw HTML is handed to the page
# as-is rather than rendered as plain markdown text.
st.markdown("""
<style>
.block-container {
padding-top: 2rem;
padding-bottom: 0rem;
padding-left: 1rem;
padding-right: 1rem;
}
</style>
""", unsafe_allow_html=True)
def load_bundle(lang):
    """Load this page's translated UI strings for one locale.

    Reads ``lang/text_RisToGarganText.csv`` (columns ``locale``, ``key`` and
    ``value``) and keeps the rows whose ``locale`` equals *lang*.

    Args:
        lang: Locale code as stored in the ``locale`` column, e.g. ``'fr'``.

    Returns:
        A dict mapping every ``key`` of that locale to its ``value``; an
        empty dict when no row matches.
    """
    df = pd.read_csv("lang/text_RisToGarganText.csv")
    # A boolean mask avoids building a query expression by string
    # interpolation, so a locale containing a quote cannot break the filter.
    rows = df[df["locale"] == lang]
    # Convert each column to a list once instead of once per row.
    return dict(zip(rows["key"].to_list(), rows["value"].to_list()))
def update_lang():
    """Reload the text bundle for the language currently selected.

    Looks up the locale code of ``general_language`` in ``general_lang_dict``
    and stores the matching bundle in ``general_text_dict`` (all three live
    in ``st.session_state``).
    """
    state = st.session_state
    locale = state.general_lang_dict[state.general_language]
    state.general_text_dict = load_bundle(locale)
# Session bootstrap: keys prefixed with 'general_' are shared between pages,
# every other key is treated as belonging to a single page.
if 'general_session_page' not in st.session_state:
    # First page opened in this session: start in French.
    st.session_state.general_lang_dict = {'Français': 'fr', 'English': 'en'}
    st.session_state.general_text_dict = load_bundle('fr')
    st.session_state.general_language = 'Français'
    st.session_state.general_session_page = 'RisToGarganText'
elif st.session_state.general_session_page != 'RisToGarganText':
    # Arriving from another page: load this page's texts in the language
    # already chosen, then drop the state left behind by the previous page.
    st.session_state.general_text_dict = load_bundle(
        st.session_state.general_lang_dict[st.session_state.general_language]
    )
    st.session_state.general_session_page = 'RisToGarganText'
    # Iterate over a snapshot of the keys: entries are deleted in the loop,
    # and mutating a mapping while walking its live key view is unsafe.
    for key in list(st.session_state.keys()):
        if 'general_' not in key:
            del st.session_state[key]

# Language selector; its value is stored under 'general_language' and a
# change triggers update_lang to reload the texts.
languages = list(st.session_state.general_lang_dict.keys())
st.selectbox(
    'Langue',
    languages,
    index=languages.index(st.session_state.general_language),
    key='general_language',
    on_change=update_lang,
)
def read_file(file):
    """Convert an uploaded RIS export into GarganText TSV text.

    The upload is decoded as UTF-8 and parsed with ``rispy.loads``. For
    every record the ``date`` field (``year/month/day``) is used when
    present; missing or empty parts fall back to the ``year`` field, then
    to the current year, and to ``'1'`` for month and day. Records without
    any title are skipped.

    Args:
        file: Binary file-like object whose ``read()`` returns the bytes of
            the export (here, the object returned by ``st.file_uploader``).

    Returns:
        The TSV document as one string: a header line followed by one line
        per kept record, with a constant weight of ``1``.

    Raises:
        UnicodeDecodeError: If the upload is not valid UTF-8.
    """
    docs = rispy.loads(file.read().decode('utf-8'))

    # Double quotes are dropped and tab / line-break characters become
    # spaces, so a field can never split a TSV row or cell.
    cleanup = str.maketrans({'"': None, '\t': ' ', '\n': ' ', '\r': ' '})

    lines = ["title\tsource\tpublication_year\tpublication_month\tpublication_day\tabstract\tauthors\tweight\n"]
    for doc in docs:
        # NOTE(review): rispy is expected to expose the T1 tag as
        # 'primary_title' -- confirm against the installed version.
        title = doc.get('title') or doc.get('primary_title')
        if not title:
            # One record without a title no longer aborts the whole file.
            continue

        source = doc.get('secondary_title', '')
        abstract = doc.get('abstract', '')

        # Pad to three parts so partial dates ("2020", "2020/05") cannot
        # raise IndexError; empty parts ("2020///") fall back to defaults.
        parts = [part.strip() for part in doc.get('date', '').split('/')]
        parts += [''] * (3 - len(parts))
        year = (
            parts[0]
            or doc.get('year', '').replace('/', '').replace('.', '')
            or str(date.today().year)
        )
        month = parts[1] or '1'
        day = parts[2] or '1'

        authors = ';'.join(doc.get('authors', []))

        fields = [title, source, year, month, day, abstract, authors]
        lines.append('\t'.join(str(field).translate(cleanup) for field in fields) + '\t1\n')

    return ''.join(lines)
# Page body: title, description, then the upload widget.
st.write(st.session_state.general_text_dict['title'])
st.write(st.session_state.general_text_dict['text'])
file = st.file_uploader(st.session_state.general_text_dict['file'], type=["ris"], key='file')

if file:
    try:
        # Strip only the last extension so dotted names keep their full stem
        # ("corpus.v2.ris" -> "corpus.v2.csv").
        name = file.name.rsplit('.', 1)[0] + '.csv'
        # Convert before writing anything, so the download caption is not
        # shown above the error message when the conversion fails.
        content = read_file(file)
        st.write(st.session_state.general_text_dict['new_file'])
        st.download_button(name, content, name)
    except Exception as e:
        # Page-level boundary: report any failure as an invalid file and
        # keep the detail on the server console.
        st.write(st.session_state.general_text_dict['error'])
        print(e)
    finally:
        file.close()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment