Commit 182ba320 authored by Loïc Chapron's avatar Loïc Chapron

Merge branch 'dev-loic' into 'dev'

streamlit

See merge request !3
parents 8fd3f7ad 41e1220d
## About the Project
## Install Streamlit
```shell
pip install streamlit
```
## Start Project
```shell
streamlit run welcome.py
```
\ No newline at end of file
import streamlit as st
# Landing page: set the browser tab title, then render the welcome heading.
st.set_page_config(page_title="Hello")
st.write("# Welcome to ")
"""
Streamlit Application
Loïc Chapron
"""
#streamlit run test.py
import streamlit as st
import requests as req
import json
from datetime import date
def loadApiIsidore(search, language):
    """Query the Isidore search API and return the list of replies.

    Parameters:
        search: free-text query typed by the user.
        language: language code appended to the lexvo.org ISO 639-3 URI
            used as the ``language`` filter.

    Returns:
        The value found at ``response.replies.content.reply`` in the JSON
        answer, or ``0`` when the request fails or the answer does not have
        that shape (callers test ``docs != 0``).
    """
    from urllib.parse import quote

    # Percent-encode the user text so characters such as '&', '#' or '+'
    # cannot alter the query string.
    url = (
        'https://api.isidore.science/resource/search?q=' + quote(search, safe='')
        + '&output=json&replies=1000&language=http://lexvo.org/id/iso639-3/'
        + language
    )
    try:
        # A timeout keeps the Streamlit run from hanging forever when the
        # API does not answer.
        resp = req.get(url, timeout=60)
        payload = json.loads(resp.content)
        return payload["response"]["replies"]["content"]["reply"]
    except (req.RequestException, ValueError, KeyError, TypeError):
        # Network failure, non-JSON body or unexpected structure.
        return 0
# ISO 639-3 codes (the values of the `lang` mapping) -> two-letter codes.
# NOTE(review): assumes '@xml:lang' holds lowercase two-letter codes, as the
# original `language[:2]` comparison implies -- confirm on a real response.
_ISO639_3_TO_1 = {
    'fra': 'fr',
    'eng': 'en',
    'spa': 'es',
    'ita': 'it',
    'deu': 'de',
    'nld': 'nl',
    'pol': 'pl',
    'por': 'pt',
    'rus': 'ru',
}


def _selected_lang_code():
    """Return the two-letter code of the language picked in the form, or ''."""
    # `language` (form label) and `lang` (label -> ISO 639-3) are module-level
    # names set by the Streamlit script; look them up defensively so the
    # function also works when they are not defined.
    label = globals().get('language')
    mapping = globals().get('lang')
    iso3 = mapping.get(label, '') if isinstance(mapping, dict) else ''
    return _ISO639_3_TO_1.get(iso3, '')


def _localized_text(value, lang_code):
    """Extract plain text from a field given as str, {'$': text} dict or list."""
    if isinstance(value, str):
        return value
    if isinstance(value, dict):
        return str(value.get('$', ''))
    if isinstance(value, list) and value:
        chosen = ''
        for entry in value:
            # Keep the last entry written in the requested language.
            if isinstance(entry, dict) and entry.get('@xml:lang') == lang_code:
                chosen = str(entry.get('$', ''))
        # No entry in the requested language: fall back to the first one.
        return chosen if chosen != '' else _localized_text(value[0], lang_code)
    return ''


def _tsv_clean(text):
    """Drop characters that would break a TSV cell (tab, quote, line breaks)."""
    text = str(text).encode(encoding='UTF-8', errors='ignore').decode("utf-8")
    for char in ('\t', '"', '\n', '\r'):
        text = text.replace(char, '')
    return text


def createFile(docs, lang_code=None):
    """Build the GarganText TSV (as one string) from Isidore replies.

    Parameters:
        docs: iterable of reply dicts, each holding an ``'isidore'`` entry.
        lang_code: optional '@xml:lang' value to prefer when a title or an
            abstract comes in several languages. When omitted, it is derived
            from the language currently selected in the form.

    Returns:
        The header line followed by one tab-separated row per document.
    """
    if lang_code is None:
        lang_code = _selected_lang_code()

    rows = ["title\tsource\tpublication_year\tpublication_month\tpublication_day\tabstract\tauthors\tweight\n"]
    for doc in docs:
        record = doc["isidore"]

        # Title
        title = _tsv_clean(_localized_text(record["title"], lang_code))

        # Source
        source = _tsv_clean(record["source_info"]["sourceName"]["$"])

        # Authors: 'creator' is either one dict or a list of dicts
        creators = record.get('enrichedCreators') or {}
        creator_list = creators.get("creator", []) if isinstance(creators, dict) else []
        if isinstance(creator_list, dict):
            creator_list = [creator_list]
        authors = ';'.join(_tsv_clean(creator["@origin"]) for creator in creator_list)

        # Abstract (optional)
        abstract = _tsv_clean(_localized_text(record.get('abstract', ''), lang_code))

        # Publication date, already formatted as "year\tmonth\tday"
        pdate = getGoodTime(record["date"]["@origin"])

        rows.append(
            title + "\t" + source + "\t" + str(pdate) + "\t" + abstract
            + "\t" + authors + "\t" + str(1) + "\n"
        )
    return ''.join(rows)
def getGoodTime(time):
    """Convert a loosely formatted date into "year\\tmonth\\tday".

    Recognised shapes (after removing square brackets and replacing the
    unknown-digit markers '?' and '.' by '0'):
    ``YYYY``, ``YYYY-MM``, ``YYYY-MM-DD`` (optionally followed by ``T...``),
    ``MM/YYYY``, ``DD/MM/YYYY`` and ``YYYYMMDD``.
    A missing month or day is written as ``1``.

    Parameters:
        time: the raw date string.

    Returns:
        The three tab-separated fields; when the input is not recognised
        (or is the year-only value ``0001``), the current year with month
        and day set to 1.
    """
    fallback = str(date.today().year) + '\t1\t1'

    def is_number(part, max_len):
        # Non-empty, ASCII digits only, at most max_len characters.
        return part.isascii() and part.isdigit() and len(part) <= max_len

    cleaned = time.replace('?', '0').replace('.', '0')
    cleaned = cleaned.replace('[', '').replace(']', '').strip()
    # Drop the time-of-day part first so that a negative UTC offset
    # ("...T10:00:00-05:00") is not mistaken for extra date fields.
    cleaned = cleaned.split('T')[0]

    if '-' in cleaned:
        year, *rest = cleaned.split('-')          # year-month[-day]
    elif '/' in cleaned:
        *rest, year = cleaned.split('/')          # [day/]month/year
        rest.reverse()                            # -> [month, day]
    elif len(cleaned) == 8:
        year, rest = cleaned[:4], [cleaned[4:6], cleaned[6:8]]
    else:
        year, rest = cleaned, []

    if len(year) != 4 or not is_number(year, 4) or len(rest) > 2:
        return fallback
    if not all(is_number(part, 2) for part in rest):
        return fallback
    if not rest and year == '0001':
        # A bare '0001' is not treated as a real year.
        return fallback

    month, day = (rest + ['1', '1'])[:2]
    return year + '\t' + month + '\t' + day
###Streamlit
# Languages offered in the form: displayed label -> ISO 639-3 code appended
# to the language filter of the API request.
lang = {
    'Français': 'fra',
    'Anglais': 'eng',
    'Espagnol': 'spa',
    'Italien': 'ita',
    'Allemand': 'deu',
    # 'nld' is the ISO 639-3 code for Dutch, so the label is "Néerlandais"
    # (the previous label "Polonais" would require 'pol').
    'Néerlandais': 'nld',
    'Portugais': 'por',
    'Russe': 'rus',
}
# The current step of the page is kept in the session state so that it
# survives Streamlit reruns; it starts at 0.
if 'stage' not in st.session_state:
    st.session_state['stage'] = 0


def set_stage(stage):
    """Record *stage* as the current step (used as a widget callback)."""
    st.session_state['stage'] = stage
# Logos, side by side
logo_isidore, logo_gargantext = st.columns(2)
with logo_isidore:
    st.image('img/isidore_logo.png')
with logo_gargantext:
    st.image('img/gargantext_logo.png')

# Search form
form = st.form('api')
form.write('**Isidore vers GarganText**')
search = form.text_input('Mots clés')
language = form.selectbox('Langue', lang.keys())
form.form_submit_button('Submit', on_click=set_stage, args=(1,))

# API call and document-count slider
state = st.session_state
if state.stage > 0:
    # The API is queried only when the (search, language) pair differs from
    # the one whose result is already stored in the session.
    same_query = (
        'search' in state
        and 'language' in state
        and search == state.search
        and language == state.language
    )
    if not same_query:
        with st.spinner("Chargement de l'api.."):
            docs = loadApiIsidore(search, lang[language])
        state.txt = docs
        if docs != 0:
            state.search = search
            state.language = language

    if state.txt == 0:
        st.write("L'API est surchargé, relancer la requête dans quelques secondes")
    else:
        nb_doc = len(state.txt)
        # Second form: how many documents to keep
        form2 = st.form('my_form2')
        form2.write(f'Nombres de documents : {nb_doc}')
        nb_wanted = form2.slider('Nombres de documents à prendre', 1, nb_doc, key="slider")
        form2.form_submit_button('Submit', on_click=set_stage, args=(2,))

# Download of the generated file
if state.stage > 1:
    output = createFile(state.txt[:state.slider])
    st.download_button('Download TSV', output, 'output.csv')
import sys
import json
import streamlit as st
from datetime import datetime
def tmp(file1, file2):
    """Merge the map terms of two term-list JSON exports.

    The first list has priority: its "MapTerm" roots and their children are
    kept as they are. From the second list,
    - the children of a root that exists in both lists are added to the
      first list's root (duplicates removed, order preserved);
    - a root unknown to the first list is added unless it is already a child
      in the first list, and it only keeps the children that are neither a
      root nor a child in the first list.
    Only "MapTerm" entries are kept in the result; the 'Authors',
    'Institutes' and 'Sources' data are emptied.

    Parameters:
        file1, file2: readable file objects containing the JSON exports.

    Returns:
        The merged list serialised as indented JSON text.
    """
    listJson1 = json.load(file1)
    listJson2 = json.load(file2)
    ngrams1 = listJson1['NgramsTerms']['data']
    ngrams2 = listJson2['NgramsTerms']['data']

    # Roots and children ("leafs") of list 1; sets give O(1) membership tests.
    roots = {term for term, entry in ngrams1.items() if entry['list'] == "MapTerm"}
    leafs = {child for term in roots for child in ngrams1[term]['children']}

    # Merge list 2 into list 1
    for term, entry in ngrams2.items():
        if term in roots:
            combined = ngrams1[term]['children'] + entry['children']
            # dict.fromkeys removes duplicates while keeping a stable order.
            ngrams1[term]['children'] = list(dict.fromkeys(combined))
        elif term not in leafs:
            # Keep only children that list 1 does not already place somewhere.
            entry['children'] = [
                child for child in entry['children']
                if child not in roots and child not in leafs
            ]
            ngrams1[term] = entry

    # Keep the map terms only
    merged = {term: entry for term, entry in ngrams1.items() if entry['list'] == "MapTerm"}

    listJson1['NgramsTerms']['data'] = merged
    listJson1['Authors']['data'] = {}
    listJson1['Institutes']['data'] = {}
    listJson1['Sources']['data'] = {}
    return json.dumps(listJson1, sort_keys=False, indent=4)
# Page: upload two term lists and download their merge.
st.subheader('Input Two Term File From GarganText')
col1, col2 = st.columns(2)
with col1:
    file1 = st.file_uploader("Choose a file", key='file1')
with col2:
    file2 = st.file_uploader("Choose a file", key='file2')

if file1 and file2:
    try:
        merged_json = tmp(file1, file2)
    except (ValueError, KeyError, TypeError):
        # Not JSON, or JSON without the expected sections.
        merged_json = None
        st.error('The uploaded files could not be merged: check that both are term list exports.')
    if merged_json is not None:
        st.write('You can download your merge file:')
        # The timestamp avoids '/' and ':' because they are not valid in
        # file names on every platform.
        time = datetime.now().strftime("%d-%m-%Y_%H-%M-%S")
        name = 'output-' + time + '.json'
        st.download_button('Download File', merged_json, name)
\ No newline at end of file
"""
Streamlit Application
Loïc Chapron
"""
#streamlit run zotero2ggt.py
import streamlit as st
import requests as req
import json
from datetime import date
#12093554
# Number of items requested per page by loadApiItems; also used by the items
# page to decide which paging buttons to show.
limit = 15 # This value cannot exceed 100 !
# Browser tab title of this page.
st.set_page_config(page_title="Zotero to GarganText")
def loadApiItems(id, page):
    """Fetch one page of a user's top-level Zotero items, sorted by title.

    Parameters:
        id: Zotero user identifier.
        page: zero-based page number; ``limit`` items are requested per page.

    Returns:
        ``(items, total)`` where ``items`` is the decoded JSON answer and
        ``total`` the value of the ``Total-Results`` response header (a
        string), or ``(0, 0)`` when the request fails.
    """
    url = 'https://api.zotero.org/users/' + str(id).strip() + '/items/top'
    query = {
        'limit': limit,
        'start': page * limit,
        'direction': 'asc',
        'sort': 'title',
    }
    try:
        # The timeout prevents the page from hanging when the API is silent.
        resp = req.get(url, params=query, timeout=30)
    except req.RequestException:
        return 0, 0
    if not resp.ok:
        return 0, 0
    try:
        return json.loads(resp.content), resp.headers['Total-Results']
    except (ValueError, KeyError):
        # Body is not JSON or the total is missing.
        return 0, 0
def loadApiCollections(id):
    """Fetch all the collections of a Zotero user.

    The API answers in pages, so the request is repeated with an increasing
    ``start`` offset until the count announced in the ``Total-Results``
    header is reached.

    Parameters:
        id: Zotero user identifier.

    Returns:
        The list of collection objects, or ``0`` when a request fails.
    """
    url = 'https://api.zotero.org/users/' + str(id).strip() + '/collections'
    page_size = 100  # largest page accepted by the API
    collections = []
    start = 0
    while True:
        try:
            resp = req.get(url, params={'limit': page_size, 'start': start}, timeout=30)
        except req.RequestException:
            return 0
        if not resp.ok:
            return 0
        try:
            batch = json.loads(resp.content)
            total = int(resp.headers.get('Total-Results', start + len(batch)))
        except ValueError:
            return 0
        collections.extend(batch)
        start += len(batch)
        # An empty page guards against looping forever on a wrong total.
        if not batch or start >= total:
            return collections
def loadApiItemsByCollections(id, key):
    """Fetch all the top-level items of one Zotero collection.

    The API answers in pages, so the request is repeated with an increasing
    ``start`` offset until the count announced in the ``Total-Results``
    header is reached.

    Parameters:
        id: Zotero user identifier.
        key: key of the collection.

    Returns:
        The list of item objects, or ``0`` when a request fails.
    """
    url = (
        'https://api.zotero.org/users/' + str(id).strip()
        + '/collections/' + str(key) + '/items/top'
    )
    page_size = 100  # largest page accepted by the API
    items = []
    start = 0
    while True:
        try:
            resp = req.get(url, params={'limit': page_size, 'start': start}, timeout=30)
        except req.RequestException:
            return 0
        if not resp.ok:
            return 0
        try:
            batch = json.loads(resp.content)
            total = int(resp.headers.get('Total-Results', start + len(batch)))
        except ValueError:
            return 0
        items.extend(batch)
        start += len(batch)
        # An empty page guards against looping forever on a wrong total.
        if not batch or start >= total:
            return items
def getAllCollections(docs):
    """Map each collection name to its key.

    ``docs`` is an iterable of objects exposing ``['data']['name']`` and
    ``['data']['key']``; when a name occurs several times the last key wins.
    """
    return {entry['data']['name']: entry['data']['key'] for entry in docs}
def _tsv_cell(text):
    """Drop characters that would break a TSV cell (tab, quote, line breaks)."""
    text = str(text).encode(encoding='UTF-8', errors='ignore').decode("utf-8")
    for char in ('\t', '"', '\n', '\r'):
        text = text.replace(char, '')
    return text


def _tsv_date(raw):
    """Turn an item date into "year\\tmonth\\tday" (missing parts become 1)."""
    raw = str(raw).strip()
    # ISO-like dates: keep what precedes the time of day.
    head = raw.replace('T', ' ').split(' ')[0]
    parts = head.split('-')
    year_ok = len(parts[0]) == 4 and parts[0].isascii() and parts[0].isdigit()
    rest_ok = all(p.isascii() and p.isdigit() and len(p) <= 2 for p in parts[1:])
    if year_ok and rest_ok and len(parts) <= 3:
        return '\t'.join(parts + ['1'] * (3 - len(parts)))
    # Free-form date: salvage the first 4-digit number as the year.
    digits = ''.join(ch if ch in '0123456789' else ' ' for ch in raw)
    for token in digits.split():
        if len(token) == 4:
            return token + '\t1\t1'
    return str(date.today().year) + '\t1\t1'


def getParamFromDoc(doc):
    """Build the TSV row of one Zotero item.

    Parameters:
        doc: item object whose ``'data'`` dict may hold ``title``,
            ``creators``, ``url``, ``abstractNote`` and ``date``. Missing
            fields give empty cells; a missing or unreadable date gives the
            current year with month and day set to 1.

    Returns:
        ``title, source, year, month, day, abstract, authors, weight``
        joined by tabs and terminated by a newline.
    """
    data = doc['data']

    # Title
    title = _tsv_cell(data.get('title', ''))

    # Authors: a creator carries either 'lastName' or a single 'name' field
    names = [
        creator.get('lastName') or creator.get('name') or ''
        for creator in data.get('creators', [])
    ]
    authors = ';'.join(_tsv_cell(name) for name in names if name)

    # Source
    source = _tsv_cell(data.get('url', ''))

    # Abstract
    abstract = _tsv_cell(data.get('abstractNote', ''))

    # Date
    if data.get('date'):
        pdate = _tsv_date(data['date'])
    else:
        pdate = str(date.today().year) + '\t1\t1'

    # Output
    return title + "\t" + source + "\t" + pdate + "\t" + abstract + "\t" + authors + "\t" + str(1) + "\n"
def createTSV(docs):
    """Return the TSV text for *docs*: the header line, then one row per item."""
    header = "title\tsource\tpublication_year\tpublication_month\tpublication_day\tabstract\tauthors\tweight\n"
    return header + ''.join(getParamFromDoc(item) for item in docs)
def createTSVfromCollections():
    """Return the TSV text for every item of the selected collections.

    The collection keys are read from ``st.session_state.collectionsKey`` and
    the user identifier from ``st.session_state.id``. A collection whose
    items cannot be loaded is reported with a warning and skipped.
    """
    rows = ["title\tsource\tpublication_year\tpublication_month\tpublication_day\tabstract\tauthors\tweight\n"]
    for collection_key in st.session_state.collectionsKey:
        docs = loadApiItemsByCollections(st.session_state.id, collection_key)
        if not isinstance(docs, list):
            # The loader returns 0 on failure; iterating it would raise.
            st.warning('Unable to load the items of collection ' + str(collection_key))
            continue
        rows.extend(getParamFromDoc(doc) for doc in docs)
    return ''.join(rows)
# The current step of the page lives in the session state; it starts at 0.
if 'stage' not in st.session_state:
    st.session_state['stage'] = 0


def set_stage(stage):
    """Callback of the first form: store the step, the user id and the data type.

    When the chosen type is 'items', the paging and selection state is reset.
    """
    state = st.session_state
    state.stage = stage
    state.id = state.idForm
    state.format = state.formatForm
    if state.format != 'items':
        return
    state.page = 0
    state.docs = []
    state.selectedKey = []
    state.docsByKey = {}
def set_stage_collections(stage):
    """Callback of the collection form: store the step and the chosen keys.

    The names picked in the multiselect are translated to collection keys
    through ``st.session_state.collections``.
    """
    state = st.session_state
    state.stage = stage
    state.collectionsKey = [state.collections[name] for name in state.collectionsForm]
def set_stage_items(stage):
    """Callback of the items form: store the step and collect the ticked items.

    The state of every 'itemskey-<key>' checkbox is copied into
    ``docsByKey[<key>][1]``; the ticked documents are then gathered in
    ``result`` and their widget keys in ``selectedKey``.
    """
    state = st.session_state
    state.stage = stage

    # Copy the checkbox values into the per-document flags
    for widget_key in state.keys():
        if 'itemskey-' in widget_key:
            state.docsByKey[widget_key.split('-')[1]][1] = state[widget_key]

    # Keep the documents whose flag is set
    picked = [entry[0] for entry in state.docsByKey.values() if entry[1]]
    state.result = picked
    state.selectedKey = ['itemskey-' + doc['data']['key'] for doc in picked]
def set_stage_minus():
    """Callback of the back button: go to the previous step."""
    st.session_state.stage = st.session_state.stage - 1
# Paging callbacks of the items form
def up_page():
    """Go to the next page of items, remembering the ticked checkboxes."""
    state = st.session_state
    state.page = state.page + 1
    state.docs = []  # emptied so that the items of the new page get loaded
    for widget_key in state.keys():
        if 'itemskey-' in widget_key:
            state.docsByKey[widget_key.split('-')[1]][1] = state[widget_key]
def down_page():
    """Go to the previous page of items, remembering the ticked checkboxes."""
    state = st.session_state
    state.page = state.page - 1
    state.docs = []  # emptied so that the items of the new page get loaded
    for widget_key in state.keys():
        if 'itemskey-' in widget_key:
            state.docsByKey[widget_key.split('-')[1]][1] = state[widget_key]
# First step: ask for the Zotero user id and the kind of data to export
if st.session_state.stage == 0:
    form = st.form('api')
    form.write('**Zotero vers GarganText**')
    user_id = form.text_input('ID', key='idForm')
    st.session_state.id = user_id
    data_type = form.selectbox('Type de donnée', ['items', 'collections'], key='formatForm')
    st.session_state.format = data_type
    form.form_submit_button('Suivant', on_click=set_stage, args=(1,))
# Page for selecting items
if st.session_state.stage == 1 and st.session_state.format == 'items':
    # Load the current page of items once; the paging callbacks empty `docs`
    # to trigger a new load.
    if st.session_state.docs == []:
        st.session_state.docs, st.session_state.nbdoc = loadApiItems(st.session_state.id, st.session_state.page)
        if st.session_state.docs != 0:
            for doc in st.session_state.docs:
                # [document, ticked?] -- an already known document keeps its flag
                st.session_state.docsByKey.setdefault(doc['data']['key'], [doc, False])

    if st.session_state.docs == 0:
        st.write('Acess to the account is forbidden, please make your library public on https://www.zotero.org/settings/privacy')
    else:
        # Index of the last page that still holds items. Using (total - 1)
        # avoids an extra empty page when the total is a multiple of `limit`.
        total_docs = int(st.session_state.nbdoc)
        last_page = max((total_docs - 1) // limit, 0)
        has_previous = st.session_state.page > 0
        has_next = st.session_state.page < last_page

        with st.form('items'):
            st.write('*Ajouter les documents que vous voulez mettre dans le TSV*')
            for doc in st.session_state.docs:
                item_key = doc['data']['key']
                # Items without a title are listed under their key.
                label = doc['data'].get('title') or item_key
                st.checkbox(label, st.session_state.docsByKey[item_key][1], key='itemskey-' + item_key)
            col1, col2 = st.columns(2)
            with col1:
                st.form_submit_button('Suivant', on_click=set_stage_items, args=(2,))
            with col2:
                if has_previous and has_next:
                    col3, col4 = st.columns(2)
                    with col3:
                        st.form_submit_button('Page Précédente', on_click=down_page)
                    with col4:
                        st.form_submit_button('Page Suivante', on_click=up_page)
                elif has_next:
                    st.form_submit_button('Page Suivante', on_click=up_page)
                elif has_previous:
                    st.form_submit_button('Page Précédente', on_click=down_page)
# Page for selecting collections
if st.session_state.stage == 1 and st.session_state.format == 'collections':
    docs = loadApiCollections(st.session_state.id)
    if docs != 0:
        # name -> key, kept in the session for the submit callback
        collections = getAllCollections(docs)
        st.session_state.collections = collections
        form = st.form('collection')
        form.write('**Chose a collection** you can choose multiple one')
        form.multiselect('Chose a collection', collections.keys(), key='collectionsForm')
        form.form_submit_button('Submit', on_click=set_stage_collections, args=(2,))
    else:
        st.write('Acess to the account is forbidden, please make your library public on https://www.zotero.org/settings/privacy')
# Download page for the selected items
if st.session_state.stage == 2 and st.session_state.format == 'items':
    nb_selected = len(st.session_state.result)
    st.write(f'Le TSV contient {nb_selected} documents')
    output = createTSV(st.session_state.result)
    st.download_button('Download TSV', output, 'output.csv')

# Download page for the selected collections
if st.session_state.stage == 2 and st.session_state.format == 'collections':
    output = createTSVfromCollections()
    # Splitting on line breaks yields the header, one piece per row and a
    # trailing empty piece, hence the "- 2".
    nb_rows = len(output.split('\n')) - 2
    st.write(f'Le TSV contient {nb_rows} documents')
    st.download_button('Download TSV', output, 'output.csv')
# Back button, shown on every step but the first
if st.session_state.stage > 0:
    st.button('Retour', on_click=set_stage_minus)

# Debug helper: print the session-state keys on the server's standard output
sub = st.button('Affiche keys Terminal')
if sub:
    print("\n KEYS:")
    for state_key in st.session_state.keys():
        print(state_key)
#12093554
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment