Commit 709099dd authored by Loïc Chapron

fix isidore and name output file

parent 42b61e63
...@@ -31,5 +31,5 @@ en,perform2," the maximum number of documents" ...@@ -31,5 +31,5 @@ en,perform2," the maximum number of documents"
fr,nb_taken,"Nombres de documents à prendre" fr,nb_taken,"Nombres de documents à prendre"
en,nb_taken,"Number of documents to take" en,nb_taken,"Number of documents to take"
fr,createTSV,"Création du fichier TSV (Cela peut prendre quelque minutes)" fr,createTSV,"Création du fichier TSV (Cela peut prendre quelques minutes)"
en,createTSV,"Creation of the TSV file (It may take a while)" en,createTSV,"Creation of the TSV file (It may take a while)"
...@@ -2,7 +2,7 @@ locale,key,value ...@@ -2,7 +2,7 @@ locale,key,value
fr,title,"# Isidore vers GarganText" fr,title,"# Isidore vers GarganText"
en,title,"# Isidore To GarganText" en,title,"# Isidore To GarganText"
fr,text,"Effectue une recherche Isidore de documents scientifiques et les convertir en un fichier TSV." fr,text,"Effectue une recherche Isidore de documents scientifiques et les convertit en un fichier TSV."
en,text,"Do a Isidore scientific documents research and convert it into a TSV file." en,text,"Do a Isidore scientific documents research and convert it into a TSV file."
fr,keyword,"Mots clés" fr,keyword,"Mots clés"
...@@ -17,21 +17,21 @@ en,submit,"Submit" ...@@ -17,21 +17,21 @@ en,submit,"Submit"
fr,load_api,"Chargement de l'api..." fr,load_api,"Chargement de l'api..."
en,load_api,"Loading API..." en,load_api,"Loading API..."
fr,overload_api,"L'API est surchargé, relancer la requête dans quelques secondes" fr,overload_api,"L'API est surchargé, relancer la requête dans quelques secondes."
en,overload'api,"The API is overloaded, please retry the request in a few seconds" en,overload'api,"The API is overloaded, please retry the request in a few seconds."
fr,nb_doc,"Nombres de documents : " fr,nb_doc,"Nombres de documents : "
en,nb_doc,"Numbers of documents : " en,nb_doc,"Numbers of documents : "
fr,perform1,"Pour des raisons de performence, on limit à " fr,perform1,"Pour des raisons de performence, on limite à "
fr,perform2," le nombre de document maximum" fr,perform2," le nombre maximum de documents."
en,perform1,"For performance reasons, we limit to " en,perform1,"For performance reasons, we limit to "
en,perform2," the maximum number of documents" en,perform2," ,the maximum number of documents."
fr,nb_taken,"Nombres de documents à prendre" fr,nb_taken,"Nombres de documents à prendre"
en,nb_taken,"Number of documents to take" en,nb_taken,"Number of documents to take"
fr,createTSV,"Création du fichier TSV (Cela peut prendre quelque minutes)" fr,createTSV,"Création du fichier TSV (Cela peut prendre quelques minutes)"
en,createTSV,"Creation of the TSV file (It may take a while)" en,createTSV,"Creation of the TSV file (It may take a while)"
fr,doc_abstract1,"Il y a " fr,doc_abstract1,"Il y a "
......
...@@ -190,5 +190,7 @@ if st.session_state.stage_isidore > 1: ...@@ -190,5 +190,7 @@ if st.session_state.stage_isidore > 1:
print(st.session_state.nb_wanted) print(st.session_state.nb_wanted)
st.session_state.output = create_output( st.session_state.output = create_output(
st.session_state.search, lang[st.session_state.language], st.session_state.nb_wanted) st.session_state.search, lang[st.session_state.language], st.session_state.nb_wanted)
st.download_button('Download TSV', st.session_state.output, 'output.csv')
fileName = "HALOutput_" + str(datetime.now().strftime("%Y-%m-%d_%H:%M:%S")) + '.csv'
st.download_button('Download TSV', st.session_state.output, fileName)
...@@ -7,6 +7,7 @@ import streamlit as st ...@@ -7,6 +7,7 @@ import streamlit as st
import requests as req import requests as req
import json import json
import time import time
from datetime import datetime
from json import JSONDecodeError from json import JSONDecodeError
import src.basic as tmp import src.basic as tmp
...@@ -64,11 +65,16 @@ def create_output(search, language, nb_doc): ...@@ -64,11 +65,16 @@ def create_output(search, language, nb_doc):
break break
time.sleep(retryTime) time.sleep(retryTime)
print('Retry') print('Retry')
tmp, nb_tmp = createFile(txt, nb_doc % numberReplies, language) tmp, nb_tmp = createFile(txt, numberReplies, language)
output += tmp output += tmp
nb += nb_tmp nb += nb_tmp
if nb_doc % numberReplies != 0: if nb_doc % numberReplies != 0:
txt = loadApiIsidorePage(search, language, nb_doc//numberReplies + 1) while (True):
txt = loadApiIsidorePage(search, language, nb_doc//numberReplies + 1)
if txt != 0:
break
time.sleep(retryTime)
print('Retry')
tmp, nb_tmp = createFile(txt, nb_doc % numberReplies, language) tmp, nb_tmp = createFile(txt, nb_doc % numberReplies, language)
output += tmp output += tmp
nb += nb_tmp nb += nb_tmp
...@@ -139,12 +145,16 @@ def createFile(docs, limit, language): ...@@ -139,12 +145,16 @@ def createFile(docs, limit, language):
else: else:
abstract = tmp abstract = tmp
else: else:
abstract = abstract['$'] if '$' in abstract.keys():
abstract = abstract['$']
else:
abstract = ''
if 'types' in doc['isidore'].keys(): if 'types' in doc['isidore'].keys():
if type(doc['isidore']['types']['type'] == str) and doc['isidore']['types']['type'] in ['Books', 'text']: print(i)
if type(doc['isidore']['types']['type']) == str and doc['isidore']['types']['type'] in ['Books', 'text']:
nb += 1 nb += 1
elif type(doc['isidore']['types']['type'] == dict) and doc['isidore']['types']['type'][1] in ['Books', 'text']: elif type(doc['isidore']['types']['type']) == dict and doc['isidore']['types']['type']['$'] in ['Books', 'text']:
nb += 1 nb += 1
else: else:
print(title) print(title)
...@@ -280,4 +290,5 @@ if st.session_state.stage_isidore > 1: ...@@ -280,4 +290,5 @@ if st.session_state.stage_isidore > 1:
st.write(st.session_state.general_text_dict['doc_abstract1'] + str( st.write(st.session_state.general_text_dict['doc_abstract1'] + str(
st.session_state.nb_bad_file) + st.session_state.general_text_dict['doc_abstract2']) st.session_state.nb_bad_file) + st.session_state.general_text_dict['doc_abstract2'])
st.download_button('Download TSV', st.session_state.output, 'output.csv') fileName = "isidoreOutput_" + str(datetime.now().strftime("%Y-%m-%d_%H:%M:%S")) + '.csv'
st.download_button('Download TSV', st.session_state.output, fileName)
...@@ -91,13 +91,13 @@ file = st.file_uploader( ...@@ -91,13 +91,13 @@ file = st.file_uploader(
if file: if file:
try: try:
name = file.name.split('.')[0] + '.csv' fileName = "istexOutput_" + str(datetime.now().strftime("%Y-%m-%d_%H:%M:%S")) + '.csv'
res, nb_dup = read_zip(file) res, nb_dup = read_zip(file)
if nb_dup: if nb_dup:
st.write(st.session_state.general_text_dict['dup1'] + str( st.write(st.session_state.general_text_dict['dup1'] + str(
nb_dup) + st.session_state.general_text_dict['dup2']) nb_dup) + st.session_state.general_text_dict['dup2'])
st.write(st.session_state.general_text_dict['new_file']) st.write(st.session_state.general_text_dict['new_file'])
st.download_button(name, res, name) st.download_button('Download TSV', res, fileName)
except Exception as e: except Exception as e:
st.write(st.session_state.general_text_dict['error']) st.write(st.session_state.general_text_dict['error'])
print(e) print(e)
......
...@@ -6,7 +6,7 @@ Loïc Chapron ...@@ -6,7 +6,7 @@ Loïc Chapron
import streamlit as st import streamlit as st
import requests as req import requests as req
import json import json
from datetime import date from datetime import date, datetime
import src.basic as tmp import src.basic as tmp
...@@ -308,7 +308,8 @@ if st.session_state.stage == 2 and st.session_state.format == 'collections': ...@@ -308,7 +308,8 @@ if st.session_state.stage == 2 and st.session_state.format == 'collections':
output = createTSVfromCollections() output = createTSVfromCollections()
st.write(st.session_state.general_text_dict['fileTSV1'] + str( st.write(st.session_state.general_text_dict['fileTSV1'] + str(
len(output.split('\n'))-2) + st.session_state.general_text_dict['fileTSV2']) len(output.split('\n'))-2) + st.session_state.general_text_dict['fileTSV2'])
st.download_button('Download TSV', output, 'output.csv') fileName = "zoteroOutput_" + str(datetime.now().strftime("%Y-%m-%d_%H:%M:%S")) + '.csv'
st.download_button('Download TSV', output, fileName)
if st.session_state.stage > 0: if st.session_state.stage > 0:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment