Commit ee5f4a3e authored by Atrax Nicolas's avatar Atrax Nicolas

Update pages

parent ff89fe15
......@@ -4,7 +4,7 @@ name = "Home"
icon = ":house:"
[[pages]]
name = "API"
name = "API Tools"
icon = ":globe_with_meridians:"
is_section = true
......@@ -21,7 +21,15 @@ path = "pages/Zotero_To_GarganText.py"
name = "Zotero To GarganText"
[[pages]]
name = "Convert"
path = "pages/TSV_Translator.py"
name = "TSV Translator"
[[pages]]
path = "pages/YTB_to_TSV.py"
name = "YTB To TSV"
[[pages]]
name = "Convert Tools"
icon = ":twisted_rightwards_arrows:"
is_section = true
......@@ -70,12 +78,12 @@ name = "PDF To TXT"
path = "pages/TXT_to_TSV.py"
name = "TXT To TSV"
[[pages]]
name = "Other Tools"
icon = ":twisted_rightwards_arrows:"
is_section = true
[[pages]]
path = "pages/Merge_Term_GarganText.py"
name = "Merge Term GarganText"
in_section = false
name = "Merge GarganText Terms"
[[pages]]
path = "pages/TSV_Translator.py"
name = "TSV Translator"
locale,key,value
fr,title,"**HAL vers GarganText**"
en,title,"**HAL To GarganText**"
fr,title,"# HAL vers GarganText"
en,title,"# HAL To GarganText"
fr,text,"HAL est une base de document scientifique en ligne et libre d'accès contenant plus d'un million de document."
en,text,"HAL is an online and free access scientific document database containing more than a million documents"
......
locale,key,value
fr,title,"**Isidore vers GarganText**"
en,title,"**Isidore To GarganText**"
fr,title,"# Isidore vers GarganText"
en,title,"# Isidore To GarganText"
fr,keyword,"Mots clés"
en,keyword,"Keywords"
......
locale,key,value
fr,title,"Fusionne Deux Liste de Terme de GarganText"
en,title,"Input Two Term File From GarganText"
fr,file,"Choisir un fichier"
en,file,"Choose a file"
fr,title,"# Merge GarganText Terms"
en,title,"# Merge GarganText Terms"
fr,new_file,"Télécharge ton fichier fusionner:"
en,new_file,"Download your merge file:"
\ No newline at end of file
fr,text,"Fusionne 2 fichiers de termes de GarganText."
en,text,"Merge 2 term files from GarganText."
fr,file," Choisir un fichier "
en,file," Choose a file "
fr,new_file," Télécharge ton fichier fusionné "
en,new_file," Download your merged file "
\ No newline at end of file
......@@ -33,7 +33,7 @@ fr,globalWarning, "Attention ! Plusieurs langues ont été détectées entre vos
en,globalWarning,"Warning ! Multiple languages have been detected in your PDF files !\nThe following languages have been detected : "
fr,advice,"Cela pourrait affecter massivement l'analyse de GarganText.\nVous pouvez régler ça en traduisant avec l'outil TsvTranslator."
en,advice,"This could massively affect the analysis of Gargantext.\nYou can correct this by translation with the TsvTranslator tool."
fr,advice,"Cela pourrait affecter massivement l'analyse de GarganText. Vous pouvez régler ça en traduisant avec l'outil TsvTranslator."
en,advice,"This could massively affect the analysis of GarganText. You can correct this by translating with the TsvTranslator tool."
......@@ -25,3 +25,15 @@ en,translate2," To "
fr,detected,"Langues détectées : "
en,detected,"Detected languages : "
fr,loading,"Progression de la traduction : "
en,loading,"Translation progress : "
fr,loadingLanguages," Analyse des langues du fichier "
en,loadingLanguages," File languages analysis "
fr,sameLanguages,"Une seule langue a été détectée au sein du fichier : "
en,sameLanguages,"Only one language has been detected inside this file : "
fr,anotherFile," Traduire un autre fichier "
en,anotherFile," Translate another file "
locale,key,value
fr,title,"# Bienvenue sur GanganText Tools"
en,title,"# Welcome to GanganText Tools"
fr,title,"# Bienvenue sur GarganTools"
en,title,"# Welcome to GarganTools"
fr,welcome,"Bienvenue sur ces pages rassemblant des outils développés par des utilisateurs de GarganText pour des utilisateurs de GarganText."
en,welcome,"Welcome to these pages featuring tools developed by GarganText users for GarganText users."
......
locale,key,value
fr,title,"# YTB To TSV"
en,title,"# YTB To TSV"
fr,title,"# Youtube To TSV"
en,title,"# Youtube To TSV"
fr,text,"Inspecte un fichier CSV pour vérifier s'il est compatible avec Gargantext"
en,text,"Inspect a CSV file to check if it is compatible with GarganText"
fr,text,"Effectue une recherche Youtube à l'aide de mots clés (thème, titre de vidéo, lien de vidéo,...) pour créer un fichier TSV à partir des sous-titres de vidéos."
en,text,"Perform a YouTube search with keywords (topic, video title, video link,...) to create a TSV file based on the subtitles of the videos."
fr,file,"Choisir un fichier"
en,file,"Choose a file"
......@@ -20,5 +20,14 @@ en,fill,"Only manual subtitles (longer waiting time)"
fr,submit," Soumettre "
en,submit," Submit "
fr,loadingID," Recherche de vidéos "
en,loadingID," Searching videos "
fr,loading,"Traitement des vidéos : "
en,loading,"Videos processing : "
fr,quantity," sur "
en,quantity," out of "
fr,new_file,"Télécharge ton fichier TSV :"
en,new_file,"Download your TSV file :"
locale,key,value
fr,title,"**Zotero vers GarganText**"
en,title,"**Zotero vers GarganText**"
fr,title,"# Zotero vers GarganText"
en,title,"# Zotero vers GarganText"
fr,data,"Type de donnée"
en,data,"Type of data"
......
......@@ -245,11 +245,10 @@ def getContent(file, separator, data, success, fill, errorMessage):
# Code End
st.write(st.session_state.general_text_dict['title'])
st.write(st.session_state.general_text_dict['text'])
st.session_state.fill = st.checkbox(st.session_state.general_text_dict['fill'])
file = st.file_uploader(
st.session_state.general_text_dict['file'], type=["csv"], key='file')
st.session_state.general_text_dict['file'], type=["tsv", "csv"], key='file')
if file:
name = file.name.split('.')[0] + '.tsv'
......
......@@ -13,8 +13,11 @@ tmp.base("HALToGarganText")
limit = 500
limitItems = 10000
def loadApiHALNbFile(search, lang):
url = 'http://api.archives-ouvertes.fr/search/?q=' + search + '&rows=5&fl=title_s,' + lang + '_title_s,source_s,publicationDate_s,authFullName_s,' + lang + '_abstract_s,abstract_s&fq=language_s:' + lang
url = 'http://api.archives-ouvertes.fr/search/?q=' + search + '&rows=5&fl=title_s,' + lang + \
'_title_s,source_s,publicationDate_s,authFullName_s,' + \
lang + '_abstract_s,abstract_s&fq=language_s:' + lang
resp = req.get(url)
print(url)
try:
......@@ -26,7 +29,8 @@ def loadApiHALNbFile(search, lang):
def loadApiHAL(search, lang, page, nbvalue):
url = 'http://api.archives-ouvertes.fr/search/?q=' + search + '&start=' + str(page * limit) + '&rows=' + str(nbvalue) + '&fl=title_s,' + lang + '_title_s,source_s,publicationDate_s,authFullName_s,' + lang + '_abstract_s,abstract_s&fq=language_s:' + lang
url = 'http://api.archives-ouvertes.fr/search/?q=' + search + '&start=' + str(page * limit) + '&rows=' + str(
nbvalue) + '&fl=title_s,' + lang + '_title_s,source_s,publicationDate_s,authFullName_s,' + lang + '_abstract_s,abstract_s&fq=language_s:' + lang
resp = req.get(url)
print(url)
try:
......@@ -81,41 +85,50 @@ def getParamFromDoc(docs):
if 'publicationDate_s' in doc.keys():
split = doc['publicationDate_s'].split('-')
if len(split) == 3:
pdate = datetime.strptime(doc['publicationDate_s'], '%Y-%m-%d').strftime('%Y\t%m\t%d')
pdate = datetime.strptime(
doc['publicationDate_s'], '%Y-%m-%d').strftime('%Y\t%m\t%d')
elif len(split) == 2:
pdate = datetime.strptime(doc['publicationDate_s'], '%Y-%m').strftime('%Y\t%m\t1')
pdate = datetime.strptime(
doc['publicationDate_s'], '%Y-%m').strftime('%Y\t%m\t1')
else:
pdate = doc['publicationDate_s'] + '\t1\t1'
else:
pdate = '1900\t1\t1'
abstract = abstract.encode(encoding = 'UTF-8', errors = 'ignore').decode("utf-8").replace('\t', '').replace('"', '').replace('\n', '')
title = title.encode(encoding = 'UTF-8', errors = 'ignore').decode("utf-8").replace('\t', '').replace('"', '').replace('\n', '')
source = source.encode(encoding = 'UTF-8', errors = 'ignore').decode("utf-8").replace('\t', '').replace('"', '').replace('\n', '').replace('\n', '')
abstract = abstract.encode(encoding='UTF-8', errors='ignore').decode(
"utf-8").replace('\t', '').replace('"', '').replace('\n', '')
title = title.encode(encoding='UTF-8', errors='ignore').decode(
"utf-8").replace('\t', '').replace('"', '').replace('\n', '')
source = source.encode(encoding='UTF-8', errors='ignore').decode(
"utf-8").replace('\t', '').replace('"', '').replace('\n', '').replace('\n', '')
# Output
output += str(title) + "\t" + source + "\t" + str(pdate) + "\t" + abstract + "\t" + authors + "\t" + str(1) + "\n"
output += str(title) + "\t" + source + "\t" + str(pdate) + \
"\t" + abstract + "\t" + authors + "\t" + str(1) + "\n"
return output
def create_output(search, lang, nb_value):
output = "title\tsource\tpublication_year\tpublication_month\tpublication_day\tabstract\tauthors\tweight\n"
for i in range(0, nb_value//limit):
response = loadApiHAL(search, lang, i, limit)
output += getParamFromDoc(response['response']['docs'])
if (nb_value%limit != 0):
response = loadApiHAL(search, lang, nb_value//limit, nb_value%limit)
if (nb_value % limit != 0):
response = loadApiHAL(search, lang, nb_value//limit, nb_value % limit)
output += getParamFromDoc(response['response']['docs'])
return output
lang = {
'Français' : 'fr',
'Anglais' : 'en',
'Français': 'fr',
'Anglais': 'en',
}
if 'stage_isidore' not in st.session_state:
st.session_state.stage_isidore = 0
st.session_state.nb_wanted = 1
def set_stage(stage):
st.session_state.stage_isidore = stage
st.session_state.output = ''
......@@ -127,19 +140,20 @@ st.write(st.session_state.general_text_dict['text'])
# Form
form = st.form('api')
form.write(st.session_state.general_text_dict['title'])
search = form.text_input(st.session_state.general_text_dict['keyword'])
language = form.selectbox(st.session_state.general_text_dict['lang'], lang.keys())
language = form.selectbox(
st.session_state.general_text_dict['lang'], lang.keys())
form.form_submit_button(st.session_state.general_text_dict['submit'], on_click=set_stage, args=(1,))
form.form_submit_button(
st.session_state.general_text_dict['submit'], on_click=set_stage, args=(1,))
# API and Slider
if st.session_state.stage_isidore > 0:
# Only call first time and after
if 'search' not in st.session_state or 'language' not in st.session_state or search != st.session_state.search or language != st.session_state.language:
with st.spinner(st.session_state.general_text_dict['load_api'] ):
with st.spinner(st.session_state.general_text_dict['load_api']):
nb_doc = int(loadApiHALNbFile(search, lang[language]))
st.session_state.nb_doc = nb_doc
if nb_doc != 0:
......@@ -150,27 +164,31 @@ if st.session_state.stage_isidore > 0:
# Form with slider
form2 = st.form('my_form2')
form2.write(st.session_state.general_text_dict['nb_doc'] + str(st.session_state.nb_doc))
form2.write(
st.session_state.general_text_dict['nb_doc'] + str(st.session_state.nb_doc))
if st.session_state.nb_doc > limitItems:
form2.write(st.session_state.general_text_dict['perform1'] + str(limitItems) + st.session_state.general_text_dict['perform2'])
st.session_state.nb_wanted = form2.slider(st.session_state.general_text_dict['nb_taken'], 1, limitItems)
form2.write(st.session_state.general_text_dict['perform1'] + str(
limitItems) + st.session_state.general_text_dict['perform2'])
st.session_state.nb_wanted = form2.slider(
st.session_state.general_text_dict['nb_taken'], 1, limitItems)
else:
st.session_state.nb_wanted = form2.slider(st.session_state.general_text_dict['nb_taken'], 1, int(st.session_state.nb_doc))
form2.form_submit_button(st.session_state.general_text_dict['submit'], on_click=set_stage, args=(2,))
st.session_state.nb_wanted = form2.slider(
st.session_state.general_text_dict['nb_taken'], 1, int(st.session_state.nb_doc))
form2.form_submit_button(
st.session_state.general_text_dict['submit'], on_click=set_stage, args=(2,))
else:
st.write(st.session_state.general_text_dict['overload_api'] )
st.write(st.session_state.general_text_dict['overload_api'])
# Download
if st.session_state.stage_isidore > 1:
with st.spinner(st.session_state.general_text_dict['createTSV'] ):
with st.spinner(st.session_state.general_text_dict['createTSV']):
if st.session_state.output == '':
print(st.session_state.nb_wanted)
st.session_state.output = create_output(st.session_state.search, lang[st.session_state.language], st.session_state.nb_wanted)
st.session_state.output = create_output(
st.session_state.search, lang[st.session_state.language], st.session_state.nb_wanted)
st.download_button('Download TSV', st.session_state.output, 'output.csv')
This diff is collapsed.
......@@ -21,73 +21,71 @@ def tmp(file1, file2):
roots = []
leafs = []
# find the roots of list 1
for root in ngrams1.keys() :
for root in ngrams1.keys():
if ngrams1[root]['list'] == "MapTerm" :
if ngrams1[root]['list'] == "MapTerm":
roots.append(root)
leafs = leafs + ngrams1[root]['children']
# merge list 2 in list 1
for root in ngrams2.keys() :
for root in ngrams2.keys():
if root in roots :
if root in roots:
ngrams1[root]['children'] = list(set(ngrams1[root]['children'] + ngrams2[root]['children']))
ngrams1[root]['children'] = list(
set(ngrams1[root]['children'] + ngrams2[root]['children']))
else :
else:
if root not in leafs :
if root not in leafs:
ngrams1[root] = ngrams2[root]
children = ngrams1[root]['children']
ngrams1[root]['children'] = []
for child in children :
if child not in root or child not in leafs :
for child in children:
if child not in root or child not in leafs:
ngrams1[root]['children'].append(child)
# clean the merged list
for root in ngrams1.keys() :
for root in ngrams1.keys():
if ngrams1[root]['list'] == "MapTerm" :
if ngrams1[root]['list'] == "MapTerm":
if len(ngrams1[root]['children']) == 0 :
if len(ngrams1[root]['children']) == 0:
ngrams1[root]['children'] = []
merged[root] = ngrams1[root]
file1['NgramsTerms']['data'] = merged
file1['Authors']['data'] = {}
file1['Institutes']['data'] = {}
file1['Sources']['data'] = {}
tmp = file1.to_json(orient='columns',indent=4)
tmp = file1.to_json(orient='columns', indent=4)
return tmp
st.subheader(st.session_state.general_text_dict['title'])
st.write(st.session_state.general_text_dict['text'])
col1, col2 = st.columns(2)
with col1:
file1 = st.file_uploader(st.session_state.general_text_dict['file'],type=["json"],key='file1')
file1 = st.file_uploader(st.session_state.general_text_dict['file'], type=[
"json"], key='file1')
with col2:
file2 = st.file_uploader(st.session_state.general_text_dict['file'],type=["json"],key='file2')
file2 = st.file_uploader(st.session_state.general_text_dict['file'], type=[
"json"], key='file2')
if (file1 and file2):
try:
df1 = pd.read_json(file1)
df2 = pd.read_json(file2)
st.write(st.session_state.general_text_dict['new_file'])
time = datetime.strftime(datetime.now(),"%d-%m-%Y/%H:%M:%S")
name = 'output-' +time+ '.json'
st.download_button('Download File', tmp(df1, df2), name)
time = datetime.strftime(datetime.now(), "%d-%m-%Y/%H:%M:%S")
name = 'output-' + time + '.json'
st.download_button(
st.session_state.general_text_dict['new_file'], tmp(df1, df2), name)
except Exception as e:
st.write("Error : one of the file isn't valid")
......
......@@ -97,9 +97,9 @@ def detectMultiplePdfLanguages():
languages = []
for l in st.session_state.pdfLanguages.values():
if l not in languages and len(languages) == 1:
st.error(st.session_state.general_text_dict['globalWarning'])
st.error(str(st.session_state.pdfLanguages))
st.error(st.session_state.general_text_dict['advice'])
st.info(st.session_state.general_text_dict['globalWarning'])
st.info(str(st.session_state.pdfLanguages))
st.info(st.session_state.general_text_dict['advice'])
return
if len(languages) == 0:
languages.append(l)
......@@ -290,7 +290,7 @@ if st.session_state.page == 3:
"/PDFCompilation", 'zip', st.session_state.pdfDir.name)
with open(st.session_state.zipDir.name + "/PDFCompilation.zip", 'rb') as zip:
if st.session_state.warning != "":
st.error(st.session_state.warning)
st.info(st.session_state.warning)
detectMultiplePdfLanguages()
st.write(st.session_state.general_text_dict['new_file'])
st.download_button("PDFCompilation.zip",
......
......@@ -114,12 +114,14 @@ def getContent(file, data, total, separator):
reader = csv.DictReader(codecs.iterdecode(
file, 'utf-8'), delimiter=separator)
count = 1
bar = st.progress(0, "Translation progress : 0%")
bar = st.progress(0,
st.session_state.general_text_dict['loading'] + "0%")
for row in reader:
tmp = ""
first = True
loading = int(count / total * 100)
bar.progress(loading, "Translation progress : " + str(loading) + "%")
bar.progress(loading,
st.session_state.general_text_dict['loading'] + str(loading) + "%")
for name, value in row.items():
if not first:
tmp += "\t"
......@@ -207,9 +209,15 @@ def askTranslateLanguages(file):
if st.session_state.page == 0:
if st.session_state.detect:
if st.session_state.file != None:
st.session_state.separator = getSeparator(st.session_state.file)
with st.spinner(st.session_state.general_text_dict['loadingLanguages']):
st.session_state.separator = getSeparator(
st.session_state.file)
st.session_state.languages = inspectLanguages(
st.session_state.file)
if len(st.session_state.languages) == 1:
st.session_state.page = 3
st.session_state.detect = False
else:
st.session_state.page = 1
st.session_state.detect = False
st.session_state.tmpFile = st.session_state.file
......@@ -220,7 +228,7 @@ if st.session_state.page == 0:
if st.session_state.page == 1:
if st.session_state.submit:
if st.session_state.submit and st.session_state.srcLang != st.session_state.destLang:
st.session_state.page = 2
st.session_state.submit = False
else:
......@@ -232,3 +240,10 @@ if st.session_state.page == 2:
name = st.session_state.tmpFile.name
st.download_button(name,
tsv, name, on_click=resetPage())
if st.session_state.page == 3:
st.write(
st.session_state.general_text_dict['sameLanguages'] + list(st.session_state.languages.keys())[0])
st.session_state.languages = {}
st.button(
st.session_state.general_text_dict['anotherFile'], on_click=resetPage())
......@@ -197,5 +197,5 @@ if st.session_state.page == 1:
st.write(st.session_state.general_text_dict['new_file'])
st.session_state.submit = False
if st.session_state.warning != "":
st.error(st.session_state.warning)
st.info(st.session_state.warning)
st.download_button(name, txt, name, on_click=setPage())
......@@ -28,7 +28,6 @@ def ytbSearch(search, n):
result = videosSearch.result()["result"]
videos = []
while len(videos) < n:
print(len(videos))
for video in result:
id = video["id"]
title = video["title"]
......@@ -168,17 +167,19 @@ def correctTranscript(transcript):
def transcriptToTsv(search, nbVideos):
tsv = "authors\tsource\tpublication_year\tpublication_month\tpublication_day\ttitle\tabstract\n"
dict = st.session_state.general_text_dict
with st.spinner(dict['loadingID']):
if st.session_state.manualOnly:
videos = ytbSearch(search, nbVideos * 20)
else:
videos = ytbSearch(search, nbVideos * 4)
count = 0
countManual = 0
bar = st.progress(count / nbVideos, "Search videos : " +
str(count) + " out of " + str(nbVideos))
bar = st.progress(count / nbVideos, dict['loading'] +
str(count) + dict['quantity'] + str(nbVideos))
for video in videos:
bar.progress(count / nbVideos, "Search videos : " +
str(count) + " out of " + str(nbVideos))
bar.progress(count / nbVideos, dict['loading'] +
str(count) + dict['quantity'] + str(nbVideos))
if count == nbVideos:
break
id, author, title = video[0], video[1], video[2]
......@@ -237,6 +238,7 @@ def askVideos():
if st.session_state.page == 0:
st.write(st.session_state.general_text_dict['text'])
if st.session_state.submit:
st.session_state.submit = False
if st.session_state.keywords != "":
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment