Commit ff89fe15 authored by Atrax Nicolas's avatar Atrax Nicolas

Update pages

parent 1c63845c
......@@ -29,11 +29,11 @@ en,warning,"Warning ! Multiple languages have been detected at the source : "
en,warning2,"The following languages have been detected : "
fr,globalWarning, "Attention ! Plusieurs langues ont été détectées entre vos pdf ! Les langues suivantes ont été détectées : "
en,globalWarning,"Warning ! Multiple languages have been detected for your pdfs file ! The following languages have been detected : "
fr,globalWarning, "Attention ! Plusieurs langues ont été détectées entre vos pdf !\nLes langues suivantes ont été détectées : "
en,globalWarning,"Warning ! Multiple languages have been detected across your PDF files !\nThe following languages have been detected : "
fr,advice,"Cela pourrait affecter massivement l'analyse de GarganText. Vous pouvez régler ça en traduisant avec l'outil TsvTranslator."
fr,advice,"Cela pourrait affecter massivement l'analyse de GarganText.\nVous pouvez régler ça en traduisant avec l'outil TsvTranslator."
en,advice,"This could massively affect the analysis of GarganText.\nYou can correct this by translating with the TsvTranslator tool."
......@@ -3,22 +3,23 @@ Streamlit Application
Nicolas Atrax
"""
import streamlit as st
import pandas as pd
import chardet
import re
from datetime import date
import codecs
import os
import tempfile
import zipfile
import shutil
import zipfile
import tempfile
import os
import codecs
from datetime import date
import re
import chardet
import pandas as pd
import streamlit as st
from lib.tika.tika import parser
from lib.langdetect.langdetect import detect
from lib.langdetect.langdetect.lang_detect_exception import LangDetectException
import src.basic as tmp
os.environ['TIKA_SERVER_JAR'] = 'https://repo1.maven.org/maven2/org/apache/tika/tika-server/1.19/tika-server-1.19.jar'
tmp.base("PDFtoTSV")
# Tool Code Start
......@@ -83,7 +84,7 @@ def detectMultipleLanguages(languages, fileName):
detected += l + " : " + str(languages[l]) + "%"
valuable.append(l)
if len(valuable) > 1:
st.session_state.warning += st.session_state.general_text_dict['warning'] + \
st.session_state.warning += st.session_state.general_text_dict['warning'] + "\"" + \
fileName + "\" !\n"
st.session_state.warning += st.session_state.general_text_dict['warning2'] + \
detected + " \n"
......@@ -96,9 +97,9 @@ def detectMultiplePdfLanguages():
languages = []
for l in st.session_state.pdfLanguages.values():
if l not in languages and len(languages) == 1:
st.write(st.session_state.general_text_dict['globalWarning'])
st.write(str(st.session_state.pdfLanguages))
st.write(st.session_state.general_text_dict['advice'])
st.error(st.session_state.general_text_dict['globalWarning'])
st.error(str(st.session_state.pdfLanguages))
st.error(st.session_state.general_text_dict['advice'])
return
if len(languages) == 0:
languages.append(l)
......@@ -193,24 +194,6 @@ def extractAllPDF(zipDir, zipFile):
if ".pdf" in info.filename:
zipRef.extract(info, zipDir)
def convertAllPDF(zipFile, zipDir):
    """Build one TSV from the PDFs of an archive and zip the result.

    The archive is unpacked into ``zipDir`` by ``extractAllPDF``; every
    entry of ``zipDir`` whose name contains ".pdf" is passed to
    ``txtToTSV``; the returned text is gathered under a fixed header row
    and written to ``final.tsv`` in a temporary directory, which is then
    archived as ``zipDir/PDFCompilation.zip``.

    Args:
        zipFile: archive forwarded unchanged to ``extractAllPDF``.
        zipDir: directory that receives the extracted PDFs and the
            resulting ``PDFCompilation.zip``.

    Returns:
        None.
    """
    # Header row; the text returned for each PDF is appended after it.
    parts = [
        "authors\tsource\tpublication_year\tpublication_month\tpublication_day\ttitle\tabstract\n"
    ]
    with tempfile.TemporaryDirectory() as pdfDir:
        # NOTE(review): the temporary directory is removed when this `with`
        # block exits, so the path stored here is only usable during the call.
        st.session_state.dir = pdfDir
        extractAllPDF(zipDir, zipFile)
        for fileName in os.listdir(zipDir):
            # Substring filter kept identical to the one in extractAllPDF.
            if ".pdf" in fileName:
                # The second value returned by txtToTSV is not used here.
                rows, _ = txtToTSV(
                    fileName, zipDir + '/' + fileName, pdfDir)
                parts.append("\n" + rows)
        # utf-8-sig writes a BOM at the start of the file.
        with open(pdfDir + "/final.tsv", "w", encoding='utf-8-sig') as output:
            output.write("".join(parts))
        # Archive while the temporary directory still exists.
        shutil.make_archive(zipDir + "/PDFCompilation", 'zip', pdfDir)
# Tool Code End
......@@ -306,7 +289,8 @@ if st.session_state.page == 3:
shutil.make_archive(st.session_state.zipDir.name +
"/PDFCompilation", 'zip', st.session_state.pdfDir.name)
with open(st.session_state.zipDir.name + "/PDFCompilation.zip", 'rb') as zip:
st.write(st.session_state.warning)
if st.session_state.warning != "":
st.error(st.session_state.warning)
detectMultiplePdfLanguages()
st.write(st.session_state.general_text_dict['new_file'])
st.download_button("PDFCompilation.zip",
......
......@@ -3,6 +3,8 @@ Streamlit Application
Nicolas Atrax
"""
import src.basic as tmp
from lib.tika.tika import parser
import streamlit as st
import pandas as pd
import chardet
......@@ -11,8 +13,6 @@ from datetime import date
import codecs
import os
import tempfile
from lib.tika.tika import parser
import src.basic as tmp
os.environ['TIKA_SERVER_JAR'] = 'https://repo1.maven.org/maven2/org/apache/tika/tika-server/1.19/tika-server-1.19.jar'
tmp.base("PDFtoTXT")
......
......@@ -61,7 +61,7 @@ def detectMultipleLanguages(languages, fileName):
detected += l + " : " + str(languages[l]) + "%"
valuable.append(l)
if len(valuable) > 1:
st.session_state.warning += st.session_state.general_text_dict['warning'] + \
st.session_state.warning += st.session_state.general_text_dict['warning'] + "\"" + \
fileName + "\" !\n"
st.session_state.warning += st.session_state.general_text_dict['warning2'] + \
detected + " \n"
......@@ -145,6 +145,9 @@ form = st.form('api')
# Seed the session-state entries only when they are not set yet.
if "page" not in st.session_state:
    st.session_state["page"] = 0
if "warning" not in st.session_state:
    # Starts empty; warning text is appended to it elsewhere in the script.
    st.session_state["warning"] = ""
def setSubmit():
    """Set the ``submit`` entry of the Streamlit session state to True."""
    st.session_state["submit"] = True
......@@ -193,4 +196,6 @@ if st.session_state.page == 1:
name = st.session_state.file.name.split('.')[0] + '.tsv'
st.write(st.session_state.general_text_dict['new_file'])
st.session_state.submit = False
if st.session_state.warning != "":
st.error(st.session_state.warning)
st.download_button(name, txt, name, on_click=setPage())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment