Spaces:
Sleeping
Sleeping
Improved feedback to the user
Browse files
- gradio_app.py +15 -7
- src/translate_any_doc.py +19 -7
gradio_app.py
CHANGED
|
@@ -13,17 +13,22 @@ translator = SalamandraTA7bTranslator(hf_token)
|
|
| 13 |
|
| 14 |
def upload_file(filepath, source_lang, target_lang):
|
| 15 |
aligner = Aligner(config_folder, source_lang, target_lang, temp_folder)
|
| 16 |
-
translated_file_name
|
| 17 |
-
|
| 18 |
-
gr.
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
def before_processing():
|
| 23 |
return [
|
| 24 |
gr.UploadButton(visible=False),
|
| 25 |
-
gr.
|
| 26 |
-
# Keep download hidden until processing finishes
|
| 27 |
]
|
| 28 |
|
| 29 |
|
|
@@ -42,8 +47,11 @@ with gr.Blocks() as demo:
|
|
| 42 |
with gr.Row():
|
| 43 |
u = gr.UploadButton("Upload a file", file_count="single")
|
| 44 |
d = gr.DownloadButton("Download the file", visible=False)
|
|
|
|
| 45 |
|
| 46 |
-
u.upload(fn=before_processing, inputs=None, outputs=[u,
|
|
|
|
|
|
|
| 47 |
d.click(download_file, None, [u, d])
|
| 48 |
if __name__ == "__main__":
|
| 49 |
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
| 13 |
|
| 14 |
def upload_file(filepath, source_lang, target_lang):
|
| 15 |
aligner = Aligner(config_folder, source_lang, target_lang, temp_folder)
|
| 16 |
+
for status, translated_file_name in translate_document(filepath, source_lang, target_lang, translator, aligner):
|
| 17 |
+
if translated_file_name: # finished
|
| 18 |
+
yield [gr.UploadButton(visible=False),
|
| 19 |
+
gr.DownloadButton(label=f"Download {translated_file_name}", value=translated_file_name,
|
| 20 |
+
visible=True, interactive=True),
|
| 21 |
+
gr.Textbox(visible=False)]
|
| 22 |
+
else:
|
| 23 |
+
yield [gr.UploadButton(visible=False),
|
| 24 |
+
gr.DownloadButton(visible=False),
|
| 25 |
+
gr.Textbox(value=status, visible=True)]
|
| 26 |
|
| 27 |
|
| 28 |
def before_processing():
|
| 29 |
return [
|
| 30 |
gr.UploadButton(visible=False),
|
| 31 |
+
gr.Textbox(value="Processing...", visible=True),
|
|
|
|
| 32 |
]
|
| 33 |
|
| 34 |
|
|
|
|
| 47 |
with gr.Row():
|
| 48 |
u = gr.UploadButton("Upload a file", file_count="single")
|
| 49 |
d = gr.DownloadButton("Download the file", visible=False)
|
| 50 |
+
status_text = gr.Textbox(label="Status", visible=False)
|
| 51 |
|
| 52 |
+
u.upload(fn=before_processing, inputs=None, outputs=[u, status_text]).then(upload_file,
|
| 53 |
+
[u, dropdown1, dropdown2],
|
| 54 |
+
[u, d, status_text])
|
| 55 |
d.click(download_file, None, [u, d])
|
| 56 |
if __name__ == "__main__":
|
| 57 |
demo.launch(server_name="0.0.0.0", server_port=7860)
|
src/translate_any_doc.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import shutil
|
| 2 |
import string
|
|
|
|
| 3 |
import time
|
| 4 |
import os
|
| 5 |
from itertools import groupby
|
|
@@ -12,6 +13,8 @@ import glob
|
|
| 12 |
import spacy
|
| 13 |
from spacy.tokens import Doc
|
| 14 |
|
|
|
|
|
|
|
| 15 |
import tqdm
|
| 16 |
|
| 17 |
# Load multilingual model to use as sentence tokenizer
|
|
@@ -366,7 +369,8 @@ def translate_document(input_file: str, source_lang: str, target_lang: str,
|
|
| 366 |
translator,
|
| 367 |
aligner: Aligner,
|
| 368 |
temp_folder: str = "tmp",
|
| 369 |
-
tikal_folder: str = "okapi-apps_gtk2-linux-x86_64_1.47.0", with_format: bool = True) -> str
|
|
|
|
| 370 |
input_filename = input_file.split("/")[-1]
|
| 371 |
os.makedirs(temp_folder, exist_ok=True)
|
| 372 |
|
|
@@ -390,20 +394,28 @@ def translate_document(input_file: str, source_lang: str, target_lang: str,
|
|
| 390 |
original_spacing += spaces
|
| 391 |
|
| 392 |
translated_sentences = []
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
|
|
|
|
|
|
| 396 |
text = Doc(spacy_nlp.vocab, words=[token["text"] for token in sentence], spaces=spacing).text
|
| 397 |
|
| 398 |
while True:
|
| 399 |
try:
|
| 400 |
translated_sentences.append(translator.translate(text, source_lang, target_lang))
|
| 401 |
break
|
| 402 |
-
except:
|
| 403 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 404 |
|
| 405 |
# time to align the translation with the original
|
| 406 |
print("Generating alignments...")
|
|
|
|
| 407 |
start_time = time.time()
|
| 408 |
translated_sentences_with_style, translated_sentences_spacing = generate_alignments(
|
| 409 |
original_tokenized_sentences_with_style,
|
|
@@ -468,4 +480,4 @@ def translate_document(input_file: str, source_lang: str, target_lang: str,
|
|
| 468 |
translated_file_path = re.search(r'(?<=Output:\s)(.*)', output)[0]
|
| 469 |
|
| 470 |
print(f"Saved file in {translated_file_path}")
|
| 471 |
-
|
|
|
|
| 1 |
import shutil
|
| 2 |
import string
|
| 3 |
+
import sys
|
| 4 |
import time
|
| 5 |
import os
|
| 6 |
from itertools import groupby
|
|
|
|
| 13 |
import spacy
|
| 14 |
from spacy.tokens import Doc
|
| 15 |
|
| 16 |
+
from gradio_client.exceptions import AppError
|
| 17 |
+
|
| 18 |
import tqdm
|
| 19 |
|
| 20 |
# Load multilingual model to use as sentence tokenizer
|
|
|
|
| 369 |
translator,
|
| 370 |
aligner: Aligner,
|
| 371 |
temp_folder: str = "tmp",
|
| 372 |
+
tikal_folder: str = "okapi-apps_gtk2-linux-x86_64_1.47.0", with_format: bool = True) -> (str,
|
| 373 |
+
str):
|
| 374 |
input_filename = input_file.split("/")[-1]
|
| 375 |
os.makedirs(temp_folder, exist_ok=True)
|
| 376 |
|
|
|
|
| 394 |
original_spacing += spaces
|
| 395 |
|
| 396 |
translated_sentences = []
|
| 397 |
+
yield "Translating 0%...", None
|
| 398 |
+
total = len(original_tokenized_sentences_with_style)
|
| 399 |
+
pbar = tqdm.tqdm(desc="Translating paragraphs...", total=total)
|
| 400 |
+
|
| 401 |
+
for i, (sentence, spacing) in enumerate(zip(original_tokenized_sentences_with_style, original_spacing)):
|
| 402 |
text = Doc(spacy_nlp.vocab, words=[token["text"] for token in sentence], spaces=spacing).text
|
| 403 |
|
| 404 |
while True:
|
| 405 |
try:
|
| 406 |
translated_sentences.append(translator.translate(text, source_lang, target_lang))
|
| 407 |
break
|
| 408 |
+
except AppError as e:
|
| 409 |
+
print(e)
|
| 410 |
+
sys.exit()
|
| 411 |
+
|
| 412 |
+
pbar.update(1)
|
| 413 |
+
percent_complete = int(((i + 1) / total) * 100)
|
| 414 |
+
yield f"Translating {percent_complete}%...", None
|
| 415 |
|
| 416 |
# time to align the translation with the original
|
| 417 |
print("Generating alignments...")
|
| 418 |
+
yield "Aligning...", None
|
| 419 |
start_time = time.time()
|
| 420 |
translated_sentences_with_style, translated_sentences_spacing = generate_alignments(
|
| 421 |
original_tokenized_sentences_with_style,
|
|
|
|
| 480 |
translated_file_path = re.search(r'(?<=Output:\s)(.*)', output)[0]
|
| 481 |
|
| 482 |
print(f"Saved file in {translated_file_path}")
|
| 483 |
+
yield "", translated_file_path
|