Spaces:
Sleeping
Sleeping
added language dropdown menus to document translation tab
Browse files
- gradio_app.py +23 -12
- src/translate_any_doc.py +1 -1
gradio_app.py
CHANGED
|
@@ -1,37 +1,48 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from src.translate_any_doc import translate_document
|
|
|
|
|
|
|
| 3 |
from src.aligner import Aligner
|
| 4 |
from nltk.tokenize.treebank import TreebankWordDetokenizer
|
| 5 |
|
| 6 |
-
|
| 7 |
-
ip='10.192.31.127'
|
| 8 |
config_folder = 'fast_align_config'
|
| 9 |
-
source_lang = 'en'
|
| 10 |
-
target_lang = 'ca'
|
| 11 |
temp_folder = 'tmp'
|
| 12 |
-
|
|
|
|
| 13 |
detokenizer = TreebankWordDetokenizer()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
-
def upload_file(filepath):
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
def download_file():
|
| 21 |
return [gr.UploadButton(visible=True), gr.DownloadButton(visible=False)]
|
| 22 |
|
| 23 |
|
| 24 |
with gr.Blocks() as demo:
|
| 25 |
-
|
| 26 |
with gr.Tab("Text"):
|
| 27 |
-
gr.Interface(fn=translate, inputs=["text","text","text"], outputs="text")
|
| 28 |
with gr.Tab("Docx documents"):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
gr.Markdown("First upload a file and and then you'll be able download it (but only once!)")
|
| 30 |
with gr.Row():
|
| 31 |
u = gr.UploadButton("Upload a file", file_count="single")
|
| 32 |
d = gr.DownloadButton("Download the file", visible=False)
|
| 33 |
|
| 34 |
-
u.upload(upload_file, u, [u, d])
|
| 35 |
d.click(download_file, None, [u, d])
|
| 36 |
if __name__ == "__main__":
|
| 37 |
demo.launch()
|
|
|
|
| 1 |
import os

import gradio as gr
from nltk.tokenize.treebank import TreebankWordDetokenizer

from src.translate_any_doc import translate_document
from src.salamandraTA7b_translator import SalamandraTA7bTranslator
from src.mtuoc_aina_translator import MTUOCAinaTranslator
from src.aligner import Aligner
|
| 7 |
|
|
|
|
|
|
|
| 8 |
# Folder and scratch directory handed to every Aligner built in upload_file.
config_folder = 'fast_align_config'
temp_folder = 'tmp'

# Token handed to the translator (presumably a Hugging Face access token --
# confirm against SalamandraTA7bTranslator). It is read from the HF_TOKEN
# environment variable so the secret never has to be committed to the
# repository; when the variable is unset the previous default (an empty
# string) is used.
hf_token = os.environ.get("HF_TOKEN", "")

# Shared, module-level helpers reused by every request.
detokenizer = TreebankWordDetokenizer()
translator = SalamandraTA7bTranslator(hf_token)

# Alternative translation backend, kept for reference:
# ip = ''
# port = ''
# translator = MTUOCAinaTranslator(ip, port)
|
| 19 |
|
| 20 |
|
| 21 |
+
def upload_file(filepath, source_lang, target_lang):
    """Translate the uploaded document and offer the result for download.

    Args:
        filepath: Value delivered by the upload button for the uploaded file.
        source_lang: Value of the "Source language" dropdown.
        target_lang: Value of the "Target language" dropdown.

    Returns:
        A two-item list of component updates: the upload button hidden, and
        the download button made visible, labelled with and pointing at the
        value returned by ``translate_document``.

    Raises:
        gr.Error: If either language has not been selected.
    """
    # Both dropdowns start with value=None, so an upload can arrive before
    # the user picked a language; fail with a readable message instead of
    # passing None down into the aligner and the translation pipeline.
    if not source_lang or not target_lang:
        raise gr.Error("Please select both a source and a target language before uploading a file.")

    # The aligner depends on the language pair, so it is built per request.
    aligner = Aligner(config_folder, source_lang, target_lang, temp_folder)
    translated_file_name = translate_document(filepath, source_lang, target_lang, translator, aligner, detokenizer)
    return [
        gr.UploadButton(visible=False),
        gr.DownloadButton(label=f"Download {translated_file_name}", value=translated_file_name, visible=True),
    ]
|
| 26 |
+
|
| 27 |
|
| 28 |
def download_file():
    """Return the button updates applied after a download click.

    Returns:
        A two-item list: the upload button made visible again and the
        download button hidden.
    """
    upload_button = gr.UploadButton(visible=True)
    download_button = gr.DownloadButton(visible=False)
    return [upload_button, download_button]
|
| 30 |
|
| 31 |
|
| 32 |
# Two-tab UI: free-text translation and document translation.
with gr.Blocks() as demo:
    with gr.Tab("Text"):
        # Three text inputs are forwarded positionally to translator.translate.
        gr.Interface(fn=translator.translate, inputs=["text", "text", "text"], outputs="text")
    with gr.Tab("Docx documents"):
        # Language pair for the document; both start unselected.
        with gr.Row():
            dropdown1 = gr.Dropdown(label="Source language", choices=["en", "ca"], value=None,
                                    interactive=True)
            dropdown2 = gr.Dropdown(label="Target language", choices=["en", "ca"], value=None, interactive=True)
        gr.Markdown("First upload a file and then you'll be able to download it (but only once!)")
        with gr.Row():
            u = gr.UploadButton("Upload a file", file_count="single")
            d = gr.DownloadButton("Download the file", visible=False)

        # Uploading runs upload_file, whose return value updates both buttons;
        # clicking download runs download_file, which updates them again.
        u.upload(upload_file, [u, dropdown1, dropdown2], [u, d])
        d.click(download_file, None, [u, d])

if __name__ == "__main__":
    demo.launch()
|
src/translate_any_doc.py
CHANGED
|
@@ -44,7 +44,7 @@ def doc_to_plain_text(input_file: str, source_lang: str, target_lang: str, tikal
|
|
| 44 |
return os.path.join(original_xliff_file_path + f".{source_lang}")
|
| 45 |
|
| 46 |
|
| 47 |
-
def get_runs_from_paragraph(text: str, paragraph_index: int) ->
|
| 48 |
"""
|
| 49 |
Given some text that may or may not contain some chunks tagged with something like <g id=1> </g>, extract each
|
| 50 |
of the runs of text and convert them into dictionaries to keep this information
|
|
|
|
| 44 |
return os.path.join(original_xliff_file_path + f".{source_lang}")
|
| 45 |
|
| 46 |
|
| 47 |
+
def get_runs_from_paragraph(text: str, paragraph_index: int) -> list[dict[str, str | tuple[str, ...]]]:
|
| 48 |
"""
|
| 49 |
Given some text that may or may not contain some chunks tagged with something like <g id=1> </g>, extract each
|
| 50 |
of the runs of text and convert them into dictionaries to keep this information
|