Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,20 +1,16 @@
|
|
| 1 |
-
import io
|
| 2 |
-
import os
|
| 3 |
-
import requests
|
| 4 |
import streamlit as st
|
| 5 |
import pandas as pd
|
| 6 |
import pysrt
|
| 7 |
from transformers import MarianMTModel, MarianTokenizer
|
| 8 |
import tempfile
|
| 9 |
-
|
| 10 |
-
|
| 11 |
|
| 12 |
def fetch_languages(url):
|
| 13 |
response = requests.get(url)
|
| 14 |
if response.status_code == 200:
|
| 15 |
-
# Convert bytes to a string using decode, then create a file-like object with io.StringIO
|
| 16 |
csv_content = response.content.decode('utf-8')
|
| 17 |
-
df = pd.read_csv(
|
| 18 |
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
|
| 19 |
df['ISO 639-1'] = df['ISO 639-1'].str.strip()
|
| 20 |
language_options = [(row['ISO 639-1'], f"{row['ISO 639-1']} - {row['Language Name']}") for index, row in df.iterrows()]
|
|
@@ -42,19 +38,12 @@ def translate_text(text, source_language_code, target_language_code):
|
|
| 42 |
def translate_srt(input_file, source_language_code, target_language_code):
|
| 43 |
subs = pysrt.open(input_file)
|
| 44 |
translated_subs = []
|
| 45 |
-
progress_bar = st.progress(0)
|
| 46 |
for idx, sub in enumerate(subs):
|
| 47 |
translated_text = translate_text(sub.text, source_language_code, target_language_code)
|
| 48 |
translated_sub = pysrt.SubRipItem(index=idx+1, start=sub.start, end=sub.end, text=translated_text)
|
| 49 |
translated_subs.append(translated_sub)
|
| 50 |
-
progress_bar.progress((idx + 1) / len(subs))
|
| 51 |
translated_file = pysrt.SubRipFile(translated_subs)
|
| 52 |
-
|
| 53 |
-
with tempfile.NamedTemporaryFile(suffix=".srt", delete=False) as tmp_file:
|
| 54 |
-
translated_file.save(tmp_file.name)
|
| 55 |
-
translated_srt_path = tmp_file.name
|
| 56 |
-
progress_bar.empty()
|
| 57 |
-
return translated_srt_path
|
| 58 |
|
| 59 |
st.title("SRT Translator")
|
| 60 |
st.write("Translate subtitles from one language to another.")
|
|
@@ -63,14 +52,29 @@ st.write("Translate subtitles from one language to another.")
|
|
| 63 |
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
|
| 64 |
language_options = fetch_languages(url)
|
| 65 |
|
| 66 |
-
source_language_code
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
file_input = st.file_uploader("Upload SRT File", type=["srt"])
|
| 70 |
|
| 71 |
-
if file_input is not None:
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
import pysrt
|
| 4 |
from transformers import MarianMTModel, MarianTokenizer
|
| 5 |
import tempfile
|
| 6 |
+
from io import BytesIO
|
| 7 |
+
import requests
|
| 8 |
|
| 9 |
def fetch_languages(url):
|
| 10 |
response = requests.get(url)
|
| 11 |
if response.status_code == 200:
|
|
|
|
| 12 |
csv_content = response.content.decode('utf-8')
|
| 13 |
+
df = pd.read_csv(BytesIO(response.content), delimiter="|", skiprows=2, header=None).dropna(axis=1, how='all')
|
| 14 |
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
|
| 15 |
df['ISO 639-1'] = df['ISO 639-1'].str.strip()
|
| 16 |
language_options = [(row['ISO 639-1'], f"{row['ISO 639-1']} - {row['Language Name']}") for index, row in df.iterrows()]
|
|
|
|
| 38 |
def translate_srt(input_file, source_language_code, target_language_code):
    """Translate every subtitle of an SRT source and return the new file.

    Args:
        input_file: Either a filesystem path to an ``.srt`` file, or a
            file-like object exposing ``getvalue()`` or ``read()`` (the
            caller in this script passes the object returned by
            ``st.file_uploader``). File-like content is decoded as UTF-8;
            a leading byte-order mark is dropped.
        source_language_code: Language code forwarded to ``translate_text``.
        target_language_code: Language code forwarded to ``translate_text``.

    Returns:
        A ``pysrt.SubRipFile`` whose items keep the original start/end
        timings, are renumbered from 1, and carry the translated text.

    Raises:
        UnicodeDecodeError: If a file-like input is not valid UTF-8.
    """
    if hasattr(input_file, "getvalue"):
        # getvalue() returns the whole payload regardless of the current
        # read position, so a previously consumed upload is still readable.
        raw = input_file.getvalue()
    elif hasattr(input_file, "read"):
        raw = input_file.read()
    else:
        raw = None

    if raw is None:
        # Plain path: let pysrt open and decode the file itself.
        subs = pysrt.open(input_file)
    else:
        # pysrt.open() only accepts a path, so in-memory uploads are parsed
        # from their text content instead.
        text = raw.decode("utf-8-sig") if isinstance(raw, bytes) else raw
        subs = pysrt.from_string(text)

    translated_subs = [
        pysrt.SubRipItem(
            index=idx + 1,
            start=sub.start,
            end=sub.end,
            text=translate_text(sub.text, source_language_code, target_language_code),
        )
        for idx, sub in enumerate(subs)
    ]
    return pysrt.SubRipFile(translated_subs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
st.title("SRT Translator")
|
| 49 |
st.write("Translate subtitles from one language to another.")
|
|
|
|
| 52 |
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
language_options = fetch_languages(url)

# Both codes stay None unless a language list is available, which keeps the
# translation step below from running without a valid selection.
source_language_code, target_language_code = None, None

if language_options:
    # Each option is a (code, label) pair: the label is displayed and the
    # code (element 0) is what the translation step consumes.
    source_language_code = st.selectbox(
        "Select Source Language",
        options=language_options,
        format_func=lambda x: x[1],
    )[0]
    target_language_code = st.selectbox(
        "Select Target Language",
        options=language_options,
        format_func=lambda x: x[1],
    )[0]

file_input = st.file_uploader("Upload SRT File", type=["srt"])

if file_input is not None and source_language_code and target_language_code:
    # Local import: the file's import block only brings in BytesIO.
    from io import StringIO

    translated_srt = translate_srt(file_input, source_language_code, target_language_code)

    # SubRipFile.save() opens its first argument as a filesystem path, so it
    # cannot target an in-memory buffer. write_into() accepts any text
    # stream, so serialize there and encode once for the download.
    text_buffer = StringIO()
    translated_srt.write_into(text_buffer)
    translated_srt_bytes = text_buffer.getvalue().encode("utf-8")

    # Serve the translated subtitles as a downloadable file.
    st.download_button(
        label="Download Translated SRT",
        data=translated_srt_bytes,
        file_name="translated_subtitles.srt",
        mime="text/plain",
    )
|