Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| import os | |
| import re | |
| import gradio as gr | |
| from deep_translator import GoogleTranslator | |
| from sentence_transformers import SentenceTransformer, util | |
# Model load (the first run may take 1-2 minutes).
# NOTE(review): td_translate() passes Korean text to sim_ko(), which encodes it
# with this model; the checkpoint name carries no "multilingual" marker —
# confirm that it embeds Korean adequately before trusting the scores.
embed_model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
def clean(s: str) -> str:
    """Collapse every whitespace run in *s* to one space and trim both ends.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    collapsed = re.sub(r"\s+", " ", s) if s else ""
    return collapsed.strip()
def ko2en(text: str) -> str:
    """Translate *text* from Korean ("ko") to English ("en") via GoogleTranslator."""
    translator = GoogleTranslator(source="ko", target="en")
    return translator.translate(text)
def en2ko(text: str) -> str:
    """Translate *text* from English ("en") to Korean ("ko") via GoogleTranslator."""
    translator = GoogleTranslator(source="en", target="ko")
    return translator.translate(text)
def sim_ko(a: str, b: str) -> float:
    """Return the cosine similarity of *a* and *b*, scaled by 100.

    Both strings are encoded in a single call to the module-level
    ``embed_model``.  When either string is empty (or otherwise falsy) the
    result is ``0.0`` and the model is not invoked.
    """
    if a and b:
        vectors = embed_model.encode([a, b], convert_to_tensor=True)
        cosine = util.cos_sim(vectors[0], vectors[1])
        return float(cosine.item()) * 100.0
    return 0.0
| # tone rules (EN / KO) | |
def tone_en(text: str, tone: str) -> str:
    """Apply rule-based tone adjustments to an English sentence.

    The input is whitespace-normalised with ``clean`` first.  ``tone`` is
    compared against three labels; any other value returns the normalised
    text unchanged.

    * First label: shorten "Thank you so much" / "Thanks a lot" to
      "Thank you", then drop the words very / really / so / truly.
    * Second label: append an appreciation sentence when the text thanks
      someone without already using "appreciate"/"appreciation", and prefix
      the text with "Please" (lower-casing its first character) when the
      word "please" is absent.
    * Third label: make sure the text ends with terminal punctuation and
      append a closing line unless one of the two closing phrases is present.

    NOTE(review): the tone labels appear mis-encoded in this copy of the
    file; they are kept exactly as found so they still match the callers.

    Args:
        text: English text to adjust.
        tone: Tone label selecting one of the rule sets above.

    Returns:
        The adjusted text, or an empty string when the input is empty.
    """
    t = clean(text)
    if not t:
        # Nothing to tune; previously the third branch produced a lone "."
        # followed by the boilerplate closing sentence.
        return ""
    if tone == "์ฟจํค":
        # Shorten the thank-you phrases BEFORE stripping intensifiers: the
        # other order removes "so" first, turning "Thank you so much" into
        # "Thank you much" and making this pattern unmatchable.
        t = re.sub(r"\b(Thank you so much|Thanks a lot)\b", "Thank you", t, flags=re.I)
        # strip() drops the dangling space left when the removed word was last.
        t = re.sub(r"\b(very|really|so|truly)\b\s*", "", t, flags=re.I).strip()
    elif tone == "์ํค":
        if re.search(r"\b(Thank you|Thanks)\b", t, re.I) and not re.search(r"\bappreciat(e|ion)\b", t, re.I):
            t += " I sincerely appreciate your support."
        if not re.search(r"\bplease\b", t, re.I):
            t = "Please " + t[0].lower() + t[1:]
    elif tone == "๋น์ฆ๋์คํค":
        # Accept any sentence terminator so "How are you?" does not become
        # "How are you?.".
        if not t.endswith((".", "!", "?")):
            t += "."
        if not re.search(r"\b(Please let me know|I look forward to)\b", t, re.I):
            t += " Please let me know if you need any further information."
    return t
def tone_ko(text: str, tone: str) -> str:
    """Apply rule-based tone adjustments to the back-translated sentence.

    The input is whitespace-normalised with ``clean`` first.  ``tone`` is
    compared against three labels; any other value returns the normalised
    text unchanged.

    * First label: delete every match of the first pattern (together with
      trailing whitespace), then rewrite matches of the second pattern to a
      fixed phrase.
    * Second label: when the text contains one marker substring but not the
      other, expand a fixed phrase; then append a closing sentence unless
      one of two phrases is already present.
    * Third label: rewrite three casual phrases to a fixed phrase and append
      a closing sentence unless the text already ends with one of three
      endings.

    NOTE(review): the string literals appear mis-encoded in this copy of the
    file; they are reproduced exactly as found.
    """
    out = clean(text)

    if tone == "์ฟจํค":
        trimmed = re.sub(r"(์ ๋ง|์ง์ฌ์ผ๋ก|๋๋ฌด|๋งค์ฐ)\s*", "", out)
        return re.sub(r"(๋ถํ๋๋ฆฝ๋๋ค|๊ฐ์ฌ๋๋ฆฝ๋๋ค)", "๊ฐ์ฌํฉ๋๋ค", trimmed)

    if tone == "์ํค":
        if "๊ฐ์ฌ" in out and "์ง์ฌ" not in out:
            out = out.replace("๊ฐ์ฌํฉ๋๋ค", "์ง์ฌ์ผ๋ก ๊ฐ์ฌํฉ๋๋ค")
        has_closing = re.search(r"(๋ถํ๋๋ฆฝ๋๋ค|๋์์ฃผ์๋ฉด ๊ฐ์ฌํ๊ฒ ์ต๋๋ค)", out)
        if has_closing is None:
            out += " ๋์์ฃผ์๋ฉด ๊ฐ์ฌํ๊ฒ ์ต๋๋ค."
        return out

    if tone == "๋น์ฆ๋์คํค":
        out = re.sub(r"(๊ณ ๋ง์์|๊ณ ๋ง์|๊ฐ์ฌํด์)", "๊ฐ์ฌํฉ๋๋ค", out)
        accepted_endings = ("์ต๋๋ค.", "๋๋ฆฝ๋๋ค.", "์.")
        if not out.endswith(accepted_endings):
            out += " ๊ฐ์ฌํฉ๋๋ค."
        return out

    return out
def td_translate(ko_text: str, tone: str, target: float = 98.2):
    """Run the translate / back-translate / score / tone pipeline.

    Steps: normalise the input with ``clean``; translate it with ``ko2en``;
    translate the result back with ``en2ko``; score the source against the
    back-translation with ``sim_ko``; tone-adjust both translations with
    ``tone_en`` / ``tone_ko``; build a report comparing the score with
    ``target``.

    Returns:
        A 7-tuple ``(message, en, back, en_tuned, ko_tuned, similarity,
        report)``.  ``message`` is non-empty only when the input is empty or
        a translation call raised; in that case the text fields are empty
        strings and the similarity is ``0.0``.
    """
    blank_rest = ("", "", "", "", 0.0, "")

    source = clean(ko_text)
    if not source:
        return ("์๋ฌธ์ ์ ๋ ฅํ์ธ์.", *blank_rest)

    try:
        english = ko2en(source)
        round_trip = en2ko(english)
    except Exception as e:
        # Any translator failure is reported to the caller as a message.
        return (f"[๋ฒ์ญ ์ค๋ฅ] {e}", *blank_rest)

    score = sim_ko(source, round_trip)
    english_tuned = tone_en(english, tone)
    korean_tuned = tone_ko(round_trip, tone)

    threshold = float(target)
    if score >= threshold:
        verdict = "๊ธฐ์ค ์ถฉ์กฑ"
    else:
        verdict = "๊ธฐ์ค ๋ฏธ๋ฌ (ํํ์ ๊ตฌ์ฒดํํด ๋ณด์ธ์)"

    summary = f"{english_tuned}\n\n{verdict} (์ ์ฌ๋: {score:.2f}%, ๊ธฐ์ค: {threshold:.1f}%)"
    footer = f"ํค: {tone} | KO->EN->KO ์ฌ๋ฒ์ญ ๊ธฐ๋ฐ ์๋ฏธ ๊ฒ์ฆ"
    report = "\n".join((summary, footer))
    return "", english, round_trip, english_tuned, korean_tuned, score, report
# Gradio UI: three inputs (tone, similarity threshold, source text), a run
# button, and six output components fed by td_translate().
# NOTE(review): indentation was lost in this copy of the file; the Row is
# assumed to hold only the tone selector and the threshold slider — confirm
# against the intended layout.
with gr.Blocks(title="Tha Deeply Translator") as app:
    gr.Markdown("Tha Deeply โ ํค ์ค์์น / ์ฌ๋ฒ์ญ / ์ ์ฌ๋ ๋ถ์๊ธฐ")
    # Initial values for the input widgets below.
    default_tone = "๋น์ฆ๋์คํค"
    default_threshold = 98.2
    default_text = "์ ๋ง ๊ณ ๋ง์ต๋๋ค. ๋ด์ผ ์ค์ ๊น์ง ํ์ ๋ถํ๋๋ฆฝ๋๋ค."
    with gr.Row():
        tone = gr.Radio(["์ฟจํค", "์ํค", "๋น์ฆ๋์คํค"], value=default_tone, label="ํค ์ ํ")
        thres = gr.Slider(0, 100, value=default_threshold, step=0.1, label="์ ์ฌ๋ ๊ธฐ์ค(%)")
    ko_in = gr.Textbox(lines=4, label="์๋ฌธ(ํ๊ตญ์ด)", value=default_text, placeholder="ํ๊ตญ์ด ๋ฌธ์ฅ์ ์ ๋ ฅํ์ธ์")
    run = gr.Button("์คํ")
    # Output components, in the order _run() returns their values.
    en_out = gr.Textbox(lines=4, label="1) ์๋ฌธ ๋ฒ์ญ(EN)")
    ko_back = gr.Textbox(lines=4, label="2) ์ฌ๋ฒ์ญ(KO)")
    en_tone = gr.Textbox(lines=4, label="3) ํค ๋ณด์ ์๋ฌธ(EN)")
    ko_tone = gr.Textbox(lines=4, label="4) ํค ๋ณด์ ํ๊ธ(KO)")
    sim = gr.Number(label="์๋ฏธ ์ ์ฌ๋(%)", precision=2)
    final = gr.Textbox(lines=6, label="์์ ํ ์์ด ์ ์ + ์ํ")

    def _run(ko_text, t, thr):
        """Adapt td_translate()'s 7-tuple to the six output components.

        When td_translate() returns a non-empty message as its first element,
        the first four outputs are blanked, the similarity is set to 0.0 and
        the message is shown in the last text box.
        """
        msg, en, back, en_tuned, ko_tuned, simv, fin = td_translate(ko_text, t, thr)
        if msg:
            return "", "", "", "", 0.0, msg
        return en, back, en_tuned, ko_tuned, simv, fin

    # Button click: three inputs in, six outputs out (same order as _run's return).
    run.click(_run, [ko_in, tone, thres],
              [en_out, ko_back, en_tone, ko_tone, sim, final])
if __name__ == "__main__":
    # Listen on all interfaces; the port is read from $PORT and falls back to 7860.
    port = int(os.environ.get("PORT", "7860"))
    app.launch(server_port=port, server_name="0.0.0.0")