File size: 4,764 Bytes
713d093
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d7edd82
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# -*- coding: utf-8 -*-

import os
import re
import gradio as gr
from deep_translator import GoogleTranslator
from sentence_transformers import SentenceTransformer, util

# Load the sentence-embedding model once at import time; sim_ko() reuses this
# single instance for every similarity request (first run may take 1-2 minutes).
# NOTE(review): sim_ko() feeds this model Korean text. This checkpoint looks
# like the English-focused paraphrase model -- confirm it is suitable, or
# evaluate a multilingual checkpoint, before trusting the similarity scores.
embed_model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")

def clean(s: str) -> str:
    """Collapse every whitespace run to one space and trim both ends.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    collapsed = re.sub(r"\s+", " ", s) if s else ""
    return collapsed.strip()

def ko2en(text: str) -> str:
    """Translate *text* from Korean to English with ``GoogleTranslator``."""
    translator = GoogleTranslator(source="ko", target="en")
    return translator.translate(text)

def en2ko(text: str) -> str:
    """Translate *text* from English to Korean with ``GoogleTranslator``."""
    translator = GoogleTranslator(source="en", target="ko")
    return translator.translate(text)

def sim_ko(a: str, b: str) -> float:
    """Return the cosine similarity of *a* and *b* scaled to a percentage.

    Both strings are embedded in one call to the module-level
    ``embed_model``; the cosine of the two vectors is multiplied by 100.
    If either argument is falsy, 0.0 is returned without touching the model.
    """
    if not (a and b):
        return 0.0
    vectors = embed_model.encode([a, b], convert_to_tensor=True)
    cosine = util.cos_sim(vectors[0], vectors[1]).item()
    return float(cosine) * 100.0

# tone rules (EN / KO)
def tone_en(text: str, tone: str) -> str:
    """Apply rule-based tone adjustments to an English sentence.

    The text is whitespace-normalised with ``clean`` and then rewritten
    according to *tone*, which is compared against three fixed labels:

    * first label: shorten effusive thank-you phrases to "Thank you" and
      drop the intensifiers very/really/so/truly;
    * second label: append an appreciation sentence when the text thanks the
      reader without already using "appreciate/appreciation", and prefix
      "Please" when the text does not contain it;
    * third label: make sure the text ends with sentence punctuation and
      append a standard closing line unless one is already present.

    Any other *tone* value returns the normalised text unchanged, and empty
    text returns "" for every tone.

    NOTE(review): the tone labels appear mis-encoded in this copy of the
    file; they are kept exactly as found -- confirm against the UTF-8 source.

    Args:
        text: English sentence(s) to adjust; ``None``/empty is allowed.
        tone: Tone label selecting the rule set.

    Returns:
        The adjusted text.
    """
    t = clean(text)
    if not t:
        # Nothing to adjust; without this guard the third branch would
        # return ". Please let me know ..." for empty input.
        return ""
    if tone == "์ฟจํ†ค":
        # Shorten the thank-you phrases BEFORE stripping intensifiers:
        # removing "so" first turns "Thank you so much" into
        # "Thank you much", after which the phrase pattern can never match.
        t = re.sub(r"\b(Thank you so much|Thanks a lot)\b", "Thank you", t, flags=re.I)
        t = re.sub(r"\b(very|really|so|truly)\b\s*", "", t, flags=re.I)
        # Removing a trailing intensifier can leave a dangling space.
        t = t.strip()
    elif tone == "์›œํ†ค":
        thanks = re.search(r"\b(Thank you|Thanks)\b", t, re.I)
        appreciates = re.search(r"\bappreciat(e|ion)\b", t, re.I)
        if thanks and not appreciates:
            t += " I sincerely appreciate your support."
        if not re.search(r"\bplease\b", t, re.I):
            # Lower-case the old sentence start, but never the pronoun "I"
            # ("Please i am ..." would be wrong).
            rest = t if re.match(r"I\b", t) else t[0].lower() + t[1:]
            t = "Please " + rest
    elif tone == "๋น„์ฆˆ๋‹ˆ์Šคํ†ค":
        # Only add a period when the text has no terminal punctuation yet,
        # so questions and exclamations do not become "?." / "!.".
        if not t.endswith((".", "!", "?")):
            t += "."
        if not re.search(r"\b(Please let me know|I look forward to)\b", t, re.I):
            t += " Please let me know if you need any further information."
    return t

def tone_ko(text: str, tone: str) -> str:
    """Apply rule-based tone adjustments to a Korean sentence.

    The text is whitespace-normalised with ``clean`` and then rewritten
    according to *tone*, using the same three tone labels as ``tone_en``:

    * first label: delete a fixed set of words (with trailing whitespace)
      and replace two fixed phrases with one shorter phrase;
    * second label: expand a fixed phrase when the text contains one marker
      substring but not another, and append a fixed closing sentence unless
      one of two phrases is already present;
    * third label: normalise three phrase variants to one form and append a
      closing phrase unless the text already ends with one of three endings.

    Any other *tone* value returns the normalised text unchanged, and empty
    text returns "" for every tone.

    NOTE(review): the Korean literals appear mis-encoded in this copy of the
    file; they are kept exactly as found -- confirm against the UTF-8 source.

    Args:
        text: Korean sentence(s) to adjust; ``None``/empty is allowed.
        tone: Tone label selecting the rule set.

    Returns:
        The adjusted text.
    """
    t = clean(text)
    if not t:
        # Without this guard the second and third branches would return
        # only their appended suffix (with a leading space) for empty input.
        return ""
    if tone == "์ฟจํ†ค":
        t = re.sub(r"(์ •๋ง|์ง„์‹ฌ์œผ๋กœ|๋„ˆ๋ฌด|๋งค์šฐ)\s*", "", t)
        t = re.sub(r"(๋ถ€ํƒ๋“œ๋ฆฝ๋‹ˆ๋‹ค|๊ฐ์‚ฌ๋“œ๋ฆฝ๋‹ˆ๋‹ค)", "๊ฐ์‚ฌํ•ฉ๋‹ˆ๋‹ค", t)
    elif tone == "์›œํ†ค":
        if "๊ฐ์‚ฌ" in t and "์ง„์‹ฌ" not in t:
            t = t.replace("๊ฐ์‚ฌํ•ฉ๋‹ˆ๋‹ค", "์ง„์‹ฌ์œผ๋กœ ๊ฐ์‚ฌํ•ฉ๋‹ˆ๋‹ค")
        if not re.search(r"(๋ถ€ํƒ๋“œ๋ฆฝ๋‹ˆ๋‹ค|๋„์™€์ฃผ์‹œ๋ฉด ๊ฐ์‚ฌํ•˜๊ฒ ์Šต๋‹ˆ๋‹ค)", t):
            t += " ๋„์™€์ฃผ์‹œ๋ฉด ๊ฐ์‚ฌํ•˜๊ฒ ์Šต๋‹ˆ๋‹ค."
    elif tone == "๋น„์ฆˆ๋‹ˆ์Šคํ†ค":
        t = re.sub(r"(๊ณ ๋งˆ์›Œ์š”|๊ณ ๋งˆ์›Œ|๊ฐ์‚ฌํ•ด์š”)", "๊ฐ์‚ฌํ•ฉ๋‹ˆ๋‹ค", t)
        # str.endswith accepts a tuple: one call instead of an `or` chain.
        if not t.endswith(("์Šต๋‹ˆ๋‹ค.", "๋“œ๋ฆฝ๋‹ˆ๋‹ค.", "์š”.")):
            t += " ๊ฐ์‚ฌํ•ฉ๋‹ˆ๋‹ค."
    return t

def td_translate(ko_text: str, tone: str, target: float = 98.2):
    """Run the KO -> EN -> KO round trip, score it, and tone-adjust both sides.

    Args:
        ko_text: Korean source text; it is whitespace-normalised first.
        tone: Tone label forwarded to ``tone_en`` and ``tone_ko``.
        target: Similarity threshold in percent that the round trip must reach.

    Returns:
        A 7-tuple ``(message, en, back, en_tuned, ko_tuned, similarity, report)``.
        ``message`` is non-empty only when the input is empty or translation
        raised; in that case the remaining fields are blank / 0.0.
    """
    source = clean(ko_text)
    if not source:
        return "์›๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”.", "", "", "", "", 0.0, ""

    try:
        english = ko2en(source)
        round_trip = en2ko(english)
    except Exception as exc:
        # Any translator failure is reported to the caller as a message
        # instead of propagating.
        return f"[๋ฒˆ์—ญ ์˜ค๋ฅ˜] {exc}", "", "", "", "", 0.0, ""

    score = sim_ko(source, round_trip)
    english_toned = tone_en(english, tone)
    korean_toned = tone_ko(round_trip, tone)

    if score >= float(target):
        verdict = "๊ธฐ์ค€ ์ถฉ์กฑ"
    else:
        verdict = "๊ธฐ์ค€ ๋ฏธ๋‹ฌ (ํ‘œํ˜„์„ ๊ตฌ์ฒดํ™”ํ•ด ๋ณด์„ธ์š”)"
    summary = f"{english_toned}\n\n{verdict} (์œ ์‚ฌ๋„: {score:.2f}%, ๊ธฐ์ค€: {float(target):.1f}%)"
    footer = f"ํ†ค: {tone} | KO->EN->KO ์žฌ๋ฒˆ์—ญ ๊ธฐ๋ฐ˜ ์˜๋ฏธ ๊ฒ€์ฆ"
    report = summary + "\n" + footer
    return "", english, round_trip, english_toned, korean_toned, score, report

# Gradio UI: tone selector + threshold slider, a Korean input box, and six
# output widgets that are filled by one button click.
with gr.Blocks(title="Tha Deeply Translator") as app:
    gr.Markdown(value="Tha Deeply โ€” ํ†ค ์Šค์œ„์น˜ / ์žฌ๋ฒˆ์—ญ / ์œ ์‚ฌ๋„ ๋ถ„์„๊ธฐ")

    # Initial widget values.
    default_tone = "๋น„์ฆˆ๋‹ˆ์Šคํ†ค"
    default_threshold = 98.2
    default_text = "์ •๋ง ๊ณ ๋ง™์Šต๋‹ˆ๋‹ค. ๋‚ด์ผ ์˜ค์ „๊นŒ์ง€ ํšŒ์‹  ๋ถ€ํƒ๋“œ๋ฆฝ๋‹ˆ๋‹ค."

    # Controls: tone and similarity threshold share one row.
    with gr.Row():
        tone = gr.Radio(
            choices=["์ฟจํ†ค", "์›œํ†ค", "๋น„์ฆˆ๋‹ˆ์Šคํ†ค"],
            value=default_tone,
            label="ํ†ค ์„ ํƒ",
        )
        thres = gr.Slider(
            minimum=0,
            maximum=100,
            value=default_threshold,
            step=0.1,
            label="์œ ์‚ฌ๋„ ๊ธฐ์ค€(%)",
        )
    ko_in = gr.Textbox(
        lines=4,
        label="์›๋ฌธ(ํ•œ๊ตญ์–ด)",
        value=default_text,
        placeholder="ํ•œ๊ตญ์–ด ๋ฌธ์žฅ์„ ์ž…๋ ฅํ•˜์„ธ์š”",
    )
    run = gr.Button(value="์‹คํ–‰")

    # Outputs, in the same order as the values returned by _run.
    en_out = gr.Textbox(lines=4, label="1) ์˜๋ฌธ ๋ฒˆ์—ญ(EN)")
    ko_back = gr.Textbox(lines=4, label="2) ์žฌ๋ฒˆ์—ญ(KO)")
    en_tone = gr.Textbox(lines=4, label="3) ํ†ค ๋ณด์ • ์˜๋ฌธ(EN)")
    ko_tone = gr.Textbox(lines=4, label="4) ํ†ค ๋ณด์ • ํ•œ๊ธ€(KO)")
    sim = gr.Number(label="์˜๋ฏธ ์œ ์‚ฌ๋„(%)", precision=2)
    final = gr.Textbox(lines=6, label="์™„์ „ํ•œ ์˜์–ด ์ œ์•ˆ + ์ƒํƒœ")

    def _run(ko_text, t, thr):
        """Adapt td_translate's 7-tuple to the six output widgets.

        When td_translate reports a message, the first five outputs are
        blanked and the message is shown in the last textbox.
        """
        error, *outputs = td_translate(ko_text, t, thr)
        if error:
            return "", "", "", "", 0.0, error
        return tuple(outputs)

    run.click(
        fn=_run,
        inputs=[ko_in, tone, thres],
        outputs=[en_out, ko_back, en_tone, ko_tone, sim, final],
    )

if __name__ == "__main__":
    # Listen on all interfaces; the port is read from $PORT (default 7860).
    port = int(os.environ.get("PORT", "7860"))
    app.launch(
        server_name="0.0.0.0",
        server_port=port,
    )