Spaces:
Sleeping
Sleeping
| import re | |
# --- Grapheme -> IPA map ---
# Keys are lowercase Indonesian spellings, values are their IPA rendering.
# The insertion order of the entries is kept exactly as originally written.
ipa_map = {
    # Digraphs
    "ng": "ŋ",
    "ny": "ɲ",
    "sy": "ʃ",
    "kh": "x",
    # Affricates
    "c": "tʃ",
    "j": "dʒ",
    # Stops
    "b": "b",
    "d": "d̪",
    "t": "t̪",
    "g": "ɡ",
    "k": "k",
    "p": "p",
    # Nasals, liquids, fricatives and glides
    "m": "m",
    "n": "n",
    "l": "l",
    "s": "s",
    "h": "h",
    "r": "r",
    "w": "w",
    "y": "j",
    # Vowels
    "a": "a",
    "i": "i",
    "u": "u",
    "o": "o",
    "e": "ə",
}
# Spoken names of the Latin letters, keyed by lowercase letter.
letter_words = dict(zip(
    "abcdefghijklmnopqrstuvwxyz",
    (
        "a be ce de e ef ge ha "
        "i je ka el em en o pe "
        "ki er es te u fe we eks "
        "ye zet"
    ).split(),
))
# Indonesian word for each single decimal digit, keyed by the digit character.
digit_words = {
    str(value): word
    for value, word in enumerate((
        "nol", "satu", "dua", "tiga", "empat",
        "lima", "enam", "tujuh", "delapan", "sembilan",
    ))
}
# --- Number to words (named scales up to "triliun") ---
def number_to_words(n: int) -> str:
    """Spell an integer out in Indonesian words.

    Args:
        n: The value to convert. It is passed through ``int()`` first, so a
            digit string such as ``"42"`` is accepted as well.

    Returns:
        The Indonesian reading, e.g. ``1001 -> "seribu satu"``. Negative
        values are read as ``"minus "`` followed by the absolute value.

    Raises:
        ValueError, TypeError: Propagated from ``int(n)`` when ``n`` cannot
            be converted to an integer.
    """
    n = int(n)
    if n < 0:
        # Previously a negative value reached the word list with a negative
        # index and silently produced an unrelated word (-5 -> "tujuh").
        return "minus " + number_to_words(-n)
    if n == 0:
        return "nol"

    small = (
        "nol", "satu", "dua", "tiga", "empat", "lima", "enam",
        "tujuh", "delapan", "sembilan", "sepuluh", "sebelas",
    )

    def _below_thousand(x: int) -> str:
        """Spell 0 <= x < 1000."""
        if x < 12:
            return small[x]
        if x < 20:
            return small[x - 10] + " belas"
        if x < 100:
            head, rest = divmod(x, 10)
            prefix = small[head] + " puluh"
        else:
            head, rest = divmod(x, 100)
            # "one hundred" is the fused form "seratus", not "satu ratus".
            prefix = "seratus" if head == 1 else small[head] + " ratus"
        return prefix + (" " + _below_thousand(rest) if rest else "")

    scales = (
        (1_000_000_000_000, "triliun"),
        (1_000_000_000, "miliar"),
        (1_000_000, "juta"),
        (1000, "ribu"),
    )
    parts = []
    remaining = n
    for value, name in scales:
        if remaining < value:
            continue
        count, remaining = divmod(remaining, value)
        if value == 1000 and count == 1:
            # Exactly one thousand is the fused form "seribu".
            parts.append("seribu")
        else:
            # The count can itself be >= 1000 (beyond the largest named
            # scale), so it is spelled recursively.
            parts.append(number_to_words(count) + " " + name)
    if remaining:
        parts.append(_below_thousand(remaining))
    return " ".join(parts)
# --- Phone numbers ---
# A run of at least eight digits, or "+62" followed by at least seven digits,
# that is not glued to a word character on either side.
phone_pattern = re.compile(r'(?<!\w)(?:\+62|\d)\d{7,}(?!\w)')


def expand_phones(text: str) -> str:
    """Replace phone-number-like digit runs with digit-by-digit words.

    Every match of ``phone_pattern`` is rewritten as the space-separated
    Indonesian names of its digits. A leading ``+`` is not spoken; only the
    digits are kept.

    Args:
        text: Input text.

    Returns:
        The text with each matched number spelled one digit at a time.
    """
    def repl(m):
        digits = re.findall(r'\d', m.group(0))
        # ``\d`` also matches non-ASCII decimal digits, which are not keys of
        # ``digit_words``; normalise through int() so they no longer raise
        # KeyError.
        return " ".join(digit_words[str(int(d))] for d in digits)

    return phone_pattern.sub(repl, text)
# --- General numbers ---
def expand_numbers(text: str) -> str:
    """Replace every digit run in ``text`` with its Indonesian reading.

    A run without a leading zero is read as a whole number via
    ``number_to_words``. A multi-digit run that starts with zero (codes such
    as "007", number fragments such as "0812") is read digit by digit,
    because converting it with ``int()`` would silently drop the zeros.

    Args:
        text: Input text.

    Returns:
        The text with all digit runs spelled out.
    """
    def repl(m):
        token = m.group()
        if len(token) > 1 and int(token[0]) == 0:
            # Keep the leading zeros audible: "007" -> "nol nol tujuh".
            return " ".join(number_to_words(digit) for digit in token)
        return number_to_words(int(token))

    return re.sub(r'\d+', repl, text)
# --- Abbreviations ---
# Two or more consecutive ASCII capitals that are not glued to a word
# character on either side.
abbr_pattern = re.compile(r'(?<!\w)([A-Z]{2,})(?!\w)')


def expand_abbreviations(text: str) -> str:
    """Spell all-caps abbreviations out letter by letter.

    Each match of ``abbr_pattern`` becomes the space-separated Indonesian
    names of its letters, looked up in ``letter_words``.

    Args:
        text: Input text; case must still be intact.

    Returns:
        The text with every matched abbreviation replaced by its spelling.
    """
    # Abbreviations with a fixed reading that bypasses the letter table.
    fixed_readings = {"HP": "ha pe"}

    def spell_out(match):
        abbr = match.group(1)
        reading = fixed_readings.get(abbr)
        if reading is not None:
            return reading
        return " ".join(letter_words[letter.lower()] for letter in abbr)

    return abbr_pattern.sub(spell_out, text)
# --- IPA ---
def apply_ipa_map(text: str) -> str:
    """Convert lowercased text to IPA using ``ipa_map``.

    The text is lowercased, every key of ``ipa_map`` is replaced by its IPA
    value, runs of whitespace are collapsed to a single space and the result
    is stripped. Characters that are not keys of ``ipa_map`` are left as is.

    All keys are substituted in ONE pass over the text (longest key first at
    each position). Replacing key by key let later keys rewrite the output of
    earlier ones: "c" became "tʃ" and the following "t" rule then turned it
    into "t̪ʃ"; likewise "j" -> "dʒ" -> "d̪ʒ".

    Args:
        text: Input text, typically with numbers and abbreviations already
            expanded to words.

    Returns:
        The IPA transcription as a single-spaced, stripped string.
    """
    lowered = text.lower()
    # Longest keys first so digraphs such as "ng" win over "n" + "g".
    keys = sorted(ipa_map, key=len, reverse=True)
    if keys:
        matcher = re.compile("|".join(re.escape(key) for key in keys))
        # A callable replacement avoids backslash interpretation of values.
        lowered = matcher.sub(lambda m: ipa_map[m.group(0)], lowered)
    return re.sub(r'\s+', ' ', lowered).strip()
# --- Pipeline ---
def indo_to_ipa(text: str) -> str:
    """Convert Indonesian text to an IPA transcription.

    Runs the normalisation stages in a fixed order and feeds the result to
    ``apply_ipa_map``:

    1. ``expand_phones`` - phone numbers, digit by digit.
    2. ``expand_numbers`` - all remaining digit runs.
    3. ``expand_abbreviations`` - all-caps abbreviations.
    4. ``apply_ipa_map`` - grapheme-to-IPA mapping.

    Args:
        text: Raw input text.

    Returns:
        The IPA transcription of ``text``.
    """
    # Phones go first because the general number stage rewrites every digit
    # run; abbreviations go before the IPA stage because that stage
    # lowercases the text and the abbreviation pattern needs capitals.
    normalized = text
    for stage in (expand_phones, expand_numbers, expand_abbreviations):
        normalized = stage(normalized)
    return apply_ipa_map(normalized)
| # import re | |
| # ipa_map = { | |
| # "ng": "ŋ", | |
| # "ny": "ɲ", | |
| # "sy": "ʃ", | |
| # "kh": "x", | |
| # "c": "tʃ", | |
| # "j": "dʒ", | |
| # "y": "j", | |
| # "r": "r", | |
| # "x": "ks", | |
| # "a": "a", | |
| # "i": "i", | |
| # "u": "u", | |
| # "e": "ə", | |
| # "o": "o", | |
| # "b": "b", | |
| # "d": "d̪", | |
| # "t": "t̪", | |
| # "g": "ɡ", | |
| # "k": "k", | |
| # "p": "p", | |
| # "m": "m", | |
| # "n": "n", | |
| # "l": "l", | |
| # "s": "s", | |
| # "h": "h", | |
| # "w": "w", | |
| # } | |
| # num_words = { | |
| # 0: "nol", | |
| # 1: "satu", | |
| # 2: "dua", | |
| # 3: "tiga", | |
| # 4: "empat", | |
| # 5: "lima", | |
| # 6: "enam", | |
| # 7: "tujuh", | |
| # 8: "delapan", | |
| # 9: "sembilan", | |
| # 10: "sepuluh", | |
| # 11: "sebelas" | |
| # } | |
| # def number_to_words(n: int) -> str: | |
| # """Konversi angka 0–9999 ke kata dalam bahasa Indonesia""" | |
| # if n < 12: | |
| # return num_words[n] | |
| # elif n < 20: | |
| # return number_to_words(n-10) + " belas" | |
| # elif n < 100: | |
| # puluhan, sisa = divmod(n, 10) | |
| # result = number_to_words(puluhan) + " puluh" | |
| # if sisa: | |
| # result += " " + number_to_words(sisa) | |
| # return result | |
| # elif n < 200: | |
| # return "seratus" + (" " + number_to_words(n-100) if n > 100 else "") | |
| # elif n < 1000: | |
| # ratusan, sisa = divmod(n, 100) | |
| # result = number_to_words(ratusan) + " ratus" | |
| # if sisa: | |
| # result += " " + number_to_words(sisa) | |
| # return result | |
| # elif n < 2000: | |
| # return "seribu" + (" " + number_to_words(n-1000) if n > 1000 else "") | |
| # elif n < 10000: | |
| # ribuan, sisa = divmod(n, 1000) | |
| # result = number_to_words(ribuan) + " ribu" | |
| # if sisa: | |
| # result += " " + number_to_words(sisa) | |
| # return result | |
| # else: | |
| # return str(n) # fallback | |
| # def expand_abbreviation(word: str) -> str: | |
| # """Ubah singkatan (huruf kapital) jadi ucapan Indonesia""" | |
| # if word.isupper() and len(word) > 1: # contoh: KTP, DPR, RI | |
| # return " ".join(letter_words.get(ch.lower(), ch) for ch in word) | |
| # return word | |
| # letter_words = { | |
| # "a": "a", | |
| # "b": "be", | |
| # "c": "ce", | |
| # "d": "de", | |
| # "e": "e", | |
| # "f": "ef", | |
| # "g": "ge", | |
| # "h": "ha", | |
| # "i": "i", | |
| # "j": "je", | |
| # "k": "ka", | |
| # "l": "el", | |
| # "m": "em", | |
| # "n": "en", | |
| # "o": "o", | |
| # "p": "pe", | |
| # "q": "ki", | |
| # "r": "er", | |
| # "s": "es", | |
| # "t": "te", | |
| # "u": "u", | |
| # "v": "fe", | |
| # "w": "we", | |
| # "x": "eks", | |
| # "y": "ye", | |
| # "z": "zet", | |
| # } | |
| # def indo_to_ipa(text: str) -> str: | |
| # text = text.lower() | |
| # # Tangani singkatan (huruf kapital semua) | |
| # words = [] | |
| # for w in text.split(): | |
| # if w.isupper() and len(w) > 1: | |
| # words.append(expand_abbreviation(w)) | |
| # else: | |
| # words.append(w) | |
| # text = " ".join(words) | |
| # # Tangani angka → kata | |
| # def replace_number(match): | |
| # num = int(match.group()) | |
| # return number_to_words(num) | |
| # text = re.sub(r"\d+", replace_number, text) | |
| # # Konversi huruf → IPA | |
| # for k in sorted(ipa_map.keys(), key=lambda x: -len(x)): | |
| # text = re.sub(k, ipa_map[k], text) | |
| # return text | |
| # # def indo_to_ipa(text: str) -> str: | |
| # # text = text.lower() | |
| # # # Cari semua angka dalam teks dan ubah ke kata | |
| # # def replace_number(match): | |
| # # num = int(match.group()) | |
| # # return number_to_words(num) | |
| # # text = re.sub(r"\d+", replace_number, text) | |
| # # # Konversi huruf → IPA | |
| # # for k in sorted(ipa_map.keys(), key=lambda x: -len(x)): | |
| # # text = re.sub(k, ipa_map[k], text) | |
| # # return text |