"""Convert Indonesian text to a rough IPA transcription.

The pipeline (see ``indo_to_ipa``) spells out mobile phone numbers digit by
digit, expands remaining digit runs into Indonesian number words, spells out
upper-case abbreviations letter by letter, and finally maps graphemes to IPA
symbols.
"""
import re

# --- IPA map ---
# Grapheme -> IPA symbol. Digraphs are listed first for readability only;
# apply_ipa_map() always tries longer keys before shorter ones.
ipa_map = {
    "ng": "ŋ", "ny": "ɲ", "sy": "ʃ", "kh": "x",
    "c": "tʃ", "j": "dʒ",
    "b": "b", "d": "d̪", "t": "t̪", "g": "ɡ", "k": "k", "p": "p",
    "m": "m", "n": "n", "l": "l", "s": "s", "h": "h",
    "r": "r", "w": "w", "y": "j",
    "a": "a", "i": "i", "u": "u", "o": "o", "e": "ə",
}

# Spoken names of the letters, used to spell out abbreviations.
letter_words = {
    "a": "a", "b": "be", "c": "ce", "d": "de", "e": "e", "f": "ef",
    "g": "ge", "h": "ha", "i": "i", "j": "je", "k": "ka", "l": "el",
    "m": "em", "n": "en", "o": "o", "p": "pe", "q": "ki", "r": "er",
    "s": "es", "t": "te", "u": "u", "v": "fe", "w": "we", "x": "eks",
    "y": "ye", "z": "zet",
}

# Spoken names of the digits, used to read phone numbers digit by digit.
digit_words = {
    "0": "nol", "1": "satu", "2": "dua", "3": "tiga", "4": "empat",
    "5": "lima", "6": "enam", "7": "tujuh", "8": "delapan", "9": "sembilan",
}

# Words for 0..11; everything else below 1000 is composed from these.
_SMALL_NUMBER_WORDS = (
    "nol", "satu", "dua", "tiga", "empat", "lima", "enam",
    "tujuh", "delapan", "sembilan", "sepuluh", "sebelas",
)

# Scale words, largest first, so number_to_words() peels off big groups first.
_SCALES = (
    (1_000_000_000_000, "triliun"),
    (1_000_000_000, "miliar"),
    (1_000_000, "juta"),
    (1000, "ribu"),
)


def _below_thousand(x: int) -> str:
    """Return the Indonesian words for an integer in the range 0..999."""
    if x < 12:
        return _SMALL_NUMBER_WORDS[x]
    if x < 20:
        return _below_thousand(x - 10) + " belas"
    if x < 100:
        tens, ones = divmod(x, 10)
        words = _below_thousand(tens) + " puluh"
    elif x < 200:
        # 100..199 use the fused form "seratus" instead of "satu ratus".
        ones = x - 100
        words = "seratus"
    else:
        hundreds, ones = divmod(x, 100)
        words = _below_thousand(hundreds) + " ratus"
    return words + " " + _below_thousand(ones) if ones else words


# --- Number to words (up to trillions) ---
def number_to_words(n: int) -> str:
    """Spell out an integer in Indonesian words.

    Args:
        n: The number to spell out; anything accepted by ``int()`` works.

    Returns:
        The spelled-out number, e.g. ``"seribu dua ratus"`` for 1200.
        Negative values are prefixed with ``"minus"``. Values of a thousand
        trillion or more spell the trillion count recursively
        (e.g. ``"seribu triliun"``).
    """
    n = int(n)
    if n == 0:
        return "nol"
    if n < 0:
        # Without this guard a negative value would index the word list
        # from the end (or raise IndexError) inside _below_thousand().
        return "minus " + number_to_words(-n)

    parts = []
    remaining = n
    for value, name in _SCALES:
        if remaining < value:
            continue
        count, remaining = divmod(remaining, value)
        if value == 1000 and count == 1:
            # Exactly one thousand is the fused form "seribu".
            parts.append("seribu")
        else:
            parts.append(number_to_words(count) + " " + name)
    if remaining:
        parts.append(_below_thousand(remaining))
    return " ".join(parts)


# --- Mobile phone numbers ---
# NOTE(review): the original pattern text was lost when this file was
# extracted; this is a reconstruction, so confirm it against the intended
# formats. It accepts an Indonesian mobile number starting with 0, 62 or +62
# followed by 8, 10-13 digits in total when written with a leading 0, with
# optional single spaces or hyphens between digits.
phone_pattern = re.compile(
    r'(?<!\d)(?:\+?62[ -]?|0)8\d(?:[ -]?\d){7,10}(?!\d)'
)


def expand_phones(text: str) -> str:
    """Replace every phone number in *text* with its digits read one by one.

    Separators and a leading ``+`` inside a match are dropped; only the
    digits are spoken (e.g. ``"0812..."`` -> ``"nol delapan satu dua ..."``).
    """
    def repl(m):
        digits = re.findall(r'\d', m.group(0))
        return " ".join(digit_words[d] for d in digits)

    return phone_pattern.sub(repl, text)


# --- General numbers ---
def expand_numbers(text: str) -> str:
    """Replace every run of digits in *text* with its Indonesian words.

    Each run is converted with ``int()``, so leading zeros are not spoken.
    """
    def repl(m):
        return number_to_words(int(m.group()))

    return re.sub(r'\d+', repl, text)


# --- Abbreviations ---
# NOTE(review): the original pattern text was lost when this file was
# extracted; this is a reconstruction, so confirm it. It captures runs of two
# or more upper-case ASCII letters that are not part of a longer word.
abbr_pattern = re.compile(r'(?<![A-Za-z])([A-Z]{2,})(?![A-Za-z])')


def expand_abbreviations(text: str) -> str:
    """Spell out upper-case abbreviations in *text* letter by letter.

    For example ``"DPR"`` becomes ``"de pe er"`` and ``"HP"`` becomes
    ``"ha pe"``. No special case is needed for ``"HP"``: the generic letter
    spelling already yields ``"ha pe"``.
    """
    def repl(m):
        token = m.group(1)
        return " ".join(letter_words[ch.lower()] for ch in token)

    return abbr_pattern.sub(repl, text)


# --- IPA ---
def apply_ipa_map(text: str) -> str:
    """Lower-case *text*, map graphemes to IPA and normalise whitespace.

    All keys of ``ipa_map`` are substituted in a single pass, longest key
    first. A single pass matters: replacing key by key would re-map the
    output of earlier replacements (the ``t`` produced for ``c`` would be
    rewritten by the ``t`` rule, and likewise ``d`` for ``j``).

    Returns:
        The transcription with whitespace runs collapsed to single spaces
        and leading/trailing whitespace removed.
    """
    lowered = text.lower()
    if ipa_map:
        # Built per call so that runtime changes to ipa_map are honoured.
        keys = sorted(ipa_map, key=len, reverse=True)
        pattern = "|".join(re.escape(key) for key in keys)
        lowered = re.sub(pattern, lambda m: ipa_map[m.group(0)], lowered)
    return re.sub(r'\s+', ' ', lowered).strip()


# --- Pipeline ---
def indo_to_ipa(text: str) -> str:
    """Convert Indonesian *text* to an IPA transcription.

    The order matters: phone numbers are expanded before general numbers so
    their digits are read one by one, and abbreviations are expanded before
    the IPA mapping lower-cases the text.
    """
    # 1. mobile phone numbers
    step1 = expand_phones(text)
    # 2. ordinary numbers
    step2 = expand_numbers(step1)
    # 3. abbreviations
    step3 = expand_abbreviations(step2)
    # 4. IPA mapping
    return apply_ipa_map(step3)