Spaces:
Sleeping
Sleeping
# Punctuation marks kept as symbols. An earlier revision of this list lacked
# the inverted marks "¿" and "¡".
punctuation = ["!", "?", "…", ",", ".", "'", "-", "¿", "¡"]
# Punctuation plus the two special tokens "SP" and "UNK".
pu_symbols = punctuation + ["SP", "UNK"]
# Padding symbol; it is placed first when the full symbol table is assembled.
pad = "_"
# Chinese
# Phoneme inventory for Chinese; order here is irrelevant to the combined
# table, which sorts the union of all inventories.
zh_symbols = [
    "E",
    "En",
    "a",
    "ai",
    "an",
    "ang",
    "ao",
    "b",
    "c",
    "ch",
    "d",
    "e",
    "ei",
    "en",
    "eng",
    "er",
    "f",
    "g",
    "h",
    "i",
    "i0",
    "ia",
    "ian",
    "iang",
    "iao",
    "ie",
    "in",
    "ing",
    "iong",
    "ir",
    "iu",
    "j",
    "k",
    "l",
    "m",
    "n",
    "o",
    "ong",
    "ou",
    "p",
    "q",
    "r",
    "s",
    "sh",
    "t",
    "u",
    "ua",
    "uai",
    "uan",
    "uang",
    "ui",
    "un",
    "uo",
    "v",
    "van",
    "ve",
    "vn",
    "w",
    "x",
    "y",
    "z",
    "zh",
    "AA",
    "EE",
    "OO",
]
# Number of tone ids reserved for Chinese.
num_zh_tones = 6
# Japanese
# Phoneme inventory for Japanese; entries ending in ":" are distinct symbols
# from their bare counterparts (e.g. "a" vs "a:").
ja_symbols = [
    "N",
    "a",
    "a:",
    "b",
    "by",
    "ch",
    "d",
    "dy",
    "e",
    "e:",
    "f",
    "g",
    "gy",
    "h",
    "hy",
    "i",
    "i:",
    "j",
    "k",
    "ky",
    "m",
    "my",
    "n",
    "ny",
    "o",
    "o:",
    "p",
    "py",
    "q",
    "r",
    "ry",
    "s",
    "sh",
    "t",
    "ts",
    "ty",
    "u",
    "u:",
    "w",
    "y",
    "z",
    "zy",
]
# Number of tone ids reserved for Japanese.
num_ja_tones = 1
# English
# Phoneme inventory for English.
en_symbols = [
    "aa",
    "ae",
    "ah",
    "ao",
    "aw",
    "ay",
    "b",
    "ch",
    "d",
    "dh",
    "eh",
    "er",
    "ey",
    "f",
    "g",
    "hh",
    "ih",
    "iy",
    "jh",
    "k",
    "l",
    "m",
    "n",
    "ng",
    "ow",
    "oy",
    "p",
    "r",
    "s",
    "sh",
    "t",
    "th",
    "uh",
    "uw",
    # NOTE(review): the only upper-case entry in this list. Kept exactly as
    # is; confirm with the phonemizer's output before changing its spelling.
    "V",
    "w",
    "y",
    "z",
    "zh",
]
# Number of tone ids reserved for English.
num_en_tones = 4
# Korean
# Symbol inventory for Korean: Hangul jamo plus a handful of ASCII marks.
kr_symbols = ['ᄌ', 'ᅥ', 'ᆫ', 'ᅦ', 'ᄋ', 'ᅵ', 'ᄅ', 'ᅴ', 'ᄀ', 'ᅡ', 'ᄎ', 'ᅪ', 'ᄑ', 'ᅩ', 'ᄐ', 'ᄃ', 'ᅢ', 'ᅮ', 'ᆼ', 'ᅳ', 'ᄒ', 'ᄆ', 'ᆯ', 'ᆷ', 'ᄂ', 'ᄇ', 'ᄉ', 'ᆮ', 'ᄁ', 'ᅬ', 'ᅣ', 'ᄄ', 'ᆨ', 'ᄍ', 'ᅧ', 'ᄏ', 'ᆸ', 'ᅭ', '(', 'ᄊ', ')', 'ᅲ', 'ᅨ', 'ᄈ', 'ᅱ', 'ᅯ', 'ᅫ', 'ᅰ', 'ᅤ', '~', '\\', '[', ']', '/', '^', ':', 'ㄸ', '*']
# Number of tone ids reserved for Korean.
num_kr_tones = 1
# Spanish
# Symbol inventory declared under the Spanish heading: ASCII letters followed
# by IPA characters and stress/length marks.
es_symbols = [
    "N",
    "Q",
    "a",
    "b",
    "d",
    "e",
    "f",
    "g",
    "h",
    "i",
    "j",
    "k",
    "l",
    "m",
    "n",
    "o",
    "p",
    "s",
    "t",
    "u",
    "v",
    "w",
    "x",
    "y",
    "z",
    "ɑ",
    "æ",
    "ʃ",
    "ʑ",
    "ç",
    "ɯ",
    "ɪ",
    "ɔ",
    "ɛ",
    "ɹ",
    "ð",
    "ə",
    "ɫ",
    "ɥ",
    "ɸ",
    "ʊ",
    "ɾ",
    "ʒ",
    "θ",
    "β",
    "ŋ",
    "ɦ",
    "ɡ",
    "r",
    "ɲ",
    "ʝ",
    "ɣ",
    "ʎ",
    "ˈ",
    "ˌ",
    "ː",
]
# Number of tone ids reserved for Spanish.
num_es_tones = 1
# French
# Extra symbols declared under the French heading. The first entry is written
# as an escape because U+0303 is a combining character that would otherwise
# attach to the preceding quote when displayed.
fr_symbols = [
    "\u0303",
    "œ",
    "ø",
    "ʁ",
    "ɒ",
    "ʌ",
    "ɜ",
    "ɐ",
]
# Number of tone ids reserved for French.
num_fr_tones = 1
# German
# Extra symbols declared under the German heading.
de_symbols = [
    "ʏ",
    # NOTE(review): the next entry is a single combining mark that renders
    # attached to the opening quote — presumably U+0329; confirm.
    "̩",
]
# Number of tone ids reserved for German.
num_de_tones = 1
# Russian
# Extra symbols declared under the Russian heading; includes the ASCII double
# quote and caret as symbols in their own right.
ru_symbols = [
    "ɭ",
    "ʲ",
    "ɕ",
    "\"",
    "ɵ",
    "^",
    "ɬ",
]
# Number of tone ids reserved for Russian.
num_ru_tones = 1
# combine all symbols
# Union of every per-language inventory: duplicates shared between languages
# collapse to a single entry and the result is sorted by code point.
normal_symbols = sorted(
    set(
        zh_symbols
        + ja_symbols
        + en_symbols
        + kr_symbols
        + es_symbols
        + fr_symbols
        + de_symbols
        + ru_symbols
    )
)
# Final table layout: pad first, then the sorted symbols, then the
# punctuation/special tokens.
symbols = [pad] + normal_symbols + pu_symbols
# Position in `symbols` of each punctuation/special token (first occurrence).
sil_phonemes_ids = [symbols.index(token) for token in pu_symbols]
# combine all tones
# Total number of tone ids: the sum of every language's tone count.
num_tones = (
    num_zh_tones
    + num_ja_tones
    + num_en_tones
    + num_kr_tones
    + num_es_tones
    + num_fr_tones
    + num_de_tones
    + num_ru_tones
)
# language maps
# Language code -> integer language id. "SP" shares id 5 with "ES".
language_id_map = {"ZH": 0, "JP": 1, "EN": 2, "ZH_MIX_EN": 3, 'KR': 4, 'ES': 5, 'SP': 5, 'FR': 6}
# NOTE(review): this counts keys, so the "SP"/"ES" alias makes it 8 even though
# only 7 distinct ids exist. Left unchanged on purpose; confirm whether any
# consumer depends on the current value before altering it.
num_languages = len(language_id_map)
# Language code -> offset of that language's first tone id. Each offset is the
# sum of the tone counts of the languages placed before it; "ZH_MIX_EN" shares
# the "ZH" offset and "SP" shares the "ES" offset.
# NOTE(review): neither map has an entry for German or Russian.
language_tone_start_map = {
    "ZH": 0,
    "ZH_MIX_EN": 0,
    "JP": num_zh_tones,
    "EN": num_zh_tones + num_ja_tones,
    'KR': num_zh_tones + num_ja_tones + num_en_tones,
    "ES": num_zh_tones + num_ja_tones + num_en_tones + num_kr_tones,
    "SP": num_zh_tones + num_ja_tones + num_en_tones + num_kr_tones,
    "FR": num_zh_tones + num_ja_tones + num_en_tones + num_kr_tones + num_es_tones,
}
if __name__ == "__main__":
    # When run as a script, print the symbols that the Chinese and English
    # inventories have in common, in sorted order.
    shared = set(zh_symbols) & set(en_symbols)
    print(sorted(shared))