Spaces:
Runtime error
Runtime error
Update text/cantonese.py
Browse files- text/cantonese.py +8 -5
text/cantonese.py
CHANGED
|
@@ -177,10 +177,16 @@ def get_jyutping(text):
|
|
| 177 |
|
| 178 |
words = word_segmentation(text)
|
| 179 |
jyutping_array = []
|
|
|
|
|
|
|
| 180 |
|
| 181 |
for word in words:
|
| 182 |
-
if word
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
else:
|
| 185 |
jyutpings = ""
|
| 186 |
|
|
@@ -189,9 +195,6 @@ def get_jyutping(text):
|
|
| 189 |
else:
|
| 190 |
jyutpings = word2jyutping(word)
|
| 191 |
|
| 192 |
-
if 'la1' in jyutpings:
|
| 193 |
-
print(text, words, jyutpings)
|
| 194 |
-
|
| 195 |
# match multiple jyutping eg: liu4 ge3, or single jyutping eg: liu4
|
| 196 |
if not re.search(r"^([a-z]+[1-6]+[ ]?)+$", jyutpings):
|
| 197 |
raise ValueError(
|
|
|
|
| 177 |
|
| 178 |
words = word_segmentation(text)
|
| 179 |
jyutping_array = []
|
| 180 |
+
punct_pattern = re.compile(
|
| 181 |
+
r"^[{}]+$".format(re.escape("".join(punctuation))))
|
| 182 |
|
| 183 |
for word in words:
|
| 184 |
+
if punct_pattern.match(word):
|
| 185 |
+
puncts = re.split(r"([{}])".format(
|
| 186 |
+
re.escape("".join(punctuation))), word)
|
| 187 |
+
for punct in puncts:
|
| 188 |
+
if len(punct) > 0:
|
| 189 |
+
jyutping_array.append(punct)
|
| 190 |
else:
|
| 191 |
jyutpings = ""
|
| 192 |
|
|
|
|
| 195 |
else:
|
| 196 |
jyutpings = word2jyutping(word)
|
| 197 |
|
|
|
|
|
|
|
|
|
|
| 198 |
# match multiple jyutping eg: liu4 ge3, or single jyutping eg: liu4
|
| 199 |
if not re.search(r"^([a-z]+[1-6]+[ ]?)+$", jyutpings):
|
| 200 |
raise ValueError(
|