File size: 1,399 Bytes
fae0e6c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import re
# Compiled once at import time; every call reuses the same pattern object.
# Ranges in the character class:
#   U+4E00-U+9FFF    CJK Unified Ideographs
#   U+3400-U+4DBF    CJK Unified Ideographs Extension A
#   U+F900-U+FAFF    CJK Compatibility Ideographs
#   U+3000-U+303F    CJK Symbols and Punctuation
#   U+FF00-U+FFEF    Halfwidth and Fullwidth Forms (fullwidth ASCII/punctuation)
#   U+20000-U+2FA1F  Supplementary Ideographic Plane (Extensions B-F, I and
#                    the CJK Compatibility Ideographs Supplement)
#   U+30000-U+323AF  CJK Unified Ideographs Extensions G-H
_CHINESE_PATTERN = re.compile(
    r'[\u4e00-\u9fff\u3400-\u4dbf\uf900-\ufaff\u3000-\u303f\uff00-\uffef'
    r'\U00020000-\U0002fa1f\U00030000-\U000323af]'
)


def contains_chinese(text):
    """
    Detect if a string contains Chinese characters or Chinese punctuation.

    The check is purely code-point based: it reports True as soon as any
    character of ``text`` falls inside one of the Unicode ranges listed next
    to ``_CHINESE_PATTERN``. Those ranges cover Han ideographs (including the
    supplementary-plane extensions, which lie outside the Basic Multilingual
    Plane), CJK symbols/punctuation and the halfwidth/fullwidth forms block.

    Args:
        text (str): The string to inspect.

    Returns:
        bool: True if at least one character lies in the ranges above,
        False otherwise (including for the empty string).
    """
    # ``search`` stops at the first hit, so long inputs are not fully scanned
    # once a match is found.
    return _CHINESE_PATTERN.search(text) is not None
# Translation table for single-character Chinese / fullwidth punctuation.
# Keys are spelled as Unicode escapes on purpose: literal non-ASCII keys are
# silently corrupted when the file is saved or served in the wrong encoding,
# and a corrupted (multi-character) key makes ``str.maketrans`` raise
# ``ValueError``.
_CHINESE_PUNCTUATION_TABLE = str.maketrans({
    '\uff0c': ',',   # FULLWIDTH COMMA
    '\u3002': '.',   # IDEOGRAPHIC FULL STOP
    '\uff01': '!',   # FULLWIDTH EXCLAMATION MARK
    '\uff1f': '?',   # FULLWIDTH QUESTION MARK
    '\uff1b': ';',   # FULLWIDTH SEMICOLON
    '\uff1a': ':',   # FULLWIDTH COLON
    '\u201c': '"',   # LEFT DOUBLE QUOTATION MARK
    '\u201d': '"',   # RIGHT DOUBLE QUOTATION MARK
    '\u2018': "'",   # LEFT SINGLE QUOTATION MARK
    '\u2019': "'",   # RIGHT SINGLE QUOTATION MARK
    '\uff08': '(',   # FULLWIDTH LEFT PARENTHESIS
    '\uff09': ')',   # FULLWIDTH RIGHT PARENTHESIS
    '\u3010': '[',   # LEFT BLACK LENTICULAR BRACKET
    '\u3011': ']',   # RIGHT BLACK LENTICULAR BRACKET
    '\u300a': '<',   # LEFT DOUBLE ANGLE BRACKET
    '\u300b': '>',   # RIGHT DOUBLE ANGLE BRACKET
    '\u3001': ',',   # IDEOGRAPHIC COMMA
    '\u2014': '-',   # EM DASH
})


def replace_chinese_punctuation(text):
    """
    Replace Chinese punctuation in a string with ASCII equivalents.

    A doubled horizontal ellipsis (U+2026 U+2026) becomes ``'...'``; every
    character listed in ``_CHINESE_PUNCTUATION_TABLE`` is then mapped to its
    single ASCII counterpart. All other characters, including a lone
    U+2026, are returned unchanged.

    Args:
        text (str): The string to normalise.

    Returns:
        str: A new string with the replacements applied.
    """
    # The two-character ellipsis cannot be expressed in a translate table
    # (keys must be single characters), so it is handled first.
    text = text.replace('\u2026\u2026', '...')
    # One pass over the string for all single-character replacements.
    return text.translate(_CHINESE_PUNCTUATION_TABLE)
|