|
|
import re |
|
|
|
|
|
# Compiled once at import time so repeated calls do not rebuild the pattern.
_CHINESE_PATTERN = re.compile(
    "["
    r"\u4e00-\u9fff"          # CJK Unified Ideographs
    r"\u3400-\u4dbf"          # CJK Unified Ideographs Extension A
    r"\uf900-\ufaff"          # CJK Compatibility Ideographs
    r"\u3000-\u303f"          # CJK Symbols and Punctuation
    r"\uff00-\uffef"          # Halfwidth and Fullwidth Forms
    r"\U00020000-\U0002fa1f"  # Extensions B-F and Compatibility Supplement
    r"\U00030000-\U000323af"  # Extensions G-H
    "]"
)


def contains_chinese(text: str) -> bool:
    """Detect whether a string contains Chinese characters or punctuation.

    A string matches when at least one of its characters falls in one of
    the CJK ideograph ranges (including the supplementary-plane extension
    blocks), in the CJK Symbols and Punctuation block, or in the Halfwidth
    and Fullwidth Forms block.

    Note that the Halfwidth and Fullwidth Forms block also holds fullwidth
    Latin letters and digits, so text containing those is reported as
    Chinese as well.

    Args:
        text: The string to inspect.

    Returns:
        True if any character of ``text`` is in the ranges above, False
        otherwise (including for the empty string).
    """
    return _CHINESE_PATTERN.search(text) is not None
|
|
|
|
|
# Translation table from Chinese/fullwidth punctuation to ASCII.
# Keys are spelled as \u escapes so the table does not depend on the
# encoding the source file happens to be saved or re-read in.
_PUNCTUATION_MAP = str.maketrans({
    "\uff0c": ",",   # fullwidth comma
    "\u3002": ".",   # ideographic full stop
    "\uff01": "!",   # fullwidth exclamation mark
    "\uff1f": "?",   # fullwidth question mark
    "\uff1b": ";",   # fullwidth semicolon
    "\uff1a": ":",   # fullwidth colon
    "\u201c": '"',   # left double quotation mark
    "\u201d": '"',   # right double quotation mark
    "\u2018": "'",   # left single quotation mark
    "\u2019": "'",   # right single quotation mark
    "\uff08": "(",   # fullwidth left parenthesis
    "\uff09": ")",   # fullwidth right parenthesis
    "\u3010": "[",   # left black lenticular bracket
    "\u3011": "]",   # right black lenticular bracket
    "\u300a": "<",   # left double angle bracket
    "\u300b": ">",   # right double angle bracket
    "\u3001": ",",   # ideographic comma
    "\u2014": "-",   # em dash
})

# A Chinese ellipsis is written as two consecutive U+2026 characters.
_CHINESE_ELLIPSIS = "\u2026\u2026"


def replace_chinese_punctuation(text: str) -> str:
    """Replace Chinese punctuation in a string with ASCII equivalents.

    The doubled ellipsis (two U+2026 characters) is replaced by ``...``
    first, because it is a two-character sequence and cannot be expressed
    in a single-character translation table. Every remaining character is
    then mapped through the punctuation table in one pass. A lone U+2026
    and any character not in the table are left unchanged.

    Args:
        text: The string to convert.

    Returns:
        A new string with the mapped punctuation replaced.
    """
    without_ellipsis = text.replace(_CHINESE_ELLIPSIS, "...")
    return without_ellipsis.translate(_PUNCTUATION_MAP)
|
|
|
|
|
|