# File size: 1,399 Bytes
# Revision: fae0e6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import re

# Compiled once at import time. Character ranges matched:
#   \u4e00-\u9fff          CJK Unified Ideographs
#   \u3400-\u4dbf          CJK Unified Ideographs Extension A
#   \uf900-\ufaff          CJK Compatibility Ideographs
#   \u3000-\u303f          CJK Symbols and Punctuation
#   \uff00-\uffef          Halfwidth and Fullwidth Forms (fullwidth ASCII and
#                          punctuation; the block also holds halfwidth
#                          Katakana/Hangul, kept for backward compatibility)
#   \U00020000-\U0002fa1f  Supplementary Ideographic Plane: Extensions B-F and
#                          the CJK Compatibility Ideographs Supplement
#   \U00030000-\U000323af  Tertiary Ideographic Plane: Extensions G-H
_CHINESE_PATTERN = re.compile(
    r'[\u4e00-\u9fff\u3400-\u4dbf\uf900-\ufaff\u3000-\u303f\uff00-\uffef'
    r'\U00020000-\U0002fa1f\U00030000-\U000323af]'
)


def contains_chinese(text):
    """
    Detect if a string contains Chinese characters or Chinese punctuation

    Ideographs outside the Basic Multilingual Plane (CJK Extension B and
    later) are detected as well as the common BMP ranges.

    Args:
        text (str): The string to detect

    Returns:
        bool: True if contains Chinese characters or punctuation, False otherwise
    """
    return _CHINESE_PATTERN.search(text) is not None

# Translation table for single-character punctuation. Keys are written as
# \u escapes so the table does not depend on the encoding of this source
# file: str.maketrans() raises ValueError for any dict key that is not exactly
# one character, which is what a mis-decoded (mojibake) key turns into.
_PUNCTUATION_TABLE = str.maketrans({
    '\uff0c': ',',   # fullwidth comma
    '\u3002': '.',   # ideographic full stop
    '\uff01': '!',   # fullwidth exclamation mark
    '\uff1f': '?',   # fullwidth question mark
    '\uff1b': ';',   # fullwidth semicolon
    '\uff1a': ':',   # fullwidth colon
    '\u201c': '"',   # left double quotation mark
    '\u201d': '"',   # right double quotation mark
    '\u2018': "'",   # left single quotation mark
    '\u2019': "'",   # right single quotation mark
    '\uff08': '(',   # fullwidth left parenthesis
    '\uff09': ')',   # fullwidth right parenthesis
    '\u3010': '[',   # left black lenticular bracket
    '\u3011': ']',   # right black lenticular bracket
    '\u300a': '<',   # left double angle bracket
    '\u300b': '>',   # right double angle bracket
    '\u3001': ',',   # ideographic comma
    '\u2014': '-',   # em dash
})


def replace_chinese_punctuation(text):
    """
    Replace Chinese punctuation in a string with ASCII equivalents

    A doubled horizontal ellipsis (two U+2026 characters) becomes '...';
    a lone U+2026 is left unchanged. Every other mapped mark is replaced
    one-for-one; all remaining characters pass through untouched.

    Args:
        text (str): The string to convert

    Returns:
        str: A copy of text with the mapped punctuation replaced
    """
    # First, replace multi-character punctuation, which translate() cannot
    # match because it works on one character at a time.
    text = text.replace('\u2026\u2026', '...')
    # Then apply single-character replacements in one pass.
    return text.translate(_PUNCTUATION_TABLE)