Spaces:

miromind-ai
/

MiroMind-Open-Source-Deep-Research

Running

File size: 1,399 Bytes

fae0e6c

import re

# Compiled once at import time. Matches one character from any of:
#   \u3000-\u303f            CJK Symbols and Punctuation
#   \u3400-\u4dbf            CJK Unified Ideographs Extension A
#   \u4e00-\u9fff            CJK Unified Ideographs
#   \uf900-\ufaff            CJK Compatibility Ideographs
#   \uff00-\uffef            Halfwidth and Fullwidth Forms
#   \U00020000-\U0002ebef    CJK Unified Ideographs Extensions B-F
#   \U0002f800-\U0002fa1f    CJK Compatibility Ideographs Supplement
#   \U00030000-\U000323af    CJK Unified Ideographs Extensions G-H
# The supplementary-plane ranges are needed because rare Han characters
# (e.g. U+20000) live outside the Basic Multilingual Plane.
_CHINESE_PATTERN = re.compile(
    r'['
    r'\u3000-\u303f'
    r'\u3400-\u4dbf'
    r'\u4e00-\u9fff'
    r'\uf900-\ufaff'
    r'\uff00-\uffef'
    r'\U00020000-\U0002ebef'
    r'\U0002f800-\U0002fa1f'
    r'\U00030000-\U000323af'
    r']'
)


def contains_chinese(text: str) -> bool:
    """
    Detect if a string contains Chinese characters or Chinese punctuation

    The check is purely code-point based: it returns True as soon as any
    character falls in one of the CJK ideograph, CJK punctuation, or
    halfwidth/fullwidth-form ranges listed above ``_CHINESE_PATTERN``.
    Han ideographs are shared between scripts, so text written with them in
    another language also matches. The ideographic space (U+3000) and the
    whole Halfwidth and Fullwidth Forms block count as matches too.

    Args:
        text (str): The string to detect

    Returns:
        bool: True if contains Chinese characters or punctuation, False otherwise
    """
    return _CHINESE_PATTERN.search(text) is not None

def replace_chinese_punctuation(text):
    """
    Convert Chinese / typographic punctuation in a string to ASCII equivalents

    A doubled ellipsis ('……') becomes '...'. Every character in ``originals``
    below is then swapped for the character at the same position in
    ``ascii_equivalents``. All other characters are left untouched.

    Args:
        text (str): The string to convert

    Returns:
        str: A new string with the listed punctuation replaced
    """
    # The two strings are position-aligned: originals[i] -> ascii_equivalents[i]
    originals = '，。！？；：“”‘’（）【】《》、—'
    ascii_equivalents = ',.!?;:""\'\'()[]<>,-'
    table = str.maketrans(originals, ascii_equivalents)

    # The ellipsis is a two-character sequence, so it cannot go through the
    # one-to-one table and is handled before the translate pass
    without_ellipsis = text.replace('……', '...')
    return without_ellipsis.translate(table)