Spaces:
Sleeping
Sleeping
| # msdl/utils.py | |
| import os | |
| import re | |
| import shutil | |
| import sys | |
| import yaml | |
| from functools import lru_cache | |
| from pathlib import Path | |
| from msdl.config import ( | |
| BACKEND_DOCKERFILE_DIR, | |
| CLOUD_LLM_DOCKERFILE, | |
| FRONTEND_DOCKERFILE_DIR, | |
| LOCAL_LLM_DOCKERFILE, | |
| PACKAGE_DIR, | |
| REACT_DOCKERFILE, | |
| TEMP_DIR, | |
| ENV_FILE_PATH, | |
| ) | |
| from msdl.i18n import t | |
def get_env_variable(var_name, default=None):
    """Look up *var_name* in the .env file, falling back to the process env.

    Returns the raw text after ``VAR=`` from the first matching .env line,
    or ``os.getenv(var_name, default)`` when no .env entry matches.
    """
    if ENV_FILE_PATH.exists():
        prefix = f"{var_name}="
        with ENV_FILE_PATH.open("r") as env_file:
            for raw_line in env_file:
                if raw_line.startswith(prefix):
                    # Everything after the first "=" is the value.
                    _, _, value = raw_line.strip().partition("=")
                    return value
    return os.getenv(var_name, default)
def get_existing_api_key(env_var_name):
    """Return the stored value for *env_var_name* from .env, or None if absent."""
    return read_env_file().get(env_var_name)
def read_env_file():
    """Parse the .env file into a dict of key -> value.

    Comment lines (leading ``#``) and lines without ``=`` are skipped;
    surrounding double then single quotes are stripped from values.
    Returns an empty dict when the file does not exist.
    """
    parsed = {}
    if not ENV_FILE_PATH.exists():
        return parsed
    with ENV_FILE_PATH.open("r") as env_file:
        for raw_line in env_file:
            stripped = raw_line.strip()
            if stripped.startswith("#") or "=" not in raw_line:
                continue
            key, _, value = stripped.partition("=")
            parsed[key] = value.strip('"').strip("'")
    return parsed
def clean_api_key(api_key):
    """Return *api_key* with ALL whitespace removed.

    Note this removes interior whitespace too, not just the ends, so a key
    pasted with stray spaces or newlines comes out contiguous.
    """
    # r"\s+" already matches leading/trailing whitespace, so the previous
    # separate strip() call was redundant.
    return re.sub(r"\s+", "", api_key)
def validate_api_key(api_key, key_type, t):
    """Check *api_key* against the pattern registered for *key_type*.

    Args:
        api_key: Key string to validate.
        key_type: Environment-variable name identifying the key family.
        t: Translation function, used only for the unknown-type error message.

    Returns:
        True when the key matches its expected pattern, else False.

    Raises:
        ValueError: If *key_type* has no registered validation pattern.
    """
    sk_style = r"^sk-[A-Za-z0-9]+$"
    search_style = r"^[A-Za-z0-9_\-\.]+$"
    tencent_style = r"^[A-Za-z0-9]+$"
    validation_rules = {
        # Model API Keys
        "SILICON_API_KEY": sk_style,
        "OPENAI_API_KEY": sk_style,
        "QWEN_API_KEY": sk_style,
        # Search Engine API Keys
        "BING_SEARCH_API_KEY": search_style,
        "BRAVE_SEARCH_API_KEY": search_style,
        "GOOGLE_SERPER_API_KEY": search_style,
        "TENCENT_SEARCH_SECRET_ID": tencent_style,
        "TENCENT_SEARCH_SECRET_KEY": tencent_style,
        # Legacy support
        "WEB_SEARCH_API_KEY": search_style,
    }
    pattern = validation_rules.get(key_type)
    if pattern is None:
        raise ValueError(t("UNKNOWN_API_KEY_TYPE", KEY_TYPE=key_type))
    return bool(re.match(pattern, api_key))
def save_api_key_to_env(key_type, api_key, t):
    """Validate an API key and persist it to the .env file.

    Args:
        key_type: Either a model-format alias (e.g. "gpt4") or an
            environment-variable name used directly.
        api_key: API key value to store.
        t: Translation function for user-facing messages.

    Raises:
        ValueError: If the key fails validation for its type.
    """
    # Map model-format aliases onto env-var names; anything unrecognized is
    # assumed to already be an env-var name.
    alias_to_env = {
        "internlm_silicon": "SILICON_API_KEY",
        "gpt4": "OPENAI_API_KEY",
        "qwen": "QWEN_API_KEY",
    }
    env_var_name = alias_to_env.get(key_type, key_type)
    if not validate_api_key(api_key, env_var_name, t):
        raise ValueError(t("INVALID_API_KEY", KEY_TYPE=env_var_name))
    # Re-read then rewrite the whole file so existing entries are preserved.
    env_vars = read_env_file()
    env_vars[env_var_name] = api_key
    with ENV_FILE_PATH.open("w") as env_file:
        env_file.writelines(f"{key}={value}\n" for key, value in env_vars.items())
    print(t("API_KEY_SAVED", ENV_VAR_NAME=env_var_name))
def ensure_directory(path):
    """Create *path* (and any missing parents) if it does not already exist."""
    target = Path(path)
    if target.exists():
        return
    target.mkdir(parents=True, exist_ok=True)
    print(t("DIR_CREATED", dir=target))
def copy_templates_to_temp(template_files):
    """Copy each named template from the package template dir into TEMP_DIR.

    Exits the process with status 1 as soon as any template is missing.
    """
    template_dir = PACKAGE_DIR / "templates"
    ensure_directory(TEMP_DIR)
    for filename in template_files:
        source = template_dir / filename
        if not source.exists():
            print(t("FILE_NOT_FOUND", file=filename))
            sys.exit(1)
        shutil.copy2(source, TEMP_DIR / filename)
        print(t("FILE_COPIED", file=filename))
def modify_docker_compose(model_type, backend_language, model_format, search_engine):
    """Rewrite TEMP_DIR/docker-compose.yaml to match the user's choices.

    Args:
        model_type: CLOUD_LLM_DOCKERFILE or LOCAL_LLM_DOCKERFILE.
        backend_language: Language flag passed to the backend app (--lang).
        model_format: Model format flag (--model_format).
        search_engine: Search engine flag (--search_engine).

    Raises:
        ValueError: If *model_type* is not a known dockerfile constant.
    """
    docker_compose_path = os.path.join(TEMP_DIR, "docker-compose.yaml")
    with open(docker_compose_path, "r") as file:
        compose_data = yaml.safe_load(file)

    # Set the name of the project
    compose_data["name"] = "mindsearch"

    backend_service = compose_data["services"]["backend"]

    # Ensure the .env file is included so saved API keys reach the container.
    if "env_file" not in backend_service:
        backend_service["env_file"] = [".env"]
    elif ".env" not in backend_service["env_file"]:
        backend_service["env_file"].append(".env")

    # Launch command carries every user choice as a CLI flag.
    command = f"python -m mindsearch.app --lang {backend_language} --model_format {model_format} --search_engine {search_engine}"
    backend_service["command"] = command

    # Environment variables in docker-compose list form ("KEY=value").
    # NOTE: the previous conditional `environment = []` initialization was
    # dead code — the key is assigned unconditionally here.
    env_vars = {
        "LANG": backend_language,
        "MODEL_FORMAT": model_format,
        "SEARCH_ENGINE": search_engine,
    }
    backend_service["environment"] = [
        f"{key}={value}" for key, value in env_vars.items()
    ]

    # Configure based on model type
    if model_type == CLOUD_LLM_DOCKERFILE:
        # Cloud deployment needs neither GPU reservations nor a model cache.
        backend_service.pop("deploy", None)
        backend_service.pop("volumes", None)
    elif model_type == LOCAL_LLM_DOCKERFILE:
        # Reserve one NVIDIA GPU for the locally served model.
        if "deploy" not in backend_service:
            backend_service["deploy"] = {
                "resources": {
                    "reservations": {
                        "devices": [
                            {"driver": "nvidia", "count": 1, "capabilities": ["gpu"]}
                        ]
                    }
                }
            }
        # Share the host cache so model weights survive container rebuilds.
        backend_service["volumes"] = ["/root/.cache:/root/.cache"]
    else:
        raise ValueError(t("UNKNOWN_DOCKERFILE", dockerfile=model_type))

    # Save the modified docker-compose.yaml
    with open(docker_compose_path, "w") as file:
        yaml.dump(compose_data, file)
    print(
        t(
            "docker_compose_updated",
            mode=(t("CLOUD") if model_type == CLOUD_LLM_DOCKERFILE else t("LOCAL")),
            format=model_format,
        )
    )
def get_model_formats(model_type):
    """Return the model-format choices available for the given dockerfile type.

    Raises:
        ValueError: If *model_type* is not a known dockerfile constant.
    """
    if model_type == CLOUD_LLM_DOCKERFILE:
        return ["internlm_silicon", "qwen", "gpt4"]
    if model_type == LOCAL_LLM_DOCKERFILE:
        return ["internlm_server", "internlm_client", "internlm_hf"]
    raise ValueError(t("UNKNOWN_MODEL_TYPE", model_type=model_type))
def copy_backend_dockerfile(choice):
    """Copy the chosen backend Dockerfile into TEMP_DIR as backend.dockerfile.

    Args:
        choice: Filename of the Dockerfile inside BACKEND_DOCKERFILE_DIR.

    Raises:
        FileNotFoundError: If the requested Dockerfile does not exist.
    """
    source_file = Path(BACKEND_DOCKERFILE_DIR) / choice
    source_path = PACKAGE_DIR / "templates" / source_file
    dest_path = TEMP_DIR / "backend.dockerfile"
    if not source_path.exists():
        raise FileNotFoundError(t("FILE_NOT_FOUND", file=source_file))
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    # Byte-for-byte copy (consistent with copy_templates_to_temp); avoids the
    # locale-dependent decode/encode of read_text()/write_text() with no
    # explicit encoding.
    shutil.copy2(source_path, dest_path)
    print(
        t(
            "BACKEND_DOCKERFILE_COPIED",
            source_path=str(source_path),
            dest_path=str(dest_path),
        ))
def copy_frontend_dockerfile():
    """Copy the React frontend Dockerfile into TEMP_DIR as frontend.dockerfile.

    Raises:
        FileNotFoundError: If the frontend Dockerfile template does not exist.
    """
    source_file = Path(FRONTEND_DOCKERFILE_DIR) / REACT_DOCKERFILE
    source_path = PACKAGE_DIR / "templates" / source_file
    dest_path = TEMP_DIR / "frontend.dockerfile"
    if not source_path.exists():
        raise FileNotFoundError(t("FILE_NOT_FOUND", file=source_file))
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    # Byte-for-byte copy (consistent with copy_templates_to_temp); avoids the
    # locale-dependent decode/encode of read_text()/write_text() with no
    # explicit encoding.
    shutil.copy2(source_path, dest_path)
    print(
        t(
            "FRONTEND_DOCKERFILE_COPIED",
            source_path=str(source_path),
            dest_path=str(dest_path),
        ))