|
|
""" |
|
|
Configuration management for EceMotion Pictures. |
|
|
Centralized settings for models, parameters, and deployment. |
|
|
""" |
|
|
|
|
|
import os |
|
|
from typing import Dict, Any, Optional |
|
|
|
|
|
|
|
|
# Model identifiers. Each one is read from the environment variable of the
# same name and falls back to the default shown here when it is unset.
MODEL_VIDEO: str = os.environ.get("MODEL_VIDEO", "damo-vilab/text-to-video-ms-1.7b")
MODEL_AUDIO: str = os.environ.get("MODEL_AUDIO", "parler-tts/parler-tts-mini-v1")
MODEL_LLM: str = os.environ.get("MODEL_LLM", "microsoft/DialoGPT-medium")
|
|
|
|
|
|
|
|
# Video generation parameters, overridable through the environment.
# Durations are in seconds. A non-numeric override makes int() raise
# ValueError while this module is being imported.
MAX_DURATION: int = int(os.environ.get("MAX_DURATION", "15"))
MIN_DURATION: int = int(os.environ.get("MIN_DURATION", "5"))
DEFAULT_FPS: int = int(os.environ.get("DEFAULT_FPS", "8"))
DEFAULT_FRAMES: int = int(os.environ.get("DEFAULT_FRAMES", "64"))
|
|
|
|
|
|
|
|
# Audio parameters, overridable through the environment.
# AUDIO_BITRATE stays a string (default "128k"); it is not parsed here.
AUDIO_SAMPLE_RATE: int = int(os.environ.get("AUDIO_SAMPLE_RATE", "22050"))
AUDIO_BITRATE: str = os.environ.get("AUDIO_BITRATE", "128k")
MUSIC_GAIN: float = float(os.environ.get("MUSIC_GAIN", "0.3"))
|
|
|
|
|
|
|
|
# GPU / quantization settings, overridable through the environment.
# NOTE(review): GPU_MEMORY_THRESHOLD is presumably a 0-1 fraction of GPU
# memory -- confirm against the code that consumes it.
GPU_MEMORY_THRESHOLD: float = float(os.environ.get("GPU_MEMORY_THRESHOLD", "0.8"))
# Only the literal "true" (any letter case) switches the flag on; every other
# value, including "1" or "yes", leaves it off.
USE_QUANTIZATION: bool = os.environ.get("USE_QUANTIZATION", "true").lower() == "true"
QUANTIZATION_BITS: int = int(os.environ.get("QUANTIZATION_BITS", "8"))
|
|
|
|
|
|
|
|
# Synchronization settings, overridable through the environment.
# The tolerance is expressed in milliseconds.
SYNC_TOLERANCE_MS: int = int(os.environ.get("SYNC_TOLERANCE_MS", "200"))
# Off by default; only the literal "true" (any letter case) enables it.
FORCE_SYNC: bool = os.environ.get("FORCE_SYNC", "false").lower() == "true"
|
|
|
|
|
|
|
|
# Retro visual-effect strengths, overridable through the environment.
# validate_config() requires VHS_INTENSITY and SCANLINE_OPACITY to lie in
# [0, 1]; the other two values are not range-checked there.
VHS_INTENSITY: float = float(os.environ.get("VHS_INTENSITY", "0.5"))
SCANLINE_OPACITY: float = float(os.environ.get("SCANLINE_OPACITY", "0.2"))
CHROMATIC_ABERRATION: float = float(os.environ.get("CHROMATIC_ABERRATION", "0.05"))
FILM_GRAIN: float = float(os.environ.get("FILM_GRAIN", "0.1"))
|
|
|
|
|
|
|
|
# User-interface settings, overridable through the environment.
UI_THEME: str = os.environ.get("UI_THEME", "default")
# Both flags default to on; only the literal "true" (any letter case) keeps
# them on when the variable is set.
SHOW_PROGRESS: bool = os.environ.get("SHOW_PROGRESS", "true").lower() == "true"
ENABLE_EXAMPLES: bool = os.environ.get("ENABLE_EXAMPLES", "true").lower() == "true"
|
|
|
|
|
|
|
|
# Logging settings, overridable through the environment. Both values are kept
# as raw strings; this module does not validate or apply them.
LOG_LEVEL: str = os.environ.get("LOG_LEVEL", "INFO")
LOG_FORMAT: str = os.environ.get("LOG_FORMAT", "text")
|
|
|
|
|
|
|
|
# Per-model settings keyed by model identifier. get_model_config() reads this
# table, and get_safe_model_name() uses each entry's "stable" flag to decide
# whether to fall back to a default model.
MODEL_CONFIGS: Dict[str, Dict[str, Any]] = {
    # --- Video models: frame limits and quantization support ---
    "damo-vilab/text-to-video-ms-1.7b": {
        "max_frames": 64,
        "min_frames": 8,
        "default_frames": 32,
        "memory_usage_gb": 6,
        "supports_quantization": False,
        "stable": True,
    },
    "THUDM/CogVideoX-5b": {
        "max_frames": 48,
        "min_frames": 16,
        "default_frames": 32,
        "memory_usage_gb": 16,
        "supports_quantization": True,
        "stable": False,
    },
    # --- Audio models: text-length limits and default voice ---
    "parler-tts/parler-tts-mini-v1": {
        "max_text_length": 500,
        "min_text_length": 10,
        "default_voice": "Announcer '80s",
        "memory_usage_gb": 2,
        "stable": True,
    },
    "SWivid/F5-TTS": {
        "max_text_length": 300,
        "min_text_length": 10,
        "default_voice": "announcer",
        "memory_usage_gb": 4,
        "stable": False,
    },
    # --- Language models: token limit and sampling parameters ---
    "microsoft/DialoGPT-medium": {
        "max_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.9,
        "memory_usage_gb": 2,
        "stable": True,
    },
    "Qwen/Qwen2.5-7B-Instruct": {
        "max_tokens": 1024,
        "temperature": 0.7,
        "top_p": 0.9,
        "memory_usage_gb": 8,
        "stable": False,
    },
}
|
|
|
|
|
|
|
|
# Voice style name -> plain-English description of the delivery.
# NOTE(review): the descriptions are presumably handed to the audio model as a
# style prompt -- confirm against the caller.
VOICE_STYLES: Dict[str, str] = {
    "Announcer '80s": "A confident, upbeat 1980s TV announcer with warm AM-radio tone.",
    "Mall PA": "Casual, slightly echoey mall public-address vibe.",
    "Late Night": "Low energy, sly late-night infomercial style.",
    "News Anchor": "Professional, authoritative news anchor delivery.",
    "Infomercial": "Enthusiastic, persuasive infomercial host style.",
    "Radio DJ": "Smooth, charismatic radio disc jockey voice.",
}
|
|
|
|
|
|
|
|
# Three-beat ad structures; the beats of each template are separated by an
# arrow character inside a single string.
STRUCTURE_TEMPLATES = [
    "Montage → Close-up → Logo stinger",
    "Before/After → Feature highlight → CTA",
    "Testimonial → B-roll → Price tag reveal",
    "Unboxing → Demo → Deal countdown",
    "Retro news bulletin → Product shot → Tagline",
    "Opening hook → Problem/Solution → Call to action",
    "Brand story → Product showcase → Final tagline",
]
|
|
|
|
|
|
|
|
# Stock closing taglines for generated ads.
TAGLINES = [
    "So retro, it's the future.",
    "Pixels you can trust.",
    "VHS vibes. Modern results.",
    "Old-school cool. New-school sales.",
    "Where nostalgia meets innovation.",
    "Rewind to the future.",
    "Classic style. Modern performance.",
    "The past perfected.",
    "EceMotion Pictures - Bringing the '80s back to life.",
    "Your story, our vision, timeless memories.",
]
|
|
|
|
|
def get_model_config(model_name: str) -> Dict[str, Any]:
    """Return the settings registered for ``model_name``.

    Args:
        model_name: Key to look up in ``MODEL_CONFIGS``.

    Returns:
        The ``MODEL_CONFIGS`` entry for a known model. This is the registry's
        own dict, not a copy, so mutating it alters ``MODEL_CONFIGS``. Unknown
        names get a newly built generic fallback, which is marked stable.
    """
    try:
        return MODEL_CONFIGS[model_name]
    except KeyError:
        # Not registered: hand back conservative generic settings.
        fallback: Dict[str, Any] = {
            "max_frames": 32,
            "min_frames": 8,
            "default_frames": 16,
            "memory_usage_gb": 4,
            "supports_quantization": False,
            "stable": True,
        }
        return fallback
|
|
|
|
|
def get_device() -> str:
    """Choose the device string to run models on.

    Returns:
        ``"cuda"`` when torch can be imported, ``torch.cuda.is_available()``
        is true, and ``CUDA_VISIBLE_DEVICES`` is set to a non-empty value;
        ``"cpu"`` in every other case, including when torch is not installed.
    """
    try:
        # Imported lazily so this module still loads without torch.
        import torch

        cuda_ready = torch.cuda.is_available()
    except ImportError:
        return "cpu"

    # NOTE(review): an *unset* CUDA_VISIBLE_DEVICES selects "cpu" here even
    # when CUDA is available -- confirm this is intended for the deployment.
    visible_devices = os.getenv("CUDA_VISIBLE_DEVICES")
    if cuda_ready and visible_devices:
        return "cuda"
    return "cpu"
|
|
|
|
|
def validate_config() -> bool:
    """Check the module-level settings for basic sanity.

    The rules are checked in order and the first one that fails is reported
    on stdout. Explicit comparisons are used instead of ``assert`` so the
    checks still run when Python is started with ``-O``, which strips assert
    statements.

    Returns:
        True when every rule holds, False as soon as one is violated.
    """
    rules = (
        (MIN_DURATION < MAX_DURATION, "MIN_DURATION must be less than MAX_DURATION"),
        (DEFAULT_FPS > 0, "DEFAULT_FPS must be positive"),
        (AUDIO_SAMPLE_RATE > 0, "AUDIO_SAMPLE_RATE must be positive"),
        (0 <= VHS_INTENSITY <= 1, "VHS_INTENSITY must be between 0 and 1"),
        (0 <= SCANLINE_OPACITY <= 1, "SCANLINE_OPACITY must be between 0 and 1"),
    )
    for holds, message in rules:
        if not holds:
            print(f"Configuration validation failed: {message}")
            return False
    return True
|
|
|
|
|
def get_safe_model_name(model_name: str, model_type: str) -> str:
    """Return ``model_name``, or a stable default when it is flagged unstable.

    Args:
        model_name: Requested model identifier.
        model_type: One of ``"video"``, ``"audio"`` or ``"llm"``; selects
            which default replaces an unstable model.

    Returns:
        ``model_name`` when its config is stable, or when ``model_type`` is
        not one of the three recognized values; otherwise the hard-coded
        default for that type.
    """
    if get_model_config(model_name).get("stable", False):
        return model_name

    # The replacements are fixed literals, not the MODEL_* settings, so
    # environment overrides do not affect them.
    stable_defaults = {
        "video": "damo-vilab/text-to-video-ms-1.7b",
        "audio": "parler-tts/parler-tts-mini-v1",
        "llm": "microsoft/DialoGPT-medium",
    }
    return stable_defaults.get(model_type, model_name)
|
|
|
|
|
def log_config() -> None:
    """Print the active model, device, timing and sync settings to stdout."""
    report = (
        "EceMotion Pictures Configuration:",
        f" Video Model: {MODEL_VIDEO}",
        f" Audio Model: {MODEL_AUDIO}",
        f" LLM Model: {MODEL_LLM}",
        f" Device: {get_device()}",
        f" Duration Range: {MIN_DURATION}-{MAX_DURATION}s",
        f" FPS: {DEFAULT_FPS}",
        f" Sync Tolerance: {SYNC_TOLERANCE_MS}ms",
    )
    for line in report:
        print(line)
|
|
|