# Page header captured together with this file (not part of the program):
# drvsbrkcn's picture
# Upload app.py
# 4b08e73 verified
"""
EceMotion Pictures - Production Grade Commercial Generator
Advanced text-to-video commercial generator with perfect audio-video sync.
"""
import html
import logging
import os
import tempfile
import traceback
from pathlib import Path
from typing import Any, Dict, Iterator, Optional, Tuple
# Initialize spaces GPU before any other imports
import spaces
import gradio as gr
import numpy as np
# Import our enhanced modules
from config import (
MODEL_VIDEO, MODEL_AUDIO, MODEL_LLM, MAX_DURATION, MIN_DURATION,
DEFAULT_FPS, VOICE_STYLES, get_device, validate_config, log_config
)
from sync_manager import create_sync_manager
from llm_script_generator import create_script_generator
from utils_audio import synth_voice, retro_bed, mix_to_stereo, write_wav
from utils_video import synth_t2v, apply_retro_filters, mux_audio
# Configure logging once for the whole process; every module logger inherits it.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# Initialize the shared components used by the request handlers below.
DEVICE = get_device()
sync_manager = create_sync_manager()
script_generator = create_script_generator()

# Refuse to start with an invalid configuration.
if not validate_config():
    logger.error("Configuration validation failed")
    # `exit()` is a helper injected by the `site` module for interactive use
    # and is absent under `python -S` or in frozen builds; raising SystemExit
    # terminates with the same status code everywhere.
    raise SystemExit(1)

# Record the effective configuration in the log.
log_config()
# Stylesheet for the Gradio UI: container width, header/title/subtitle styling,
# the control and output panels, and the .progress-info / .error-info status
# boxes. It is passed as `css=CSS` by the first create_interface() definition
# in this file.
CSS = """
.gradio-container {
max-width: 1200px !important;
margin: 0 auto;
}
.app-header {
text-align: center;
margin-bottom: 2rem;
}
.app-title {
font-size: 2.5rem;
font-weight: 700;
background: linear-gradient(45deg, #ff6b6b, #4ecdc4);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin-bottom: 0.5rem;
}
.app-subtitle {
opacity: 0.7;
font-size: 1.1rem;
color: #666;
}
.control-section {
background: #f8f9fa;
border-radius: 12px;
padding: 1.5rem;
margin-bottom: 1rem;
}
.output-section {
background: #ffffff;
border: 2px solid #e9ecef;
border-radius: 12px;
padding: 1.5rem;
}
.progress-info {
background: #e3f2fd;
border-left: 4px solid #2196f3;
padding: 1rem;
margin: 1rem 0;
border-radius: 4px;
}
.error-info {
background: #ffebee;
border-left: 4px solid #f44336;
padding: 1rem;
margin: 1rem 0;
border-radius: 4px;
}
"""
# Example configurations are now displayed as markdown text
def create_interface():
    """Create the modern Gradio interface and return the ``gr.Blocks`` app.

    Builds a two-column layout (controls on the left, preview/script/download
    on the right), wires the "roll" and "generate" buttons to
    ``roll_script_suggestion`` and ``generate_commercial``, and returns the
    assembled app so the caller can ``launch()`` it.

    NOTE: this module contains a second ``def create_interface()`` further
    down; because it is defined later it rebinds the name, so this version is
    only used if that later definition is removed.
    """
    with gr.Blocks(
        css=CSS,
        title="EceMotion Pictures",
        theme=gr.themes.Soft(),
    ) as demo:
        # Header
        with gr.Row():
            gr.HTML("""
            <div class="app-header">
            <div class="app-title">🎬 EceMotion Pictures</div>
            <div class="app-subtitle">AI-Powered 1980s Style Commercial Generator</div>
            </div>
            """)

        # Main interface
        with gr.Row():
            # Left column - Controls
            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### 🎯 Commercial Setup")
                    brand = gr.Textbox(
                        label="Brand Name",
                        placeholder="YourBrand™",
                        value="EceMotion Pictures",
                        info="Enter your brand name",
                    )
                    structure = gr.Textbox(
                        label="Commercial Structure",
                        placeholder="e.g., Montage → Close-up → Logo stinger",
                        value="Montage → Close-up → Logo stinger",
                        info="Define the flow of your commercial",
                    )
                    with gr.Row():
                        script_prompt = gr.Textbox(
                            label="Script Hook",
                            placeholder="Opening hook or idea",
                            value="Remember when technology was simple?",
                            scale=3,
                        )
                        roll_btn = gr.Button("🎲", scale=1, size="sm")
                    duration = gr.Slider(
                        minimum=MIN_DURATION,
                        maximum=MAX_DURATION,
                        value=10,
                        step=1,
                        label="Duration (seconds)",
                        info=f"Between {MIN_DURATION}-{MAX_DURATION} seconds",
                    )

                with gr.Group():
                    gr.Markdown("### 🎤 Audio Settings")
                    voice = gr.Dropdown(
                        choices=list(VOICE_STYLES.keys()),
                        value="Announcer '80s",
                        label="Voice Style",
                        info="Choose the announcer style",
                    )
                    music = gr.Checkbox(
                        value=True,
                        label="Background Music",
                        info="Add retro synth jingle",
                    )

                with gr.Group():
                    gr.Markdown("### ⚙️ Advanced Settings")
                    with gr.Accordion("Model & Quality", open=False):
                        model_video = gr.Dropdown(
                            choices=["damo-vilab/text-to-video-ms-1.7b", "THUDM/CogVideoX-5b"],
                            value=MODEL_VIDEO,
                            label="Video Model",
                            info="Choose the text-to-video model",
                        )
                        model_audio = gr.Dropdown(
                            choices=["parler-tts/parler-tts-mini-v1", "SWivid/F5-TTS"],
                            value=MODEL_AUDIO,
                            label="Audio Model",
                            info="Choose the text-to-speech model",
                        )
                    with gr.Accordion("Retro Effects", open=False):
                        vhs_intensity = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=0.5,
                            step=0.1,
                            label="VHS Effect Intensity",
                        )
                        seed = gr.Number(
                            value=42,
                            precision=0,
                            label="Random Seed",
                            info="For reproducible results",
                        )

                # Generate button
                generate_btn = gr.Button(
                    "🎬 Generate Commercial",
                    variant="primary",
                    size="lg",
                )

            # Right column - Output
            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### 📺 Generated Commercial")
                    # Progress tracking
                    progress_info = gr.HTML("""
                    <div class="progress-info">
                    <strong>Ready to generate!</strong><br>
                    Click the generate button to create your retro commercial.
                    </div>
                    """)
                    # Video output
                    output_video = gr.Video(
                        height=400,
                        label="Commercial Preview",
                        show_download_button=True,
                    )
                    # Script output
                    output_script = gr.Textbox(
                        label="Generated Script",
                        lines=8,
                        max_lines=12,
                        show_copy_button=True,
                    )
                    # Download section
                    with gr.Row():
                        download_btn = gr.DownloadButton(
                            "📥 Download Commercial",
                            variant="secondary",
                        )

        # Examples section (static text; blank lines keep each example on its
        # own paragraph when rendered as Markdown).
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 💡 Example Configurations")
                gr.Markdown("""
                **Example 1:** EceMotion Pictures - "Remember when technology was simple?" (Announcer '80s, 10s)

                **Example 2:** VaporWave Studios - "The future is now, but it looks like the past" (Mall PA, 8s)

                **Example 3:** Neon Dreams - "Step into the digital sunset" (Late Night, 12s)
                """)

        # Footer
        gr.Markdown("""
        <div style='text-align: center; opacity: 0.7; font-size: 0.9rem; margin-top: 2rem;'>
        <p>🎬 Powered by EceMotion Pictures • Perfect audio-video sync • Professional quality</p>
        <p>Models: Text-to-Video • Text-to-Speech • Enhanced VHS effects</p>
        </div>
        """)

        # Connect event handlers. The handler names are resolved when this
        # function runs, by which time the module has finished loading.
        roll_btn.click(
            roll_script_suggestion,
            inputs=[structure, seed],
            outputs=[script_prompt],
        )
        generate_btn.click(
            generate_commercial,
            inputs=[
                brand, structure, script_prompt, duration, voice, music,
                model_video, model_audio, vhs_intensity, seed,
            ],
            outputs=[progress_info, output_video, output_script, download_btn],
        )

    # Without this the function returned None and could not be launched.
    return demo
def roll_script_suggestion(structure_text: str, seed_val: int) -> str:
    """Return one script hook suggested for the given commercial structure.

    Asks ``script_generator`` for a single suggestion. If it returns nothing,
    or raises, a stock line is returned instead so the UI always gets text.
    """
    stock_line = "Back to '87 - the future is now!"
    try:
        candidates = script_generator.suggest_scripts(structure_text, n=1, seed=seed_val)
        if candidates:
            return candidates[0]
    except Exception as e:
        # Best effort: a failed suggestion must not break the form.
        logger.error(f"Script suggestion failed: {e}")
    return stock_line
def _render_progress(title: str, steps) -> str:
    """Return the HTML status panel: a bold title followed by one row per step."""
    rows = "\n".join(f"<div>{step}</div>" for step in steps)
    return f"""
<div class="progress-info">
<strong>{title}</strong><br>
<div style="margin-top: 0.5rem;">
{rows}
</div>
</div>
"""


# ZeroGPU's decorator documents `duration` (seconds) as the per-call GPU
# budget; the previous `timeout=` keyword is not a documented parameter.
@spaces.GPU(duration=120)
def generate_commercial(
    brand_name: str,
    structure_text: str,
    script_text: str,
    duration_val: int,
    voice_style: str,
    music_enabled: bool,
    video_model: str,
    audio_model: str,
    vhs_intensity: float,
    seed_val: int,
) -> Iterator[Tuple[str, Optional[str], Optional[str], Any]]:
    """
    Generate a complete retro commercial, streaming progress to the UI.

    This is a generator. Every yield is a 4-tuple matching the outputs wired
    to the generate button: (progress HTML, video path, script text,
    download-button value). Intermediate yields carry only the progress HTML;
    the last yield carries the finished video path and the formatted script.

    Pipeline: script generation -> text-to-video -> retro filter -> voice-over
    (optionally mixed with a music bed) -> audio/video synchronization, with
    a plain mux as fallback if synchronization raises.

    Any exception is logged and reported to the user as an error panel
    instead of propagating.
    """
    busy_title = "🎬 Generating Commercial..."
    try:
        # Normalize inputs once so script, video prompt and filename agree.
        brand_name = brand_name or ""
        brand_label = brand_name or "Brand"
        structure = structure_text or "Montage → Close-up → Logo"
        hook = script_text or "Back to '87"

        yield _render_progress(busy_title, ["📝 Generating script with AI..."]), None, None, None

        # Generate script using LLM
        generated_script = script_generator.generate_script(
            brand=brand_label,
            structure=structure,
            script_prompt=hook,
            duration=duration_val,
            voice_style=voice_style,
            seed=seed_val,
        )

        yield _render_progress(
            busy_title,
            ["✅ Script generated", "🎥 Generating video..."],
        ), None, None, None

        # The deliverable goes into its own directory that is NOT removed when
        # this function ends: the path is handed to Gradio, which reads the
        # file after the generator has moved on (or finished).
        output_dir = tempfile.mkdtemp(prefix="ecemotion_")

        # Intermediate artifacts live in a self-cleaning scratch directory.
        with tempfile.TemporaryDirectory() as tmpdir:
            video_prompt = (
                f"{structure}. {hook}. 1980s commercial, VHS texture, soft lighting, "
                f"bold retro titles, 4:3, brand {brand_label}"
            )
            # Calculate optimal frame count, capped for ZeroGPU.
            num_frames = min(
                sync_manager.get_optimal_frame_count(duration_val, DEFAULT_FPS), 16
            )
            clip = synth_t2v(
                prompt=video_prompt,
                seed=seed_val,
                num_frames=num_frames,
                fps=DEFAULT_FPS,
                device=DEVICE,
                model_name=video_model,
            )

            # Save raw video
            raw_video_path = os.path.join(tmpdir, "raw.mp4")
            clip.write_videofile(
                raw_video_path,
                fps=DEFAULT_FPS,
                codec='libx264',
                audio=False,
                verbose=False,
                logger=None,
            )

            # Apply retro filters
            retro_video_path = os.path.join(tmpdir, "retro.mp4")
            apply_retro_filters(raw_video_path, retro_video_path, intensity=vhs_intensity)

            yield _render_progress(
                busy_title,
                ["✅ Script generated", "✅ Video generated", "🎤 Generating audio..."],
            ), None, None, None

            # Generate audio from the concatenated script segments.
            voiceover_text = " ".join(seg.text for seg in generated_script.segments)
            sr_voice, wav_voice = synth_voice(
                text=voiceover_text,
                voice_prompt=voice_style,
                model_name=audio_model,
                device=DEVICE,
            )

            # Add background music if requested
            if music_enabled:
                sr_music, wav_music = retro_bed(clip.duration)
                sr_final, stereo_audio = mix_to_stereo(
                    sr_voice, wav_voice, sr_music, wav_music, bed_gain=0.3
                )
            else:
                # Duplicate the voice track into two identical channels.
                sr_final = sr_voice
                stereo_audio = np.stack([wav_voice, wav_voice], axis=1)

            # Save audio
            audio_path = os.path.join(tmpdir, "audio.wav")
            write_wav(audio_path, sr_final, stereo_audio)

            yield _render_progress(
                busy_title,
                [
                    "✅ Script generated",
                    "✅ Video generated",
                    "✅ Audio generated",
                    "🔄 Synchronizing audio and video...",
                ],
            ), None, None, None

            # Clean brand name for the filename: keep alphanumerics, spaces,
            # '-' and '_', then turn spaces into underscores.
            clean_brand = "".join(
                c for c in brand_name if c.isalnum() or c in (' ', '-', '_')
            ).rstrip()
            clean_brand = clean_brand.replace(' ', '_')
            if not clean_brand:
                clean_brand = "commercial"
            final_video_path = os.path.join(output_dir, f"{clean_brand}_commercial.mp4")

            # Synchronize audio and video
            try:
                sync_manager.synchronize_media(
                    video_path=retro_video_path,
                    audio_path=audio_path,
                    output_path=final_video_path,
                    prefer_audio_duration=True,
                )
            except Exception as e:
                logger.error(f"Sync failed, using simple mux: {e}")
                # Fallback to simple muxing
                mux_audio(retro_video_path, audio_path, final_video_path)

            # Fail loudly instead of handing the UI a path that does not exist.
            if not os.path.exists(final_video_path):
                raise RuntimeError("Final video file was not created")

            # NOTE(review): both arguments are the final file, as in the
            # original code; presumably this compares the muxed file's own
            # streams. Confirm against sync_manager.validate_sync.
            is_synced, sync_diff = sync_manager.validate_sync(
                final_video_path, final_video_path
            )

        # Format script output
        script_lines = [
            f"{i}. {segment.timing_marker} {segment.text}"
            for i, segment in enumerate(generated_script.segments, 1)
        ]
        script_output = "\n".join(script_lines) + f"\n\nTAGLINE: {generated_script.tagline}"

        # Final progress
        sync_status = "✅ Perfect sync" if is_synced else f"⚠️ Sync diff: {sync_diff:.3f}s"
        done_html = _render_progress(
            "🎉 Commercial Complete!",
            [
                f"✅ Script generated ({generated_script.word_count} words)",
                f"✅ Video generated ({num_frames} frames)",
                f"✅ Audio generated ({len(stereo_audio)/sr_final:.1f}s)",
                sync_status,
            ],
        )
        # The update also reveals the download button, which the active
        # interface creates hidden.
        yield (
            done_html,
            final_video_path,
            script_output,
            gr.update(value=final_video_path, visible=True),
        )
    except Exception as e:
        # Logs the message together with the full traceback.
        logger.exception("Commercial generation failed: %s", e)
        # The message may echo user-supplied text, so escape it before
        # embedding it in HTML.
        error_html = f"""
<div class="error-info">
<strong>❌ Generation Failed</strong><br>
<div style="margin-top: 0.5rem; color: #666;">
Error: {html.escape(str(e))}<br>
Please try again with different parameters or check the logs.
</div>
</div>
"""
        yield error_html, None, None, None
def create_interface():
    """Create the Gradio interface and return the ``gr.Blocks`` app.

    Layout: a narrow left column with the commercial setup, voice/music
    options and an "Advanced Settings" accordion, and a wider right column
    with the progress panel, video preview, generated script and a download
    button (created hidden). The two buttons are wired to
    ``roll_script_suggestion`` and ``generate_commercial``.

    Duration limits, voice styles and default models come from the values
    imported from ``config`` rather than being repeated here, so the form
    cannot offer options the rest of the pipeline does not know about.
    """
    # Voice styles: offer exactly the configured ones; keep the historical
    # default when it is available.
    voice_choices = list(VOICE_STYLES.keys())
    preferred_voice = "Announcer '80s"
    if preferred_voice in voice_choices:
        default_voice = preferred_voice
    else:
        default_voice = voice_choices[0] if voice_choices else None

    # Model lists: make sure the configured default is always selectable.
    video_models = ["damo-vilab/text-to-video-ms-1.7b", "THUDM/CogVideoX-5b"]
    if MODEL_VIDEO not in video_models:
        video_models.insert(0, MODEL_VIDEO)
    audio_models = ["parler-tts/parler-tts-mini-v1", "SWivid/F5-TTS"]
    if MODEL_AUDIO not in audio_models:
        audio_models.insert(0, MODEL_AUDIO)

    # Keep the 10 s default inside whatever range the configuration allows.
    default_duration = min(max(10, MIN_DURATION), MAX_DURATION)

    with gr.Blocks(
        title="EceMotion Pictures",
        theme=gr.themes.Soft(),
        css="""
        .progress-info {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 1rem;
        border-radius: 10px;
        margin: 1rem 0;
        }
        .error-info {
        background: linear-gradient(135deg, #ff6b6b 0%, #ee5a24 100%);
        color: white;
        padding: 1rem;
        border-radius: 10px;
        margin: 1rem 0;
        }
        """,
    ) as demo:
        gr.Markdown("""
        # 🎬 EceMotion Pictures
        ## AI-Powered 1980s Style Commercial Generator
        Create authentic retro commercials with perfect audio-video synchronization!
        """)

        with gr.Row():
            # Left column - inputs
            with gr.Column(scale=1):
                gr.Markdown("### Commercial Setup")
                brand = gr.Textbox(
                    label="Brand Name",
                    placeholder="Enter your brand name...",
                    value="EceMotion Pictures",
                )
                structure = gr.Dropdown(
                    label="Commercial Structure",
                    choices=[
                        "Montage → Close-up → Logo stinger",
                        "Problem → Solution → Call to action",
                        "Story → Product reveal → Tagline",
                        "Before/After → Benefits → Brand",
                    ],
                    value="Montage → Close-up → Logo stinger",
                )
                script_prompt = gr.Textbox(
                    label="Script Hook",
                    placeholder="Enter your script prompt...",
                    value="Remember when technology was simple?",
                )
                duration = gr.Slider(
                    label="Duration (seconds)",
                    minimum=MIN_DURATION,
                    maximum=MAX_DURATION,
                    value=default_duration,
                    step=1,
                )
                with gr.Row():
                    voice = gr.Dropdown(
                        label="Voice Style",
                        choices=voice_choices,
                        value=default_voice,
                    )
                    music = gr.Checkbox(label="Background Music", value=True)
                with gr.Accordion("Advanced Settings", open=False):
                    model_video = gr.Dropdown(
                        label="Video Model",
                        choices=video_models,
                        value=MODEL_VIDEO,
                    )
                    model_audio = gr.Dropdown(
                        label="Audio Model",
                        choices=audio_models,
                        value=MODEL_AUDIO,
                    )
                    vhs_intensity = gr.Slider(
                        label="VHS Effect Intensity",
                        minimum=0.0,
                        maximum=1.0,
                        value=0.5,
                        step=0.1,
                    )
                    seed = gr.Number(label="Random Seed", value=42, precision=0)
                with gr.Row():
                    roll_btn = gr.Button("🎲 Roll Script", variant="secondary")
                    generate_btn = gr.Button("🎬 Generate Commercial", variant="primary")

            # Right column - outputs
            with gr.Column(scale=2):
                gr.Markdown("### Generated Commercial")
                progress_info = gr.HTML()
                output_video = gr.Video(label="Commercial Preview")
                output_script = gr.Textbox(label="Generated Script", lines=10)
                download_btn = gr.DownloadButton("📥 Download Commercial", visible=False)

        # Connect event handlers
        roll_btn.click(
            roll_script_suggestion,
            inputs=[structure, seed],
            outputs=[script_prompt],
        )
        generate_btn.click(
            generate_commercial,
            inputs=[
                brand, structure, script_prompt, duration, voice, music,
                model_video, model_audio, vhs_intensity, seed,
            ],
            outputs=[progress_info, output_video, output_script, download_btn],
        )

    return demo
def main():
    """Log the active device and model names, then build and serve the UI.

    The server listens on all interfaces at port 7860, without a public
    share link, and with error display enabled.
    """
    logger.info("Starting EceMotion Pictures")
    logger.info(f"Using device: {DEVICE}")
    logger.info(f"Video model: {MODEL_VIDEO}")
    logger.info(f"Audio model: {MODEL_AUDIO}")
    logger.info(f"LLM model: {MODEL_LLM}")

    # Build the app, then start the server with the fixed launch settings.
    launch_settings = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    app = create_interface()
    app.launch(**launch_settings)


if __name__ == "__main__":
    main()