#!/usr/bin/env python3
import os
import tempfile
import subprocess
import sqlite3
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any, List
import logging
import wave
import shutil
logger = logging.getLogger("videogenerator")
try:
import torch
TORCH_AVAILABLE = True
except Exception:
TORCH_AVAILABLE = False
torch = None
try:
from PIL import Image
PIL_AVAILABLE = True
except Exception:
PIL_AVAILABLE = False
try:
import ffmpeg
FFMPEG_AVAILABLE = True
except Exception:
FFMPEG_AVAILABLE = False
try:
from TTS.api import TTS
TTS_AVAILABLE = True
except Exception:
TTS_AVAILABLE = False
class VideoGenerator:
"""
Offline text-to-video generator with local enhancement (ESRGAN + RIFE).
No external API or internet required.
IMPORTANT: If you want the returned URLs to be reachable by your frontend,
configure your web server or app to serve the `workdir` directory under the
public URL prefix defined by the environment variable VIDEO_PUBLIC_URL
(default: /static/video_sandbox). Example (FastAPI):
app.mount("/static/video_sandbox", StaticFiles(directory="/tmp/video_sandbox"), name="videos")
"""
    def __init__(self, workdir="/tmp/video_sandbox", db_name="history.db"):
        # Writable base directory: the VIDEO_SANDBOX_DIR env var overrides the workdir argument
        base_dir = Path(os.getenv("VIDEO_SANDBOX_DIR", workdir))
        self.workdir = base_dir.resolve()
self.output_dir = self.workdir / "output"
self.frames_dir = self.workdir / "frames"
self.audio_dir = self.workdir / "audio"
self.db_path = self.workdir / db_name
# Public base URL mapping (what frontend will use). No trailing slash.
self.public_base_url = os.getenv("VIDEO_PUBLIC_URL", "/static/video_sandbox").rstrip("/")
# Create directories safely
for d in [self.workdir, self.output_dir, self.frames_dir, self.audio_dir]:
d.mkdir(parents=True, exist_ok=True)
self._init_db()
self.device = "cuda" if (TORCH_AVAILABLE and torch and torch.cuda.is_available()) else "cpu"
logger.info(f"VideoGenerator initialized with device: {self.device}, public_base_url: {self.public_base_url}")
# ---------------- Database ----------------
def _init_db(self):
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
c.execute(
"""CREATE TABLE IF NOT EXISTS history (
id INTEGER PRIMARY KEY AUTOINCREMENT,
prompt TEXT,
image_path TEXT,
audio_path TEXT,
output_path TEXT,
enhanced_path TEXT,
created_at TEXT
)"""
)
conn.commit()
conn.close()
def _save_history(self, prompt, image_path, audio_path, output_path, enhanced_path=None):
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
c.execute(
"INSERT INTO history (prompt, image_path, audio_path, output_path, enhanced_path, created_at) VALUES (?, ?, ?, ?, ?, ?)",
(prompt, image_path, audio_path, output_path, enhanced_path, datetime.now().isoformat()),
)
conn.commit()
conn.close()
# ---------------- Frame + Audio Generation ----------------
def _generate_frames_from_text(self, prompt, num_frames=16, resolution=(512, 512)):
"""
Generate creative video frames with realistic scenes based on prompt.
Simulates beings (humans, animals) and objects with motion and detail.
"""
        if not PIL_AVAILABLE:
            raise RuntimeError("Pillow (PIL) is required for frame generation")
        frames = []
        from PIL import ImageDraw, ImageFont
        import math
        import random
# Parse prompt for content type
prompt_lower = prompt.lower()
# Detect subjects
has_human = any(word in prompt_lower for word in ["person", "man", "woman", "human", "people", "walking", "running"])
has_animal = any(word in prompt_lower for word in ["dog", "cat", "bird", "animal", "flying", "swimming"])
has_nature = any(word in prompt_lower for word in ["tree", "forest", "mountain", "sky", "cloud", "sunset", "sunrise"])
has_city = any(word in prompt_lower for word in ["city", "building", "car", "street", "urban", "skyline"])
# Scene setup
scene_type = "abstract"
if has_human:
scene_type = "human"
elif has_animal:
scene_type = "animal"
elif has_nature:
scene_type = "nature"
elif has_city:
scene_type = "city"
for i in range(num_frames):
img = Image.new("RGB", resolution, (0, 0, 0))
draw = ImageDraw.Draw(img)
progress = i / max(1, num_frames - 1)
# Dynamic background based on scene
if scene_type == "nature":
# Sky gradient
for y in range(resolution[1]):
sky_progress = y / resolution[1]
r = int(135 + 50 * sky_progress)
g = int(206 - 50 * sky_progress)
b = int(235 - 30 * sky_progress)
draw.line([(0, y), (resolution[0], y)], fill=(r, g, b))
# Ground
ground_y = int(resolution[1] * 0.7)
draw.rectangle([0, ground_y, resolution[0], resolution[1]], fill=(34, 139, 34))
# Trees
for tree_x in range(50, resolution[0], 100):
trunk_x = tree_x + int(20 * math.sin(progress * 2 * math.pi))
draw.rectangle([trunk_x, ground_y-60, trunk_x+20, ground_y], fill=(101, 67, 33))
draw.ellipse([trunk_x-30, ground_y-100, trunk_x+50, ground_y-40], fill=(0, 128, 0))
# Clouds
for cloud_x in range(100, resolution[0], 150):
cloud_offset = int(progress * 50)
cx = (cloud_x + cloud_offset) % resolution[0]
cy = 80 + int(10 * math.sin(progress * math.pi))
draw.ellipse([cx-40, cy-20, cx+40, cy+20], fill=(255, 255, 255))
draw.ellipse([cx-20, cy-15, cx+60, cy+25], fill=(255, 255, 255))
elif scene_type == "city":
# Sky
for y in range(resolution[1] // 2):
sky_val = int(100 + 100 * (y / (resolution[1] // 2)))
draw.line([(0, y), (resolution[0], y)], fill=(sky_val, sky_val, sky_val + 50))
                # Buildings (fixed seed so heights and colors stay stable across frames)
                random.seed(1234)
                for bldg_x in range(0, resolution[0], 80):
                    height = random.randint(150, 300)
y_start = resolution[1] - height
draw.rectangle([bldg_x, y_start, bldg_x+70, resolution[1]],
fill=(random.randint(100, 150), random.randint(100, 150), random.randint(100, 150)))
for win_y in range(y_start + 20, resolution[1], 30):
for win_x in range(bldg_x + 10, bldg_x + 60, 20):
light = random.choice([True, False])
color = (255, 255, 200) if light else (50, 50, 50)
draw.rectangle([win_x, win_y, win_x+10, win_y+15], fill=color)
# Moving car
car_x = int(progress * resolution[0])
car_y = resolution[1] - 40
draw.rectangle([car_x, car_y, car_x+60, car_y+25], fill=(255, 0, 0))
draw.ellipse([car_x+10, car_y+20, car_x+25, car_y+35], fill=(0, 0, 0))
draw.ellipse([car_x+45, car_y+20, car_x+60, car_y+35], fill=(0, 0, 0))
elif scene_type == "human":
draw.rectangle([0, 0, resolution[0], resolution[1]], fill=(200, 220, 255))
ground_y = int(resolution[1] * 0.75)
draw.rectangle([0, ground_y, resolution[0], resolution[1]], fill=(150, 150, 150))
person_x = int(100 + progress * (resolution[0] - 200))
person_y = ground_y - 100
leg_offset = int(20 * math.sin(progress * 10))
draw.ellipse([person_x+15, person_y, person_x+45, person_y+30], fill=(255, 220, 177))
draw.rectangle([person_x+20, person_y+30, person_x+40, person_y+70], fill=(0, 0, 255))
draw.line([(person_x+20, person_y+40), (person_x+5, person_y+60)], fill=(255, 220, 177), width=5)
draw.line([(person_x+40, person_y+40), (person_x+55, person_y+60)], fill=(255, 220, 177), width=5)
draw.line([(person_x+25, person_y+70), (person_x+20+leg_offset, ground_y)], fill=(0, 0, 139), width=5)
draw.line([(person_x+35, person_y+70), (person_x+40-leg_offset, ground_y)], fill=(0, 0, 139), width=5)
elif scene_type == "animal":
for y in range(resolution[1]):
val = int(180 + 50 * (y / resolution[1]))
draw.line([(0, y), (resolution[0], y)], fill=(val, val-20, val-40))
ground_y = int(resolution[1] * 0.8)
draw.rectangle([0, ground_y, resolution[0], resolution[1]], fill=(139, 90, 43))
animal_x = int(50 + progress * (resolution[0] - 150))
animal_y = ground_y - 60
draw.ellipse([animal_x, animal_y, animal_x+80, animal_y+40], fill=(139, 69, 19))
draw.ellipse([animal_x+60, animal_y-20, animal_x+100, animal_y+20], fill=(139, 69, 19))
draw.polygon([(animal_x+65, animal_y-20), (animal_x+70, animal_y-35), (animal_x+75, animal_y-20)], fill=(101, 67, 33))
draw.polygon([(animal_x+85, animal_y-20), (animal_x+90, animal_y-35), (animal_x+95, animal_y-20)], fill=(101, 67, 33))
leg_anim = int(5 * math.sin(progress * 15))
for leg_x in [animal_x+10, animal_x+30, animal_x+50, animal_x+70]:
draw.rectangle([leg_x, animal_y+40, leg_x+8, ground_y+leg_anim], fill=(101, 67, 33))
tail_angle = 20 * math.sin(progress * 10)
tail_end_x = animal_x - 20 + int(tail_angle)
tail_end_y = animal_y + 10
draw.line([(animal_x, animal_y+20), (tail_end_x, tail_end_y)], fill=(101, 67, 33), width=5)
else:
for y in range(resolution[1]):
color_val = int(y / resolution[1] * 255)
r = int(50 + 100 * progress + color_val // 3)
g = int(100 + 80 * progress + color_val // 3)
b = int(150 + 50 * progress + color_val // 3)
draw.line([(0, y), (resolution[0], y)], fill=(min(255, r), min(255, g), min(255, b)))
for j in range(5):
x = int((j * 100 + progress * 200) % resolution[0])
y = int(resolution[1] // 2 + 50 * math.sin(progress * 2 * math.pi + j))
radius = 20 + int(10 * math.sin(progress * math.pi + j))
color = (
int(255 * abs(math.sin(progress * math.pi + j))),
int(255 * abs(math.cos(progress * math.pi + j))),
int(255 * abs(math.sin(progress * 2 * math.pi + j)))
)
draw.ellipse([x-radius, y-radius, x+radius, y+radius], fill=color)
try:
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 32)
            except Exception:
font = ImageFont.load_default()
text = prompt[:40] if len(prompt) <= 40 else prompt[:37] + "..."
bbox = draw.textbbox((0, 0), text, font=font)
text_width = bbox[2] - bbox[0]
x = (resolution[0] - text_width) // 2
y = 20
for offset in [(-1,-1), (-1,1), (1,-1), (1,1)]:
draw.text((x+offset[0], y+offset[1]), text, font=font, fill=(0, 0, 0))
draw.text((x, y), text, font=font, fill=(255, 255, 255))
frames.append(img)
return frames
def _combine_frames_to_video(self, frames, out_path, fps=8):
if not FFMPEG_AVAILABLE:
raise RuntimeError("ffmpeg-python not available")
# ensure a string dir is passed to mkdtemp
tmp_dir = tempfile.mkdtemp(dir=str(self.frames_dir))
try:
for i, frame in enumerate(frames):
frame_path = os.path.join(tmp_dir, f"frame_{i:03d}.png")
frame.save(frame_path)
(
ffmpeg
.input(os.path.join(tmp_dir, "frame_%03d.png"), framerate=fps)
.output(out_path, vcodec='libx264', pix_fmt='yuv420p')
.overwrite_output()
.run(quiet=True, capture_stdout=True, capture_stderr=True)
)
except Exception as e:
logger.error(f"Failed to combine frames: {e}")
raise
finally:
# Cleanup temp directory
try:
shutil.rmtree(tmp_dir, ignore_errors=True)
except Exception:
pass
return out_path
def _synthesize_audio(self, text, out_path):
"""Synthesize audio with better quality and error handling"""
if not TTS_AVAILABLE:
logger.warning("TTS not available, creating audio with beeps")
duration = max(2.0, len(text.split()) * 0.5)
# Try ffmpeg with tone generation
if shutil.which("ffmpeg"):
try:
subprocess.run([
"ffmpeg", "-f", "lavfi",
"-i", f"sine=frequency=440:duration={duration}",
"-ar", "22050",
"-y", out_path
], capture_output=True, check=True, timeout=30)
logger.info(f"Created tone audio at {out_path}")
return out_path
except Exception as e:
logger.warning(f"ffmpeg tone generation failed: {e}")
# Fallback to silent WAV
try:
sample_rate = 22050
n_channels = 1
sampwidth = 2
n_frames = int(duration * sample_rate)
with wave.open(out_path, "wb") as wf:
wf.setnchannels(n_channels)
wf.setsampwidth(sampwidth)
wf.setframerate(sample_rate)
wf.writeframes(b'\x00\x00' * n_frames)
logger.info(f"Created silent WAV at {out_path}")
return out_path
except Exception as e:
logger.error(f"Failed to create audio: {e}")
raise
try:
# Use TTS with better error handling
logger.info("Synthesizing audio with TTS...")
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
tts.tts_to_file(text=text, file_path=out_path)
logger.info(f"TTS synthesis successful: {out_path}")
except Exception as e:
logger.error(f"TTS synthesis failed: {e}, falling back to tone")
# Fallback to tone
if shutil.which("ffmpeg"):
duration = max(2.0, len(text.split()) * 0.5)
subprocess.run([
"ffmpeg", "-f", "lavfi",
"-i", f"sine=frequency=440:duration={duration}",
"-ar", "22050",
"-y", out_path
], capture_output=True, check=True, timeout=30)
return out_path
def _merge_audio_video(self, video_path, audio_path, out_path):
"""Merge audio and video with better error handling"""
if not FFMPEG_AVAILABLE:
raise RuntimeError("ffmpeg-python not available")
try:
logger.info(f"Merging video {video_path} with audio {audio_path}")
video_in = ffmpeg.input(video_path)
audio_in = ffmpeg.input(audio_path)
(
ffmpeg
.output(video_in, audio_in, out_path,
vcodec='libx264',
acodec='aac',
audio_bitrate='128k',
shortest=None,
**{'b:v': '2M'})
.overwrite_output()
.run(capture_stdout=True, capture_stderr=True)
)
logger.info(f"Successfully merged video and audio into {out_path}")
except ffmpeg.Error as e:
logger.error(f"FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}")
raise
except Exception as e:
logger.error(f"Failed to merge audio and video: {e}")
raise
return out_path
# ---------------- Enhancement Modules ----------------
def enhance_video(self, input_path, scale=2, smooth=True):
"""
Uses Real-ESRGAN and RIFE (local binaries) for upscale and motion smoothing.
Requires realesrgan-ncnn-vulkan and rife-ncnn-vulkan in workdir.
"""
realesrgan_bin = str(self.workdir / "realesrgan" / "realesrgan-ncnn-vulkan")
rife_bin = str(self.workdir / "rife" / "rife-ncnn-vulkan")
# Add .exe extension on Windows
if os.name == "nt":
realesrgan_bin += ".exe"
rife_bin += ".exe"
input_p = Path(input_path)
upscaled = str(input_p.with_name(input_p.stem + "_upscaled" + input_p.suffix))
smoothed = str(input_p.with_name(input_p.stem + "_smoothed" + input_p.suffix))
if os.path.exists(realesrgan_bin):
try:
result = subprocess.run(
[realesrgan_bin, "-i", input_path, "-o", upscaled, "-s", str(scale)],
capture_output=True, timeout=300
)
if result.returncode != 0:
logger.warning(f"ESRGAN failed: {result.stderr.decode(errors='ignore')}")
upscaled = input_path
except Exception as e:
logger.warning(f"ESRGAN enhancement failed: {e}")
upscaled = input_path
else:
logger.info("ESRGAN binary not found, skipping upscaling")
upscaled = input_path
if smooth and os.path.exists(rife_bin):
try:
result = subprocess.run(
[rife_bin, "-i", upscaled, "-o", smoothed],
capture_output=True, timeout=300
)
if result.returncode != 0:
logger.warning(f"RIFE failed: {result.stderr.decode(errors='ignore')}")
return upscaled
return smoothed
except Exception as e:
logger.warning(f"RIFE smoothing failed: {e}")
return upscaled
else:
logger.info("RIFE binary not found or smoothing disabled")
return upscaled
# ---------------- Path -> URL mapping ----------------
def _fs_path_to_public_url(self, fs_path: Optional[str]) -> Optional[str]:
"""
Convert an absolute filesystem path under self.workdir into a public URL
using self.public_base_url. If the path is not under workdir, returns None.
"""
if not fs_path:
return None
try:
p = Path(fs_path).resolve()
rel = p.relative_to(self.workdir)
# Use POSIX-style path for URL
return f"{self.public_base_url}/{rel.as_posix()}"
except Exception:
# If the file is not inside the workdir, we can't map it safely.
return None
# ---------------- Core Generator ----------------
def generate(
self,
prompt: str,
image_path: str = None,
audio_path: str = None,
output_name: str = None,
num_frames: int = 16,
fps: int = 8,
enhance=True,
duration_minutes: float = None,
):
"""Generate video with support for longer durations up to 10 minutes"""
if duration_minutes:
duration_minutes = min(duration_minutes, 10)
num_frames = int(duration_minutes * 60 * fps)
logger.info(f"Generating {duration_minutes} minute video with {num_frames} frames at {fps} fps")
try:
if not output_name:
output_name = f"video_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
output_name_safe = Path(output_name).name
output_path = str(self.output_dir / output_name_safe)
frames = self._generate_frames_from_text(prompt, num_frames)
raw_video = self._combine_frames_to_video(frames, out_path=output_path, fps=fps)
if not audio_path:
audio_out = str(self.audio_dir / f"{Path(output_name_safe).stem}.wav")
self._synthesize_audio(prompt, audio_out)
audio_path = audio_out
final_out = str(self.output_dir / f"final_{Path(output_name_safe).name}")
self._merge_audio_video(raw_video, audio_path, final_out)
enhanced_path = None
if enhance:
try:
enhanced_path = self.enhance_video(final_out)
except Exception as e:
logger.warning(f"Enhancement step failed: {e}")
enhanced_path = None
# Save history with filesystem paths
self._save_history(prompt, image_path, audio_path, final_out, enhanced_path)
# Convert filesystem paths to public URLs when possible
video_url = self._fs_path_to_public_url(final_out) or ""
enhanced_url = self._fs_path_to_public_url(enhanced_path) or ""
audio_url = self._fs_path_to_public_url(audio_path) or ""
return {
"video": final_out or "",
"video_url": video_url,
"enhanced": enhanced_path or "",
"enhanced_url": enhanced_url,
"audio": audio_path or "",
"audio_url": audio_url,
"frames": len(frames),
"status": "success"
}
except Exception as e:
logger.error(f"Video generation failed: {e}")
# ✅ Always return valid keys even when failing
return {
"video": "",
"video_url": "",
"enhanced": "",
"enhanced_url": "",
"audio": "",
"audio_url": "",
"frames": 0,
"status": "error",
"error": str(e)
}
# ---------------- Utility ----------------
def get_history(self, limit=20) -> List[Dict[str, Any]]:
"""Get video generation history"""
try:
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
c.execute("SELECT * FROM history ORDER BY id DESC LIMIT ?", (limit,))
rows = c.fetchall()
conn.close()
# Convert to dict format
history = []
for row in rows:
history.append({
"id": row[0],
"prompt": row[1],
"image_path": row[2],
"audio_path": row[3],
"output_path": row[4],
"enhanced_path": row[5],
"created_at": row[6]
})
return history
except Exception as e:
logger.error(f"Failed to get history: {e}")
return []
def get_status(self) -> Dict[str, Any]:
"""Get generator status and capabilities"""
return {
"torch_available": TORCH_AVAILABLE,
"pil_available": PIL_AVAILABLE,
"ffmpeg_available": FFMPEG_AVAILABLE,
"tts_available": TTS_AVAILABLE,
"device": self.device,
"workdir": str(self.workdir),
"output_dir": str(self.output_dir),
"public_base_url": self.public_base_url
}
if __name__ == "__main__":
# Basic demo run
vg = VideoGenerator()
result = vg.generate("A bright futuristic city skyline with flying cars and neon lights.")
print("Generated:", result)#!/usr/bin/env python3
import os
import tempfile
import subprocess
import sqlite3
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any, List
import logging
import wave
import shutil
logger = logging.getLogger("videogenerator")
try:
import torch
TORCH_AVAILABLE = True
except Exception:
TORCH_AVAILABLE = False
torch = None
try:
from PIL import Image
PIL_AVAILABLE = True
except Exception:
PIL_AVAILABLE = False
try:
import ffmpeg
FFMPEG_AVAILABLE = True
except Exception:
FFMPEG_AVAILABLE = False
try:
from TTS.api import TTS
TTS_AVAILABLE = True
except Exception:
TTS_AVAILABLE = False
class VideoGenerator:
"""
Offline text-to-video generator with local enhancement (ESRGAN + RIFE).
No external API or internet required.
IMPORTANT: If you want the returned URLs to be reachable by your frontend,
configure your web server or app to serve the `workdir` directory under the
public URL prefix defined by the environment variable VIDEO_PUBLIC_URL
(default: /static/video_sandbox). Example (FastAPI):
app.mount("/static/video_sandbox", StaticFiles(directory="/tmp/video_sandbox"), name="videos")
"""
def __init__(self, workdir="video_sandbox", db_name="history.db"):
# Use a writable directory in environment or /tmp
base_dir = Path(os.getenv("VIDEO_SANDBOX_DIR", "/tmp/video_sandbox"))
self.workdir = base_dir.resolve()
self.output_dir = self.workdir / "output"
self.frames_dir = self.workdir / "frames"
self.audio_dir = self.workdir / "audio"
self.db_path = self.workdir / db_name
# Public base URL mapping (what frontend will use). No trailing slash.
self.public_base_url = os.getenv("VIDEO_PUBLIC_URL", "/static/video_sandbox").rstrip("/")
# Create directories safely
for d in [self.workdir, self.output_dir, self.frames_dir, self.audio_dir]:
d.mkdir(parents=True, exist_ok=True)
self._init_db()
self.device = "cuda" if (TORCH_AVAILABLE and torch and torch.cuda.is_available()) else "cpu"
logger.info(f"VideoGenerator initialized with device: {self.device}, public_base_url: {self.public_base_url}")
# ---------------- Database ----------------
def _init_db(self):
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
c.execute(
"""CREATE TABLE IF NOT EXISTS history (
id INTEGER PRIMARY KEY AUTOINCREMENT,
prompt TEXT,
image_path TEXT,
audio_path TEXT,
output_path TEXT,
enhanced_path TEXT,
created_at TEXT
)"""
)
conn.commit()
conn.close()
def _save_history(self, prompt, image_path, audio_path, output_path, enhanced_path=None):
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
c.execute(
"INSERT INTO history (prompt, image_path, audio_path, output_path, enhanced_path, created_at) VALUES (?, ?, ?, ?, ?, ?)",
(prompt, image_path, audio_path, output_path, enhanced_path, datetime.now().isoformat()),
)
conn.commit()
conn.close()
# ---------------- Frame + Audio Generation ----------------
def _generate_frames_from_text(self, prompt, num_frames=16, resolution=(512, 512)):
"""
Generate creative video frames with realistic scenes based on prompt.
Simulates beings (humans, animals) and objects with motion and detail.
"""
frames = []
from PIL import ImageDraw, ImageFont
import math
import random
# Parse prompt for content type
prompt_lower = prompt.lower()
# Detect subjects
has_human = any(word in prompt_lower for word in ["person", "man", "woman", "human", "people", "walking", "running"])
has_animal = any(word in prompt_lower for word in ["dog", "cat", "bird", "animal", "flying", "swimming"])
has_nature = any(word in prompt_lower for word in ["tree", "forest", "mountain", "sky", "cloud", "sunset", "sunrise"])
has_city = any(word in prompt_lower for word in ["city", "building", "car", "street", "urban", "skyline"])
# Scene setup
scene_type = "abstract"
if has_human:
scene_type = "human"
elif has_animal:
scene_type = "animal"
elif has_nature:
scene_type = "nature"
elif has_city:
scene_type = "city"
for i in range(num_frames):
img = Image.new("RGB", resolution, (0, 0, 0))
draw = ImageDraw.Draw(img)
progress = i / max(1, num_frames - 1)
# Dynamic background based on scene
if scene_type == "nature":
# Sky gradient
for y in range(resolution[1]):
sky_progress = y / resolution[1]
r = int(135 + 50 * sky_progress)
g = int(206 - 50 * sky_progress)
b = int(235 - 30 * sky_progress)
draw.line([(0, y), (resolution[0], y)], fill=(r, g, b))
# Ground
ground_y = int(resolution[1] * 0.7)
draw.rectangle([0, ground_y, resolution[0], resolution[1]], fill=(34, 139, 34))
# Trees
for tree_x in range(50, resolution[0], 100):
trunk_x = tree_x + int(20 * math.sin(progress * 2 * math.pi))
draw.rectangle([trunk_x, ground_y-60, trunk_x+20, ground_y], fill=(101, 67, 33))
draw.ellipse([trunk_x-30, ground_y-100, trunk_x+50, ground_y-40], fill=(0, 128, 0))
# Clouds
for cloud_x in range(100, resolution[0], 150):
cloud_offset = int(progress * 50)
cx = (cloud_x + cloud_offset) % resolution[0]
cy = 80 + int(10 * math.sin(progress * math.pi))
draw.ellipse([cx-40, cy-20, cx+40, cy+20], fill=(255, 255, 255))
draw.ellipse([cx-20, cy-15, cx+60, cy+25], fill=(255, 255, 255))
elif scene_type == "city":
# Sky
for y in range(resolution[1] // 2):
sky_val = int(100 + 100 * (y / (resolution[1] // 2)))
draw.line([(0, y), (resolution[0], y)], fill=(sky_val, sky_val, sky_val + 50))
# Buildings
for bldg_x in range(0, resolution[0], 80):
height = random.randint(150, 300)
y_start = resolution[1] - height
draw.rectangle([bldg_x, y_start, bldg_x+70, resolution[1]],
fill=(random.randint(100, 150), random.randint(100, 150), random.randint(100, 150)))
for win_y in range(y_start + 20, resolution[1], 30):
for win_x in range(bldg_x + 10, bldg_x + 60, 20):
light = random.choice([True, False])
color = (255, 255, 200) if light else (50, 50, 50)
draw.rectangle([win_x, win_y, win_x+10, win_y+15], fill=color)
# Moving car
car_x = int(progress * resolution[0])
car_y = resolution[1] - 40
draw.rectangle([car_x, car_y, car_x+60, car_y+25], fill=(255, 0, 0))
draw.ellipse([car_x+10, car_y+20, car_x+25, car_y+35], fill=(0, 0, 0))
draw.ellipse([car_x+45, car_y+20, car_x+60, car_y+35], fill=(0, 0, 0))
elif scene_type == "human":
draw.rectangle([0, 0, resolution[0], resolution[1]], fill=(200, 220, 255))
ground_y = int(resolution[1] * 0.75)
draw.rectangle([0, ground_y, resolution[0], resolution[1]], fill=(150, 150, 150))
person_x = int(100 + progress * (resolution[0] - 200))
person_y = ground_y - 100
leg_offset = int(20 * math.sin(progress * 10))
draw.ellipse([person_x+15, person_y, person_x+45, person_y+30], fill=(255, 220, 177))
draw.rectangle([person_x+20, person_y+30, person_x+40, person_y+70], fill=(0, 0, 255))
draw.line([(person_x+20, person_y+40), (person_x+5, person_y+60)], fill=(255, 220, 177), width=5)
draw.line([(person_x+40, person_y+40), (person_x+55, person_y+60)], fill=(255, 220, 177), width=5)
draw.line([(person_x+25, person_y+70), (person_x+20+leg_offset, ground_y)], fill=(0, 0, 139), width=5)
draw.line([(person_x+35, person_y+70), (person_x+40-leg_offset, ground_y)], fill=(0, 0, 139), width=5)
elif scene_type == "animal":
for y in range(resolution[1]):
val = int(180 + 50 * (y / resolution[1]))
draw.line([(0, y), (resolution[0], y)], fill=(val, val-20, val-40))
ground_y = int(resolution[1] * 0.8)
draw.rectangle([0, ground_y, resolution[0], resolution[1]], fill=(139, 90, 43))
animal_x = int(50 + progress * (resolution[0] - 150))
animal_y = ground_y - 60
draw.ellipse([animal_x, animal_y, animal_x+80, animal_y+40], fill=(139, 69, 19))
draw.ellipse([animal_x+60, animal_y-20, animal_x+100, animal_y+20], fill=(139, 69, 19))
draw.polygon([(animal_x+65, animal_y-20), (animal_x+70, animal_y-35), (animal_x+75, animal_y-20)], fill=(101, 67, 33))
draw.polygon([(animal_x+85, animal_y-20), (animal_x+90, animal_y-35), (animal_x+95, animal_y-20)], fill=(101, 67, 33))
leg_anim = int(5 * math.sin(progress * 15))
for leg_x in [animal_x+10, animal_x+30, animal_x+50, animal_x+70]:
draw.rectangle([leg_x, animal_y+40, leg_x+8, ground_y+leg_anim], fill=(101, 67, 33))
tail_angle = 20 * math.sin(progress * 10)
tail_end_x = animal_x - 20 + int(tail_angle)
tail_end_y = animal_y + 10
draw.line([(animal_x, animal_y+20), (tail_end_x, tail_end_y)], fill=(101, 67, 33), width=5)
else:
for y in range(resolution[1]):
color_val = int(y / resolution[1] * 255)
r = int(50 + 100 * progress + color_val // 3)
g = int(100 + 80 * progress + color_val // 3)
b = int(150 + 50 * progress + color_val // 3)
draw.line([(0, y), (resolution[0], y)], fill=(min(255, r), min(255, g), min(255, b)))
for j in range(5):
x = int((j * 100 + progress * 200) % resolution[0])
y = int(resolution[1] // 2 + 50 * math.sin(progress * 2 * math.pi + j))
radius = 20 + int(10 * math.sin(progress * math.pi + j))
color = (
int(255 * abs(math.sin(progress * math.pi + j))),
int(255 * abs(math.cos(progress * math.pi + j))),
int(255 * abs(math.sin(progress * 2 * math.pi + j)))
)
draw.ellipse([x-radius, y-radius, x+radius, y+radius], fill=color)
try:
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 32)
except:
font = ImageFont.load_default()
text = prompt[:40] if len(prompt) <= 40 else prompt[:37] + "..."
bbox = draw.textbbox((0, 0), text, font=font)
text_width = bbox[2] - bbox[0]
x = (resolution[0] - text_width) // 2
y = 20
for offset in [(-1,-1), (-1,1), (1,-1), (1,1)]:
draw.text((x+offset[0], y+offset[1]), text, font=font, fill=(0, 0, 0))
draw.text((x, y), text, font=font, fill=(255, 255, 255))
frames.append(img)
return frames
def _combine_frames_to_video(self, frames, out_path, fps=8):
if not FFMPEG_AVAILABLE:
raise RuntimeError("ffmpeg-python not available")
# ensure a string dir is passed to mkdtemp
tmp_dir = tempfile.mkdtemp(dir=str(self.frames_dir))
try:
for i, frame in enumerate(frames):
frame_path = os.path.join(tmp_dir, f"frame_{i:03d}.png")
frame.save(frame_path)
(
ffmpeg
.input(os.path.join(tmp_dir, "frame_%03d.png"), framerate=fps)
.output(out_path, vcodec='libx264', pix_fmt='yuv420p')
.overwrite_output()
.run(quiet=True, capture_stdout=True, capture_stderr=True)
)
except Exception as e:
logger.error(f"Failed to combine frames: {e}")
raise
finally:
# Cleanup temp directory
try:
shutil.rmtree(tmp_dir, ignore_errors=True)
except Exception:
pass
return out_path
def _synthesize_audio(self, text, out_path):
"""Synthesize audio with better quality and error handling"""
if not TTS_AVAILABLE:
logger.warning("TTS not available, creating audio with beeps")
duration = max(2.0, len(text.split()) * 0.5)
# Try ffmpeg with tone generation
if shutil.which("ffmpeg"):
try:
subprocess.run([
"ffmpeg", "-f", "lavfi",
"-i", f"sine=frequency=440:duration={duration}",
"-ar", "22050",
"-y", out_path
], capture_output=True, check=True, timeout=30)
logger.info(f"Created tone audio at {out_path}")
return out_path
except Exception as e:
logger.warning(f"ffmpeg tone generation failed: {e}")
# Fallback to silent WAV
try:
sample_rate = 22050
n_channels = 1
sampwidth = 2
n_frames = int(duration * sample_rate)
with wave.open(out_path, "wb") as wf:
wf.setnchannels(n_channels)
wf.setsampwidth(sampwidth)
wf.setframerate(sample_rate)
wf.writeframes(b'\x00\x00' * n_frames)
logger.info(f"Created silent WAV at {out_path}")
return out_path
except Exception as e:
logger.error(f"Failed to create audio: {e}")
raise
try:
# Use TTS with better error handling
logger.info("Synthesizing audio with TTS...")
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
tts.tts_to_file(text=text, file_path=out_path)
logger.info(f"TTS synthesis successful: {out_path}")
except Exception as e:
logger.error(f"TTS synthesis failed: {e}, falling back to tone")
# Fallback to tone
if shutil.which("ffmpeg"):
duration = max(2.0, len(text.split()) * 0.5)
subprocess.run([
"ffmpeg", "-f", "lavfi",
"-i", f"sine=frequency=440:duration={duration}",
"-ar", "22050",
"-y", out_path
], capture_output=True, check=True, timeout=30)
return out_path
def _merge_audio_video(self, video_path, audio_path, out_path):
"""Merge audio and video with better error handling"""
if not FFMPEG_AVAILABLE:
raise RuntimeError("ffmpeg-python not available")
try:
logger.info(f"Merging video {video_path} with audio {audio_path}")
video_in = ffmpeg.input(video_path)
audio_in = ffmpeg.input(audio_path)
(
ffmpeg
.output(video_in, audio_in, out_path,
vcodec='libx264',
acodec='aac',
audio_bitrate='128k',
shortest=None,
**{'b:v': '2M'})
.overwrite_output()
.run(capture_stdout=True, capture_stderr=True)
)
logger.info(f"Successfully merged video and audio into {out_path}")
except ffmpeg.Error as e:
logger.error(f"FFmpeg error: {e.stderr.decode() if e.stderr else str(e)}")
raise
except Exception as e:
logger.error(f"Failed to merge audio and video: {e}")
raise
return out_path
# ---------------- Enhancement Modules ----------------
def enhance_video(self, input_path, scale=2, smooth=True):
"""
Uses Real-ESRGAN and RIFE (local binaries) for upscale and motion smoothing.
Requires realesrgan-ncnn-vulkan and rife-ncnn-vulkan in workdir.
"""
realesrgan_bin = str(self.workdir / "realesrgan" / "realesrgan-ncnn-vulkan")
rife_bin = str(self.workdir / "rife" / "rife-ncnn-vulkan")
# Add .exe extension on Windows
if os.name == "nt":
realesrgan_bin += ".exe"
rife_bin += ".exe"
input_p = Path(input_path)
upscaled = str(input_p.with_name(input_p.stem + "_upscaled" + input_p.suffix))
smoothed = str(input_p.with_name(input_p.stem + "_smoothed" + input_p.suffix))
if os.path.exists(realesrgan_bin):
try:
result = subprocess.run(
[realesrgan_bin, "-i", input_path, "-o", upscaled, "-s", str(scale)],
capture_output=True, timeout=300
)
if result.returncode != 0:
logger.warning(f"ESRGAN failed: {result.stderr.decode(errors='ignore')}")
upscaled = input_path
except Exception as e:
logger.warning(f"ESRGAN enhancement failed: {e}")
upscaled = input_path
else:
logger.info("ESRGAN binary not found, skipping upscaling")
upscaled = input_path
if smooth and os.path.exists(rife_bin):
try:
result = subprocess.run(
[rife_bin, "-i", upscaled, "-o", smoothed],
capture_output=True, timeout=300
)
if result.returncode != 0:
logger.warning(f"RIFE failed: {result.stderr.decode(errors='ignore')}")
return upscaled
return smoothed
except Exception as e:
logger.warning(f"RIFE smoothing failed: {e}")
return upscaled
else:
logger.info("RIFE binary not found or smoothing disabled")
return upscaled
# ---------------- Path -> URL mapping ----------------
def _fs_path_to_public_url(self, fs_path: Optional[str]) -> Optional[str]:
"""
Convert an absolute filesystem path under self.workdir into a public URL
using self.public_base_url. If the path is not under workdir, returns None.
"""
if not fs_path:
return None
try:
p = Path(fs_path).resolve()
rel = p.relative_to(self.workdir)
# Use POSIX-style path for URL
return f"{self.public_base_url}/{rel.as_posix()}"
except Exception:
# If the file is not inside the workdir, we can't map it safely.
return None
# ---------------- Core Generator ----------------
def generate(
self,
prompt: str,
image_path: str = None,
audio_path: str = None,
output_name: str = None,
num_frames: int = 16,
fps: int = 8,
enhance=True,
duration_minutes: float = None,
):
"""Generate video with support for longer durations up to 10 minutes"""
if duration_minutes:
duration_minutes = min(duration_minutes, 10)
num_frames = int(duration_minutes * 60 * fps)
logger.info(f"Generating {duration_minutes} minute video with {num_frames} frames at {fps} fps")
try:
if not output_name:
output_name = f"video_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
output_name_safe = Path(output_name).name
output_path = str(self.output_dir / output_name_safe)
frames = self._generate_frames_from_text(prompt, num_frames)
raw_video = self._combine_frames_to_video(frames, out_path=output_path, fps=fps)
if not audio_path:
audio_out = str(self.audio_dir / f"{Path(output_name_safe).stem}.wav")
self._synthesize_audio(prompt, audio_out)
audio_path = audio_out
final_out = str(self.output_dir / f"final_{Path(output_name_safe).name}")
self._merge_audio_video(raw_video, audio_path, final_out)
enhanced_path = None
if enhance:
try:
enhanced_path = self.enhance_video(final_out)
except Exception as e:
logger.warning(f"Enhancement step failed: {e}")
enhanced_path = None
# Save history with filesystem paths
self._save_history(prompt, image_path, audio_path, final_out, enhanced_path)
# Convert filesystem paths to public URLs when possible
video_url = self._fs_path_to_public_url(final_out) or ""
enhanced_url = self._fs_path_to_public_url(enhanced_path) or ""
audio_url = self._fs_path_to_public_url(audio_path) or ""
return {
"video": final_out or "",
"video_url": video_url,
"enhanced": enhanced_path or "",
"enhanced_url": enhanced_url,
"audio": audio_path or "",
"audio_url": audio_url,
"frames": len(frames),
"status": "success"
}
except Exception as e:
logger.error(f"Video generation failed: {e}")
# ✅ Always return valid keys even when failing
return {
"video": "",
"video_url": "",
"enhanced": "",
"enhanced_url": "",
"audio": "",
"audio_url": "",
"frames": 0,
"status": "error",
"error": str(e)
}
# ---------------- Utility ----------------
def get_history(self, limit=20) -> List[Dict[str, Any]]:
"""Get video generation history"""
try:
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
c.execute("SELECT * FROM history ORDER BY id DESC LIMIT ?", (limit,))
rows = c.fetchall()
conn.close()
# Convert to dict format
history = []
for row in rows:
history.append({
"id": row[0],
"prompt": row[1],
"image_path": row[2],
"audio_path": row[3],
"output_path": row[4],
"enhanced_path": row[5],
"created_at": row[6]
})
return history
except Exception as e:
logger.error(f"Failed to get history: {e}")
return []
def get_status(self) -> Dict[str, Any]:
"""Get generator status and capabilities"""
return {
"torch_available": TORCH_AVAILABLE,
"pil_available": PIL_AVAILABLE,
"ffmpeg_available": FFMPEG_AVAILABLE,
"tts_available": TTS_AVAILABLE,
"device": self.device,
"workdir": str(self.workdir),
"output_dir": str(self.output_dir),
"public_base_url": self.public_base_url
}
if __name__ == "__main__":
# Basic demo run
vg = VideoGenerator()
result = vg.generate("A bright futuristic city skyline with flying cars and neon lights.")
print("Generated:", result)