Merge branch 'feature/text-to-music'
app.py CHANGED
@@ -146,6 +146,61 @@ def reap_old_videos(ttl_seconds: int = VIDEO_FILE_TTL_SECONDS) -> None:
         # Temp dir might not exist or be accessible; ignore
         pass
 
+# ---------------------------------------------------------------------------
+# Audio temp-file management (per-session tracking and cleanup)
+# ---------------------------------------------------------------------------
+AUDIO_TEMP_DIR = os.path.join(tempfile.gettempdir(), "anycoder_audio")
+AUDIO_FILE_TTL_SECONDS = 6 * 60 * 60  # 6 hours
+_SESSION_AUDIO_FILES: Dict[str, List[str]] = {}
+_AUDIO_FILES_LOCK = threading.Lock()
+
+
+def _ensure_audio_dir_exists() -> None:
+    try:
+        os.makedirs(AUDIO_TEMP_DIR, exist_ok=True)
+    except Exception:
+        pass
+
+
+def _register_audio_for_session(session_id: Optional[str], file_path: str) -> None:
+    if not session_id or not file_path:
+        return
+    with _AUDIO_FILES_LOCK:
+        if session_id not in _SESSION_AUDIO_FILES:
+            _SESSION_AUDIO_FILES[session_id] = []
+        _SESSION_AUDIO_FILES[session_id].append(file_path)
+
+
+def cleanup_session_audio(session_id: Optional[str]) -> None:
+    if not session_id:
+        return
+    with _AUDIO_FILES_LOCK:
+        file_list = _SESSION_AUDIO_FILES.pop(session_id, [])
+    for path in file_list:
+        try:
+            if path and os.path.exists(path):
+                os.unlink(path)
+        except Exception:
+            pass
+
+
+def reap_old_audio(ttl_seconds: int = AUDIO_FILE_TTL_SECONDS) -> None:
+    try:
+        _ensure_audio_dir_exists()
+        now_ts = time.time()
+        for name in os.listdir(AUDIO_TEMP_DIR):
+            path = os.path.join(AUDIO_TEMP_DIR, name)
+            try:
+                if not os.path.isfile(path):
+                    continue
+                mtime = os.path.getmtime(path)
+                if now_ts - mtime > ttl_seconds:
+                    os.unlink(path)
+            except Exception:
+                pass
+    except Exception:
+        pass
+
 TRANSFORMERS_JS_SYSTEM_PROMPT = """You are an expert web developer creating a transformers.js application. You will generate THREE separate files: index.html, index.js, and style.css.
 
 IMPORTANT: You MUST output ALL THREE files in the following format:
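
Taken together, these helpers give each generation session its own tracked audio files plus a global TTL sweep. A minimal usage sketch, not part of the diff; the session id and the placeholder bytes are made up for illustration:

import os
import uuid

session_id = "demo-session"
_ensure_audio_dir_exists()                                  # make sure AUDIO_TEMP_DIR exists
path = os.path.join(AUDIO_TEMP_DIR, f"{uuid.uuid4()}.wav")
with open(path, "wb") as f:
    f.write(b"...")                                         # stand-in for real audio bytes
_register_audio_for_session(session_id, path)               # track the file for this session

reap_old_audio()                                            # drop any file older than the 6 h TTL
cleanup_session_audio(session_id)                           # drop everything this session created
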
@@ -1529,6 +1584,68 @@ def generate_video_from_text(prompt: str, session_id: Optional[str] = None) -> s
         print(f"Text-to-video generation error: {str(e)}")
         return f"Error generating video (text-to-video): {str(e)}"
 
+def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_id: Optional[str] = None) -> str:
+    """Generate music from a text prompt using ElevenLabs Music API and return an HTML <audio> tag.
+
+    Saves audio to a temp file and references it via file:// URL similar to videos.
+    Requires ELEVENLABS_API_KEY in the environment.
+    """
+    try:
+        api_key = os.getenv('ELEVENLABS_API_KEY')
+        if not api_key:
+            return "Error: ELEVENLABS_API_KEY environment variable is not set."
+
+        headers = {
+            'Content-Type': 'application/json',
+            'xi-api-key': api_key,
+        }
+        payload = {
+            'prompt': (prompt or 'Epic orchestral theme with soaring strings and powerful brass'),
+            'music_length_ms': int(music_length_ms) if music_length_ms else 30000,
+        }
+
+        resp = requests.post('https://api.elevenlabs.io/v1/music/compose', headers=headers, json=payload)
+        try:
+            resp.raise_for_status()
+        except Exception as e:
+            return f"Error generating music: {getattr(e, 'response', resp).text if hasattr(e, 'response') else resp.text}"
+
+        # Persist audio to temp file and return an <audio> element using file:// URL
+        _ensure_audio_dir_exists()
+        file_name = f"{uuid.uuid4()}.wav"
+        file_path = os.path.join(AUDIO_TEMP_DIR, file_name)
+        try:
+            with open(file_path, 'wb') as f:
+                f.write(resp.content)
+            _register_audio_for_session(session_id, file_path)
+        except Exception as save_exc:
+            return f"Error generating music: could not save audio file ({save_exc})"
+
+        # Build file URI
+        try:
+            from pathlib import Path
+            file_url = Path(file_path).as_uri()
+        except Exception:
+            if file_path.startswith('/'):
+                file_url = f"file:///{file_path.lstrip('/')}"
+            else:
+                file_url = f"file:///{file_path}"
+
+        audio_html = (
+            "<div class=\"anycoder-music\" style=\"max-width:420px;margin:16px auto;padding:12px 16px;border:1px solid #e5e7eb;border-radius:12px;background:linear-gradient(180deg,#fafafa,#f3f4f6);box-shadow:0 2px 8px rgba(0,0,0,0.06)\">"
+            "  <div style=\"font-size:13px;color:#374151;margin-bottom:8px;display:flex;align-items:center;gap:6px\">"
+            "    <span>🎵 Generated music</span>"
+            "  </div>"
+            f"  <audio controls autoplay loop style=\"width:100%;outline:none;\">"
+            f"    <source src=\"{file_url}\" type=\"audio/wav\" />"
+            "    Your browser does not support the audio element."
+            "  </audio>"
+            "</div>"
+        )
+        return audio_html
+    except Exception as e:
+        return f"Error generating music: {str(e)}"
+
 def extract_image_prompts_from_text(text: str, num_images_needed: int = 1) -> list:
     """Extract image generation prompts from the full text based on number of images needed"""
     # Use the entire text as the base prompt for image generation
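
For reference, a hedged usage sketch of this function, not part of the diff; the prompt and length are illustrative, and ELEVENLABS_API_KEY must be set for the call to succeed:

audio_tag = generate_music_from_text(
    "Calm lo-fi piano with soft vinyl crackle",
    music_length_ms=15000,
    session_id="demo-session",
)
if audio_tag.startswith("Error"):
    print(audio_tag)          # failures come back as strings; nothing is raised
else:
    page = f"<html><body>\n  {audio_tag}\n</body></html>"   # embed the returned player directly
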
@@ -1821,6 +1938,53 @@ def create_video_replacement_blocks_text_to_video(html_content: str, prompt: str
     # If no <body>, just append
     return f"{SEARCH_START}\n\n{DIVIDER}\n{video_html}\n{REPLACE_END}"
 
+def create_music_replacement_blocks_text_to_music(html_content: str, prompt: str, session_id: Optional[str] = None) -> str:
+    """Create search/replace blocks that insert ONE generated <audio> near the top of <body>.
+
+    Unlike images/videos which replace placeholders, music doesn't map to an <img> tag.
+    We simply insert an <audio> player after the opening <body>.
+    """
+    if not prompt or not prompt.strip():
+        return ""
+
+    audio_html = generate_music_from_text(prompt, session_id=session_id)
+    if audio_html.startswith("Error"):
+        return ""
+
+    # Prefer inserting after the first <section>...</section> if present; else after <body>
+    import re
+    section_match = re.search(r"<section\b[\s\S]*?</section>", html_content, flags=re.IGNORECASE)
+    if section_match:
+        section_html = section_match.group(0)
+        section_clean = re.sub(r"\s+", " ", section_html.strip())
+        variations = [
+            section_html,
+            section_clean,
+            section_clean.replace('"', "'"),
+            section_clean.replace("'", '"'),
+            re.sub(r"\s+", " ", section_clean),
+        ]
+        blocks = []
+        for v in variations:
+            blocks.append(f"""{SEARCH_START}
+{v}
+{DIVIDER}
+{v}\n    {audio_html}
+{REPLACE_END}""")
+        return "\n\n".join(blocks)
+    if '<body' in html_content:
+        body_end = html_content.find('>', html_content.find('<body')) + 1
+        insertion_point = html_content[:body_end] + '\n    '
+        return f"""{SEARCH_START}
+{insertion_point}
+{DIVIDER}
+{insertion_point}
+{audio_html}
+{REPLACE_END}"""
+
+    # If no <body>, just append
+    return f"{SEARCH_START}\n\n{DIVIDER}\n{audio_html}\n{REPLACE_END}"
+
 def create_image_replacement_blocks_from_input_image(html_content: str, user_prompt: str, input_image_data, max_images: int = 1) -> str:
     """Create search/replace blocks using image-to-image generation with a provided input image.
 
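
A hedged sketch of how these blocks are meant to be consumed downstream, not part of the diff; the sample HTML is made up, and apply_search_replace_changes is the existing helper referenced later in this commit:

html = "<html><body><section><h1>Hello</h1></section></body></html>"
blocks = create_music_replacement_blocks_text_to_music(
    html, "Epic orchestral theme", session_id="demo-session"
)
if blocks:                                   # "" means empty prompt or failed generation
    html = apply_search_replace_changes(html, blocks)   # splices the <audio> player into the page
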
@@ -1993,7 +2157,7 @@ def create_video_replacement_blocks_from_input_image(html_content: str, user_pro
         print("[Image2Video] No <body> tag; appending video via replacement block")
         return f"{SEARCH_START}\n\n{DIVIDER}\n{video_html}\n{REPLACE_END}"
 
-def apply_generated_images_to_html(html_content: str, user_prompt: str, enable_t
+def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_text_to_image: bool, enable_image_to_image: bool, input_image_data, image_to_image_prompt: str | None = None, text_to_image_prompt: str | None = None, enable_image_to_video: bool = False, image_to_video_prompt: str | None = None, session_id: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: str | None = None, enable_text_to_music: bool = False, text_to_music_prompt: str | None = None) -> str:
     """Apply text-to-image and/or image-to-image replacements to HTML content.
 
     If both toggles are enabled, text-to-image replacements run first, then image-to-image.
@@ -2002,7 +2166,7 @@ def apply_generated_images_to_html(html_content: str, user_prompt: str, enable_t
     try:
         print(
             f"[MediaApply] enable_i2v={enable_image_to_video}, enable_i2i={enable_image_to_image}, "
-            f"enable_t2i={enable_text_to_image}, has_image={input_image_data is not None}"
+            f"enable_t2i={enable_text_to_image}, enable_t2v={enable_text_to_video}, enable_t2m={enable_text_to_music}, has_image={input_image_data is not None}"
         )
         # If image-to-video is enabled, replace the first image with a generated video and return.
         if enable_image_to_video and input_image_data is not None and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
@@ -2040,6 +2204,18 @@ def apply_generated_images_to_html(html_content: str, user_prompt: str, enable_t
             print("[MediaApply] No t2v replacement blocks generated")
             return result
 
+        # If text-to-music is enabled, insert a generated audio player near the top of body and return.
+        if enable_text_to_music and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
+            t2m_prompt = (text_to_music_prompt or user_prompt or "").strip()
+            print(f"[MediaApply] Running text-to-music with prompt len={len(t2m_prompt)}")
+            blocks_tm = create_music_replacement_blocks_text_to_music(result, t2m_prompt, session_id=session_id)
+            if blocks_tm:
+                print("[MediaApply] Applying text-to-music replacement blocks")
+                result = apply_search_replace_changes(result, blocks_tm)
+            else:
+                print("[MediaApply] No t2m replacement blocks generated")
+            return result
+
         # If an input image is provided and image-to-image is enabled, we only replace one image
         # and skip text-to-image to satisfy the requirement to replace exactly the number of uploaded images.
         if enable_image_to_image and input_image_data is not None and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
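
A hedged sketch of a call that exercises only the new text-to-music branch, not part of the diff; the HTML and prompts are made up, and all image/video toggles are left off so the function returns right after the music step:

generated_html = "<!DOCTYPE html><html><body><h1>Synthwave</h1></body></html>"
result = apply_generated_media_to_html(
    generated_html,
    "Landing page for a synthwave band",
    enable_text_to_image=False,
    enable_image_to_image=False,
    input_image_data=None,
    enable_text_to_music=True,
    text_to_music_prompt="80s synthwave with heavy reverb and gated drums",
    session_id="demo-session",
)
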
@@ -2888,7 +3064,7 @@ The HTML code above contains the complete original website structure with all im
 stop_generation = False
 
 
-def generation_code(query: Optional[str], vlm_image: Optional[gr.Image], gen_image: Optional[gr.Image], file: Optional[str], website_url: Optional[str], _setting: Dict[str, str], _history: Optional[History], _current_model: Dict, enable_search: bool = False, language: str = "html", provider: str = "auto", enable_image_generation: bool = False, enable_image_to_image: bool = False, image_to_image_prompt: Optional[str] = None, text_to_image_prompt: Optional[str] = None, enable_image_to_video: bool = False, image_to_video_prompt: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: Optional[str] = None):
+def generation_code(query: Optional[str], vlm_image: Optional[gr.Image], gen_image: Optional[gr.Image], file: Optional[str], website_url: Optional[str], _setting: Dict[str, str], _history: Optional[History], _current_model: Dict, enable_search: bool = False, language: str = "html", provider: str = "auto", enable_image_generation: bool = False, enable_image_to_image: bool = False, image_to_image_prompt: Optional[str] = None, text_to_image_prompt: Optional[str] = None, enable_image_to_video: bool = False, image_to_video_prompt: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: Optional[str] = None, enable_text_to_music: bool = False, text_to_music_prompt: Optional[str] = None):
     if query is None:
         query = ''
     if _history is None:
@@ -2928,7 +3104,9 @@ def generation_code(query: Optional[str], vlm_image: Optional[gr.Image], gen_ima
     # On each generate, reap old global files and cleanup previous session files
     try:
         cleanup_session_videos(session_id)
+        cleanup_session_audio(session_id)
         reap_old_videos()
+        reap_old_audio()
     except Exception:
         pass
 
@@ -3028,9 +3206,9 @@ This will help me create a better design for you."""
 
                     clean_code = remove_code_block(content)
 
-                    # Apply
+                    # Apply media generation (images/video/music)
                     print("[Generate] Applying post-generation media to GLM-4.5 HTML output")
-                    final_content = apply_generated_images_to_html(
+                    final_content = apply_generated_media_to_html(
                         content,
                         query,
                         enable_text_to_image=enable_image_generation,
@@ -3042,6 +3220,8 @@ This will help me create a better design for you."""
                         session_id=session_id,
                         enable_text_to_video=enable_text_to_video,
                         text_to_video_prompt=text_to_video_prompt,
+                        enable_text_to_music=enable_text_to_music,
+                        text_to_music_prompt=text_to_music_prompt,
                     )
 
                     _history.append([query, final_content])
@@ -3195,9 +3375,9 @@ This will help me create a better design for you."""
                             modified_content = apply_search_replace_changes(last_content, clean_code)
                             clean_content = remove_code_block(modified_content)
 
-                            # Apply
+                            # Apply media generation (images/video/music)
                             print("[Generate] Applying post-generation media to modified HTML content")
-                            clean_content = apply_generated_images_to_html(
+                            clean_content = apply_generated_media_to_html(
                                 clean_content,
                                 query,
                                 enable_text_to_image=enable_image_generation,
@@ -3209,6 +3389,8 @@ This will help me create a better design for you."""
                                 session_id=session_id,
                                 enable_text_to_video=enable_text_to_video,
                                 text_to_video_prompt=text_to_video_prompt,
+                                enable_text_to_music=enable_text_to_music,
+                                text_to_music_prompt=text_to_music_prompt,
                             )
 
                             yield {
@@ -3218,9 +3400,9 @@ This will help me create a better design for you."""
                                 history_output: history_to_chatbot_messages(_history),
                             }
                         else:
-                            # Apply
+                            # Apply media generation (images/video/music)
                             print("[Generate] Applying post-generation media to new HTML content")
-                            final_content = apply_generated_images_to_html(
+                            final_content = apply_generated_media_to_html(
                                 clean_code,
                                 query,
                                 enable_text_to_image=enable_image_generation,
@@ -3233,6 +3415,8 @@ This will help me create a better design for you."""
                                 session_id=session_id,
                                 enable_text_to_video=enable_text_to_video,
                                 text_to_video_prompt=text_to_video_prompt,
+                                enable_text_to_music=enable_text_to_music,
+                                text_to_music_prompt=text_to_music_prompt,
                             )
 
                             preview_val = None
@@ -3620,9 +3804,9 @@ This will help me create a better design for you."""
                     modified_content = apply_search_replace_changes(last_content, final_code)
                     clean_content = remove_code_block(modified_content)
 
-                    # Apply
+                    # Apply media generation (images/video/music)
                     print("[Generate] Applying post-generation media to follow-up HTML content")
-                    clean_content = apply_generated_images_to_html(
+                    clean_content = apply_generated_media_to_html(
                         clean_content,
                         query,
                         enable_text_to_image=enable_image_generation,
@@ -3635,6 +3819,8 @@ This will help me create a better design for you."""
                         text_to_image_prompt=text_to_image_prompt,
                         enable_text_to_video=enable_text_to_video,
                         text_to_video_prompt=text_to_video_prompt,
+                        enable_text_to_music=enable_text_to_music,
+                        text_to_music_prompt=text_to_music_prompt,
                     )
 
                     # Update history with the cleaned content
@@ -3649,9 +3835,9 @@ This will help me create a better design for you."""
                 # Regular generation - use the content as is
                 final_content = remove_code_block(content)
 
-                # Apply
+                # Apply media generation (images/video/music)
                 print("[Generate] Applying post-generation media to final HTML content")
-                final_content = apply_generated_images_to_html(
+                final_content = apply_generated_media_to_html(
                     final_content,
                     query,
                     enable_text_to_image=enable_image_generation,
@@ -3664,6 +3850,8 @@ This will help me create a better design for you."""
                     session_id=session_id,
                     enable_text_to_video=enable_text_to_video,
                     text_to_video_prompt=text_to_video_prompt,
+                    enable_text_to_music=enable_text_to_music,
+                    text_to_music_prompt=text_to_music_prompt,
                 )
 
                 _history.append([query, final_content])
@@ -4858,6 +5046,20 @@ with gr.Blocks(
                         visible=False
                     )
 
+                    # Text-to-Music
+                    text_to_music_toggle = gr.Checkbox(
+                        label="🎵 Generate Music (text → music)",
+                        value=False,
+                        visible=True,
+                        info="Compose short music from your prompt using ElevenLabs Music"
+                    )
+                    text_to_music_prompt = gr.Textbox(
+                        label="Text-to-Music Prompt",
+                        placeholder="Describe the music to generate (e.g., 'Epic orchestral theme with soaring strings and powerful brass')",
+                        lines=2,
+                        visible=False
+                    )
+
                     def on_image_to_image_toggle(toggled, beta_enabled):
                         # Only show in classic mode (beta disabled)
                         vis = bool(toggled) and not bool(beta_enabled)
@@ -4891,6 +5093,11 @@ with gr.Blocks(
                         inputs=[text_to_video_toggle, beta_toggle],
                         outputs=[text_to_video_prompt]
                     )
+                    text_to_music_toggle.change(
+                        on_text_to_image_toggle,
+                        inputs=[text_to_music_toggle, beta_toggle],
+                        outputs=[text_to_music_prompt]
+                    )
                     model_dropdown = gr.Dropdown(
                         choices=[model['name'] for model in AVAILABLE_MODELS],
                         value=DEFAULT_MODEL_NAME,
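
Note that the new checkbox reuses on_text_to_image_toggle as its visibility handler. That function's body is not part of this diff; judging from on_image_to_image_toggle above, it presumably reduces to something like the following sketch (assumed, not confirmed by this commit):

import gradio as gr

def on_text_to_image_toggle(toggled, beta_enabled):   # assumed body, mirroring on_image_to_image_toggle
    # Show the prompt box only when the toggle is on and the beta chat UI is off
    vis = bool(toggled) and not bool(beta_enabled)
    return gr.update(visible=vis)
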
@@ -5141,7 +5348,7 @@ with gr.Blocks(
                     show_progress="hidden",
                 ).then(
                     generation_code,
-                    inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt],
+                    inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt, text_to_music_toggle, text_to_music_prompt],
                     outputs=[code_output, history, sandbox, history_output]
                 ).then(
                     end_generation_ui,
@@ -5217,6 +5424,8 @@ with gr.Blocks(
                 upd_t2v_prompt = gr.skip()
                 upd_model_dropdown = gr.skip()
                 upd_current_model = gr.skip()
+                upd_t2m_toggle = gr.skip()
+                upd_t2m_prompt = gr.skip()
 
                 # Split by comma to separate main prompt and directives
                 segments = [seg.strip() for seg in (text or "").split(",") if seg.strip()]
@@ -5282,6 +5491,13 @@ with gr.Blocks(
                         if p:
                             upd_t2v_prompt = gr.update(value=p)
 
+                    # Text-to-music
+                    if ("text to music" in seg_norm) or ("text-to-music" in seg_norm) or ("generate music" in seg_norm) or ("compose music" in seg_norm):
+                        upd_t2m_toggle = gr.update(value=True)
+                        p = after_colon(seg)
+                        if p:
+                            upd_t2m_prompt = gr.update(value=p)
+
                     # URL (website redesign)
                     url = _extract_url(seg)
                     if url:
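
A hedged walk-through of the directive parsing above, not part of the diff; after_colon and the seg_norm normalization are existing helpers in app.py, re-sketched here for illustration:

text = "Build a landing page, generate music: dreamy ambient pads"
segments = [seg.strip() for seg in text.split(",") if seg.strip()]
for seg in segments:
    seg_norm = seg.lower()
    if ("generate music" in seg_norm) or ("text to music" in seg_norm):
        prompt = seg.split(":", 1)[1].strip() if ":" in seg else ""   # roughly what after_colon does
        print(prompt)   # -> "dreamy ambient pads"
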
@@ -5346,6 +5562,8 @@ with gr.Blocks(
                     upd_t2v_prompt,
                     upd_model_dropdown,
                     upd_current_model,
+                    upd_t2m_toggle,
+                    upd_t2m_prompt,
                 )
 
             # Wire chat submit -> apply settings -> run generation
@@ -5371,6 +5589,8 @@ with gr.Blocks(
                     text_to_video_prompt,
                     model_dropdown,
                     current_model,
+                    text_to_music_toggle,
+                    text_to_music_prompt,
                 ],
                 queue=False,
             ).then(
@@ -5380,7 +5600,7 @@ with gr.Blocks(
                 show_progress="hidden",
             ).then(
                 generation_code,
-                inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt],
+                inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt, text_to_music_toggle, text_to_music_prompt],
                 outputs=[code_output, history, sandbox, history_output]
             ).then(
                 end_generation_ui,
@@ -5397,12 +5617,13 @@ with gr.Blocks(
             )
 
             # Toggle between classic controls and beta chat UI
-            def toggle_beta(checked: bool, t2i: bool, i2i: bool, i2v: bool, t2v: bool):
+            def toggle_beta(checked: bool, t2i: bool, i2i: bool, i2v: bool, t2v: bool, t2m: bool):
                 # Prompts only visible in classic mode and when their toggles are on
                 t2i_vis = (not checked) and bool(t2i)
                 i2i_vis = (not checked) and bool(i2i)
                 i2v_vis = (not checked) and bool(i2v)
                 t2v_vis = (not checked) and bool(t2v)
+                t2m_vis = (not checked) and bool(t2m)
 
                 return (
                     # Chat UI group
@@ -5426,6 +5647,8 @@ with gr.Blocks(
                     gr.update(visible=i2v_vis),      # image_to_video_prompt
                     gr.update(visible=not checked),  # text_to_video_toggle
                     gr.update(visible=t2v_vis),      # text_to_video_prompt
+                    gr.update(visible=not checked),  # text_to_music_toggle
+                    gr.update(visible=t2m_vis),      # text_to_music_prompt
                     gr.update(visible=not checked),  # model_dropdown
                     gr.update(visible=not checked),  # quick_start_md
                     gr.update(visible=not checked),  # quick_examples_col
@@ -5433,7 +5656,7 @@ with gr.Blocks(
 
             beta_toggle.change(
                 toggle_beta,
-                inputs=[beta_toggle, image_generation_toggle, image_to_image_toggle, image_to_video_toggle, text_to_video_toggle],
+                inputs=[beta_toggle, image_generation_toggle, image_to_image_toggle, image_to_video_toggle, text_to_video_toggle, text_to_music_toggle],
                 outputs=[
                     sidebar_chatbot,
                     sidebar_msg,
@@ -5454,6 +5677,8 @@ with gr.Blocks(
                     image_to_video_prompt,
                     text_to_video_toggle,
                     text_to_video_prompt,
+                    text_to_music_toggle,
+                    text_to_music_prompt,
                     model_dropdown,
                     quick_start_md,
                     quick_examples_col,