Spaces:
Sleeping
Sleeping
| """ | |
| Image generation tools for visualizing song analysis results. | |
| """ | |
| import os | |
| from typing import Dict | |
| from loguru import logger | |
| from smolagents import Tool | |
| from api_utils import make_api_call_with_retry | |
def caption_gen_tool(analysis_json: Dict, title: str, artist: str) -> str:
    """
    Generate a descriptive caption for image generation based on song analysis.

    Builds an LLM prompt from the analysis fields (mood, main themes, summary,
    conclusion) and asks the model for a single-paragraph image description.

    Args:
        analysis_json: Song analysis results. Normally a dictionary; a JSON
            string encoding a dictionary is also accepted. Any other value is
            treated as an empty analysis and the default mood/themes are used.
        title: Song title (required)
        artist: Song artist (required)

    Returns:
        A descriptive caption suitable for image generation

    Raises:
        ValueError: If the model returns no text for the caption.
    """
    # Local import: only needed when the analysis arrives as a JSON string.
    import json

    logger.info("Generating image caption from analysis results")
    logger.info(f"Using song: '{title}' by '{artist}' for caption generation")

    # The tool wrapper declares this input as type "any", so be tolerant of
    # a JSON string (or garbage) instead of crashing on `.get`.
    analysis = analysis_json
    if isinstance(analysis, str):
        try:
            analysis = json.loads(analysis)
        except ValueError:
            logger.warning("analysis_json is not valid JSON; using default caption inputs")
            analysis = {}
    if not isinstance(analysis, dict):
        analysis = {}

    mood = analysis.get("mood") or "emotional"

    # A bare string must not be joined character by character, and non-string
    # items must not make `str.join` raise.
    raw_themes = analysis.get("main_themes") or ["music"]
    if not isinstance(raw_themes, (list, tuple, set, frozenset)):
        raw_themes = [raw_themes]
    themes = ", ".join(str(theme) for theme in raw_themes)

    # Coerce to str so the 200-character truncation below is always valid.
    summary = str(analysis.get("summary") or "")
    conclusion = str(analysis.get("conclusion") or "")

    # Create an API prompt to generate a high-quality image caption
    prompt = f"""Generate a detailed, vivid, and artistic image generation prompt based on the following song analysis.
This prompt will be used by an AI image generator to create a visual representation of the song's essence.
Song: {title} by {artist}
Mood: {mood}
Themes: {themes}
Summary: {summary[:200]}
Conclusion: {conclusion[:200]}
Your task is to create a single paragraph (approximately 100-150 words) that vividly describes a scene or abstract image
that captures the emotional essence and themes of this song. The description should be detailed, visual, and evocative.
DO NOT include any text, words, or lyrics in the image description. Focus on colors, composition, mood, symbols, and visuals only.
ONLY output the final image generation prompt with no additional text, explanations, or formatting.
"""

    # Use the same model as in lyrics analysis
    model_to_use = "openrouter/google/gemini-2.0-flash-lite-preview-02-05:free"
    logger.info("Using {} for caption generation", model_to_use)

    # Call the API to generate a caption
    logger.info("Generating image caption for song: '{}' by '{}'", title, artist)
    response_text = make_api_call_with_retry(model_to_use, prompt)

    # Fail with a clear message rather than an AttributeError on None, and
    # never hand an empty prompt to the image generator.
    caption = (response_text or "").strip()
    if not caption:
        raise ValueError("Caption model returned an empty response")

    logger.debug(f"Generated image caption: {caption[:100]}...")
    return caption
class GenerateImageTool(Tool):
    """Tool for generating images based on song analysis.

    `forward` first asks `caption_gen_tool` for an image prompt and then
    passes that prompt to the Gemini image-generation model. Both methods
    return an HTML snippet: an `<img>` tag with an inline base64 data URI on
    success, or a `<p>` element describing the error.
    """

    name = "generate_image"
    description = "Generates an image based on the song analysis results"
    inputs = {
        "analysis_json": {"type": "any", "description": "JSON dictionary containing the analysis results"},
        "title": {"type": "string", "description": "Title of the song"},
        "artist": {"type": "string", "description": "Artist of the song"},
    }
    output_type = "string"

    @staticmethod
    def _error_html(message: str) -> str:
        """Wrap an error message in a `<p>` element, escaping HTML markup."""
        import html

        # quote=False: the text is element content, not an attribute value,
        # so only &, < and > need escaping.
        return f"<p>{html.escape(message, quote=False)}</p>"

    def generate_with_gemini(self, caption: str) -> str:
        """
        Generate image using Gemini API directly

        Args:
            caption: The prompt text for image generation

        Returns:
            HTML img tag with the image or error message
        """
        import base64
        import html

        # Import the SDK lazily so a missing package yields a readable error
        # instead of breaking module import.
        try:
            from google import genai
            from google.genai import types
        except ImportError:
            logger.error("Google GenAI package not installed")
            # `from google import genai` is provided by the `google-genai` package.
            return self._error_html(
                "Error: Google GenAI package not installed. Install with 'pip install google-genai'"
            )

        try:
            # Get API key from environment variable
            api_key = os.environ.get("GEMINI_API_KEY")
            if not api_key:
                logger.error("GEMINI_API_KEY not found in environment variables")
                return self._error_html(
                    "Error: Gemini API key not found. Please set the GEMINI_API_KEY environment variable."
                )

            logger.info("Initializing Gemini client")
            client = genai.Client(api_key=api_key)

            logger.info("Generating image with Gemini")
            response = client.models.generate_content(
                model="gemini-2.0-flash-exp-image-generation",
                contents=caption,
                config=types.GenerateContentConfig(
                    response_modalities=['Text', 'Image']
                ),
            )

            # A response may carry no candidates or no content; treat that as
            # "no image" rather than failing with an IndexError/TypeError.
            candidates = getattr(response, "candidates", None) or []
            content = candidates[0].content if candidates else None
            parts = (getattr(content, "parts", None) or []) if content is not None else []

            for part in parts:
                if part.text is not None:
                    logger.info(f"Gemini response text: {part.text[:100]}...")
                    continue

                inline = part.inline_data
                if inline is None or not inline.data:
                    continue

                # Extract the image payload; encode raw bytes to base64 for
                # the data URI, otherwise assume it is already base64 text.
                image_data = inline.data
                if isinstance(image_data, bytes):
                    image_b64 = base64.b64encode(image_data).decode('utf-8')
                else:
                    image_b64 = image_data

                # Use the mime type reported with the data, defaulting to PNG.
                mime_type = html.escape(getattr(inline, "mime_type", None) or "image/png", quote=True)
                return (
                    f'<img src="data:{mime_type};base64,{image_b64}" '
                    'alt="Generated image based on song analysis" '
                    'style="max-width:100%; border-radius:10px; box-shadow:0 4px 8px rgba(0,0,0,0.1);">'
                )

            return "<p>Error: No image generated by Gemini API.</p>"
        except Exception as e:
            # Tool boundary: report the failure as HTML instead of raising.
            logger.error(f"Error generating image with Gemini: {str(e)}")
            return self._error_html(f"Error generating image with Gemini: {str(e)}")

    def forward(self, analysis_json: Dict, title: str, artist: str) -> str:
        """
        Generates an image based on the analysis results using Gemini API.

        Args:
            analysis_json: Dictionary containing the analysis results
            title: Song title (required)
            artist: Song artist (required)

        Returns:
            HTML img tag with the image or error message
        """
        try:
            # Generate caption for the image
            caption = caption_gen_tool(analysis_json, title=title, artist=artist)
            logger.info("Caption generated successfully")

            # Gemini is the only image backend here; nothing has failed at
            # this point, so no fallback warning is logged.
            return self.generate_with_gemini(caption)
        except Exception as e:
            logger.error(f"Error in image generation: {str(e)}")
            return self._error_html(f"Error in image generation: {str(e)}")