Aduc_sdr

Paused

App Files Community

euiia committed on Sep 4

Commit

83aa216

verified ·

1 Parent(s): 61b63f7

Update managers/gemini_manager.py

Browse files

Files changed (1) hide show

managers/gemini_manager.py +40 -147

managers/gemini_manager.py CHANGED Viewed

@@ -2,12 +2,12 @@
 #
 # Copyright (C) August 4, 2025  Carlos Rodrigues dos Santos
 #
-# Version: 1.1.0
 #
-# This file defines the GeminiManager, a specialist responsible for all Natural
-# Language Processing, reasoning, and vision-language tasks. It acts as the
-# Scriptwriter, Editor, and Cinematic Director for the ADUC framework, generating
-# storyboards, prompts, and making creative decisions.
 import os
 import logging
@@ -17,6 +17,7 @@ import gradio as gr
 from PIL import Image
 import google.generativeai as genai
 import re
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
@@ -28,13 +29,11 @@ def robust_json_parser(raw_text: str) -> dict:
     """
     clean_text = raw_text.strip()
     try:
-        # Try to find JSON delimited by ```json ... ```
         match = re.search(r'```json\s*(\{.*?\})\s*```', clean_text, re.DOTALL)
         if match:
             json_str = match.group(1)
             return json.loads(json_str)
-        # If not found, try to find the first '{' and the last '}'
         start_index = clean_text.find('{')
         end_index = clean_text.rfind('}')
         if start_index != -1 and end_index != -1 and end_index > start_index:
@@ -48,169 +47,63 @@ def robust_json_parser(raw_text: str) -> dict:
 class GeminiManager:
     """
-    Manages interactions with the Google Gemini API, acting as the primary
-    reasoning and language specialist for the ADUC framework.
     """
     def __init__(self):
         self.api_key = os.environ.get("GEMINI_API_KEY")
         if self.api_key:
             genai.configure(api_key=self.api_key)
-            self.model = genai.GenerativeModel('gemini-2.5-pro')
-            logger.info("Gemini Specialist (1.5 Pro) initialized successfully.")
         else:
             self.model = None
-            logger.warning("Gemini API key not found. Specialist disabled.")
     def _check_model(self):
         """Raises an error if the Gemini API is not configured."""
         if not self.model:
             raise gr.Error("The Google Gemini API key is not configured (GEMINI_API_KEY).")
-    def _read_prompt_template(self, filename: str) -> str:
-        """Reads a prompt template file from the 'prompts' directory."""
-        try:
-            # Assuming the 'prompts' directory is in the root of the project
-            prompts_dir = Path(__file__).resolve().parent.parent / "prompts"
-            with open(prompts_dir / filename, "r", encoding="utf-8") as f:
-                return f.read()
-        except FileNotFoundError:
-            raise gr.Error(f"Prompt template file not found: prompts/{filename}")
-    def generate_storyboard(self, prompt: str, num_keyframes: int, ref_image_paths: list[str]) -> list[str]:
-        """Delegated task: Acts as a Scriptwriter to generate a storyboard."""
         self._check_model()
-        try:
-            template = self._read_prompt_template("unified_storyboard_prompt.txt")
-            storyboard_prompt = template.format(user_prompt=prompt, num_fragments=num_keyframes)
-            model_contents = [storyboard_prompt] + [Image.open(p) for p in ref_image_paths]
-            logger.info("Calling Gemini to generate storyboard...")
-            response = self.model.generate_content(model_contents)
-            logger.info(f"Gemini responded with (raw storyboard): {response.text}")
-            storyboard_data = robust_json_parser(response.text)
-            storyboard = storyboard_data.get("scene_storyboard", [])
-            if not storyboard or len(storyboard) != num_keyframes:
-                raise ValueError(f"Incorrect number of scenes generated. Expected {num_keyframes}, got {len(storyboard)}.")
-            return storyboard
-        except Exception as e:
-            raise gr.Error(f"The Scriptwriter (Gemini) failed: {e}")
-    def select_keyframes_from_pool(self, storyboard: list, base_image_paths: list[str], pool_image_paths: list[str]) -> list[str]:
-        """Delegated task: Acts as a Photographer/Editor to select keyframes."""
-        self._check_model()
-        if not pool_image_paths:
-            raise gr.Error("The 'image pool' (Additional Images) is empty.")
-        try:
-            template = self._read_prompt_template("keyframe_selection_prompt.txt")
-            image_map = {f"IMG-{i+1}": path for i, path in enumerate(pool_image_paths)}
-            base_image_map = {f"BASE-{i+1}": path for i, path in enumerate(base_image_paths)}
-            model_contents = ["# Reference Images (Story Base)"]
-            for identifier, path in base_image_map.items():
-                model_contents.extend([f"Identifier: {identifier}", Image.open(path)])
-            model_contents.append("\n# Image Pool (Scene Bank)")
-            for identifier, path in image_map.items():
-                model_contents.extend([f"Identifier: {identifier}", Image.open(path)])
-            storyboard_str = "\n".join([f"- Scene {i+1}: {s}" for i, s in enumerate(storyboard)])
-            selection_prompt = template.format(storyboard_str=storyboard_str, image_identifiers=list(image_map.keys()))
-            model_contents.append(selection_prompt)
-            logger.info("Calling Gemini to select keyframes from pool...")
-            response = self.model.generate_content(model_contents)
-            logger.info(f"Gemini responded with (raw keyframe selection): {response.text}")
-            selection_data = robust_json_parser(response.text)
-            selected_identifiers = selection_data.get("selected_image_identifiers", [])
-            if len(selected_identifiers) != len(storyboard):
-                raise ValueError("The AI did not select the correct number of images for the scenes.")
-            selected_paths = [image_map[identifier] for identifier in selected_identifiers]
-            return selected_paths
-        except Exception as e:
-            raise gr.Error(f"The Photographer (Gemini) failed to select images: {e}")
-    def get_anticipatory_keyframe_prompt(self, global_prompt: str, scene_history: str, current_scene_desc: str, future_scene_desc: str, last_image_path: str, fixed_ref_paths: list[str]) -> str:
-        """Delegated task: Acts as an Art Director to generate an image prompt."""
-        self._check_model()
         try:
-            template = self._read_prompt_template("anticipatory_keyframe_prompt.txt")
-            director_prompt = template.format(
-                historico_prompt=scene_history,
-                cena_atual=current_scene_desc,
-                cena_futura=future_scene_desc
-            )
-            model_contents = [
-                "# CONTEXT:",
-                f"- Global Story Goal: {global_prompt}",
-                "# VISUAL ASSETS:",
-                "Current Base Image [IMG-BASE]:",
-                Image.open(last_image_path)
-            ]
-            ref_counter = 1
-            for path in fixed_ref_paths:
-                if path != last_image_path:
-                    model_contents.extend([f"General Reference Image [IMG-REF-{ref_counter}]:", Image.open(path)])
-                    ref_counter += 1
-            model_contents.append(director_prompt)
-            logger.info("Calling Gemini to generate anticipatory keyframe prompt...")
-            response = self.model.generate_content(model_contents)
-            logger.info(f"Gemini responded with (raw keyframe prompt): {response.text}")
-            final_flux_prompt = response.text.strip().replace("`", "").replace("\"", "")
-            return final_flux_prompt
         except Exception as e:
-            raise gr.Error(f"The Art Director (Gemini) failed: {e}")
-    def get_cinematic_decision(self, global_prompt: str, story_history: str,
-                               past_keyframe_path: str, present_keyframe_path: str, future_keyframe_path: str,
-                               past_scene_desc: str, present_scene_desc: str, future_scene_desc: str) -> dict:
         """
-        Delegated task: Acts as a Film Director to make editing decisions and generate motion prompts.
         """
-        self._check_model()
         try:
-            template = self._read_prompt_template("cinematic_director_prompt.txt")
-            prompt_text = template.format(
-                global_prompt=global_prompt,
-                story_history=story_history,
-                past_scene_desc=past_scene_desc,
-                present_scene_desc=present_scene_desc,
-                future_scene_desc=future_scene_desc
-            )
-            model_contents = [
-                prompt_text,
-                "[PAST_IMAGE]:", Image.open(past_keyframe_path),
-                "[PRESENT_IMAGE]:", Image.open(present_keyframe_path),
-                "[FUTURE_IMAGE]:", Image.open(future_keyframe_path)
-            ]
-            logger.info("Calling Gemini to generate cinematic decision...")
-            response = self.model.generate_content(model_contents)
-            logger.info(f"Gemini responded with (raw cinematic decision): {response.text}")
-            decision_data = robust_json_parser(response.text)
-            if "transition_type" not in decision_data or "motion_prompt" not in decision_data:
-                raise ValueError("AI response (Cinematographer) is malformed. Missing 'transition_type' or 'motion_prompt'.")
-            return decision_data
         except Exception as e:
-            logger.error(f"The Film Director (Gemini) failed: {e}. Using fallback to 'continuous'.")
-            return {
-                "transition_type": "continuous",
-                "motion_prompt": f"A smooth, continuous cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
-            }
 # --- Singleton Instance ---
-gemini_manager_singleton = GeminiManager()

 #
 # Copyright (C) August 4, 2025  Carlos Rodrigues dos Santos
 #
+# Version: 1.1.1
 #
+# This file defines the GeminiManager, a specialist responsible for raw communication
+# with the Google Gemini API. It acts as a lean API client, handling requests,
+# parsing responses, and managing API-level errors. It does not contain any
+# high-level prompt engineering or creative logic.
 import os
 import logging
 from PIL import Image
 import google.generativeai as genai
 import re
+from typing import List, Union, Any
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
     """
     clean_text = raw_text.strip()
     try:
         match = re.search(r'```json\s*(\{.*?\})\s*```', clean_text, re.DOTALL)
         if match:
             json_str = match.group(1)
             return json.loads(json_str)
         start_index = clean_text.find('{')
         end_index = clean_text.rfind('}')
         if start_index != -1 and end_index != -1 and end_index > start_index:
 class GeminiManager:
     """
     Manages raw interactions with the Google Gemini API.

     This class is a thin communication layer: it sends prompt parts to the
     configured model, returns the raw text or the parsed JSON object, and
     converts failures into gr.Error. It contains no prompt engineering.
     """

     def __init__(self, model_name: str = 'gemini-1.5-pro-latest'):
         """
         Configure the Gemini client from the GEMINI_API_KEY environment variable.

         When the variable is unset or empty, the manager is created in a
         disabled state (self.model is None) and every request raises gr.Error.

         Args:
             model_name (str): Model identifier passed to genai.GenerativeModel.
                 Defaults to the model this module previously hard-coded.
         """
         self.api_key = os.environ.get("GEMINI_API_KEY")
         self.model_name = model_name
         if self.api_key:
             genai.configure(api_key=self.api_key)
             self.model = genai.GenerativeModel(model_name)
             logger.info("GeminiManager (Communication Layer) initialized successfully.")
         else:
             self.model = None
             logger.warning("Gemini API key not found. GeminiManager disabled.")

     def _check_model(self):
         """Raises an error if the Gemini API is not configured."""
         if self.model is None:
             raise gr.Error("The Google Gemini API key is not configured (GEMINI_API_KEY).")

     def _generate_content(self, prompt_parts: List[Any]) -> str:
         """
         Internal method to make the API call.

         Args:
             prompt_parts (List[Any]): A list containing strings and/or PIL.Image objects.
         Returns:
             str: The raw string response from the API.
         Raises:
             gr.Error: If the API key is not configured.
         """
         self._check_model()
         logger.info("Calling Gemini API...")
         response = self.model.generate_content(prompt_parts)
         # Read the text accessor once so the logged value and the returned
         # value are the same object.
         # NOTE(review): the accessor presumably raises when the response has
         # no text part (e.g. a blocked prompt) -- confirm against the SDK docs.
         raw_text = response.text
         logger.info("Gemini responded with raw text: %s", raw_text)
         return raw_text

     def get_raw_text(self, prompt_parts: List[Any]) -> str:
         """
         Sends a prompt to the Gemini API and returns the raw text response.

         Args:
             prompt_parts (List[Any]): A list containing strings and/or PIL.Image objects.
         Returns:
             str: The raw string response from the API.
         Raises:
             gr.Error: If the API key is not configured, or if the API call fails.
         """
         # Check configuration outside the try block so a missing API key is
         # reported with its own message instead of being re-wrapped (and
         # logged with a traceback) as a communication failure.
         self._check_model()
         try:
             return self._generate_content(prompt_parts)
         except Exception as e:
             logger.exception("Gemini API call failed: %s", e)
             raise gr.Error(f"Gemini API communication failed: {e}") from e

     def get_json_object(self, prompt_parts: List[Any]) -> dict:
         """
         Sends a prompt to the Gemini API, expects a JSON response, parses it, and returns a dictionary.

         Args:
             prompt_parts (List[Any]): A list containing strings and/or PIL.Image objects.
         Returns:
             dict: The parsed JSON object from the API response.
         Raises:
             gr.Error: If the API key is not configured, or if the API call or
                 the JSON parsing fails.
         """
         # Same reasoning as get_raw_text: surface the configuration error
         # unchanged rather than as a parsing/communication failure.
         self._check_model()
         try:
             raw_response = self._generate_content(prompt_parts)
             return robust_json_parser(raw_response)
         except Exception as e:
             logger.exception("Gemini API call or JSON parsing failed: %s", e)
             raise gr.Error(f"Gemini API communication or response parsing failed: {e}") from e
 # --- Singleton Instance ---
+gemini_manager_singleton = GeminiManager()