Vaishnav Muraleedharan committed on
Commit 9b39b7c · 1 Parent(s): 88c6024

chore: format code to maintain consistent style

Files changed (9)
  1. __init__.py +0 -1
  2. app.py +31 -11
  3. auth.py +53 -47
  4. cache.py +19 -11
  5. evaluation.py +21 -21
  6. gemini.py +16 -21
  7. gemini_tts.py +63 -28
  8. interview_simulator.py +134 -110
  9. medgemma.py +26 -18
__init__.py CHANGED
@@ -11,4 +11,3 @@
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
-
 
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
 
app.py CHANGED
@@ -12,18 +12,36 @@
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
15
- from evaluation import evaluate_report, evaluation_prompt
16
- from flask import Flask, send_from_directory, request, jsonify, Response, stream_with_context, send_file
17
  from flask_cors import CORS
18
- import os, time, json, re
 
 
19
  from gemini import gemini_get_text_response
20
  from interview_simulator import stream_interview
21
- from cache import create_cache_zip
22
  from medgemma import medgemma_get_text_response
23
 
24
- app = Flask(__name__, static_folder=os.environ.get("FRONTEND_BUILD", "frontend/build"), static_url_path="/")
25
  CORS(app, resources={r"/api/*": {"origins": "http://localhost:3000"}})
26
 
 
27
  @app.route("/")
28
  def serve():
29
  """Serves the main index.html file."""
@@ -35,7 +53,7 @@ def stream_conversation():
35
  """Streams the conversation with the interview simulator."""
36
  patient = request.args.get("patient", "Patient")
37
  condition = request.args.get("condition", "unknown condition")
38
-
39
  def generate():
40
  try:
41
  for message in stream_interview(patient, condition):
@@ -43,9 +61,10 @@ def stream_conversation():
43
  except Exception as e:
44
  yield f"data: Error: {str(e)}\n\n"
45
  raise e
46
-
47
  return Response(stream_with_context(generate()), mimetype="text/event-stream")
48
 
 
49
  @app.route("/api/evaluate_report", methods=["POST"])
50
  def evaluate_report_call():
51
  """Evaluates the provided medical report."""
@@ -55,10 +74,10 @@ def evaluate_report_call():
55
  return jsonify({"error": "Report is required"}), 400
56
  condition = data.get("condition", "")
57
  if not condition:
58
- return jsonify({"error": "Condition is required"}), 400
59
-
60
  evaluation_text = evaluate_report(report, condition)
61
-
62
  return jsonify({"evaluation": evaluation_text})
63
 
64
 
@@ -81,6 +100,7 @@ def static_proxy(path):
81
  return send_from_directory(app.static_folder, path)
82
  else:
83
  return send_from_directory(app.static_folder, "index.html")
84
-
 
85
  if __name__ == "__main__":
86
  app.run(host="0.0.0.0", port=7860, threaded=True)
 
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
15
+ import json
16
+ import os
17
+ import re
18
+ import time
19
+
20
+ from flask import (
21
+ Flask,
22
+ Response,
23
+ jsonify,
24
+ request,
25
+ send_file,
26
+ send_from_directory,
27
+ stream_with_context,
28
+ )
29
  from flask_cors import CORS
30
+
31
+ from cache import create_cache_zip
32
+ from evaluation import evaluate_report, evaluation_prompt
33
  from gemini import gemini_get_text_response
34
  from interview_simulator import stream_interview
 
35
  from medgemma import medgemma_get_text_response
36
 
37
+ app = Flask(
38
+ __name__,
39
+ static_folder=os.environ.get("FRONTEND_BUILD", "frontend/build"),
40
+ static_url_path="/",
41
+ )
42
  CORS(app, resources={r"/api/*": {"origins": "http://localhost:3000"}})
43
 
44
+
45
  @app.route("/")
46
  def serve():
47
  """Serves the main index.html file."""
 
53
  """Streams the conversation with the interview simulator."""
54
  patient = request.args.get("patient", "Patient")
55
  condition = request.args.get("condition", "unknown condition")
56
+
57
  def generate():
58
  try:
59
  for message in stream_interview(patient, condition):
 
61
  except Exception as e:
62
  yield f"data: Error: {str(e)}\n\n"
63
  raise e
64
+
65
  return Response(stream_with_context(generate()), mimetype="text/event-stream")
66
 
67
+
68
  @app.route("/api/evaluate_report", methods=["POST"])
69
  def evaluate_report_call():
70
  """Evaluates the provided medical report."""
 
74
  return jsonify({"error": "Report is required"}), 400
75
  condition = data.get("condition", "")
76
  if not condition:
77
+ return jsonify({"error": "Condition is required"}), 400
78
+
79
  evaluation_text = evaluate_report(report, condition)
80
+
81
  return jsonify({"evaluation": evaluation_text})
82
 
83
 
 
100
  return send_from_directory(app.static_folder, path)
101
  else:
102
  return send_from_directory(app.static_folder, "index.html")
103
+
104
+
105
  if __name__ == "__main__":
106
  app.run(host="0.0.0.0", port=7860, threaded=True)
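For reviewers trying the reformatted endpoint: a minimal client sketch for the `/api/evaluate_report` route shown above. The host and port follow `app.run(host="0.0.0.0", port=7860)`; the report and condition values are invented placeholders.

```python
# Illustrative client for the /api/evaluate_report endpoint above.
import requests

resp = requests.post(
    "http://localhost:7860/api/evaluate_report",
    json={"report": "Patient reports intermittent headaches...", "condition": "migraine"},
    timeout=120,
)
resp.raise_for_status()  # 400 if "report" or "condition" is missing
print(resp.json()["evaluation"])
```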
auth.py CHANGED
@@ -12,66 +12,72 @@
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
15
- import json
16
  import datetime
17
- from google.oauth2 import service_account
 
18
  import google.auth.transport.requests
 
 
19
 
20
  def create_credentials(secret_key_json) -> service_account.Credentials:
21
- """Creates Google Cloud credentials from the provided service account key.
 
 
 
22
 
23
- Returns:
24
- service_account.Credentials: The created credentials object.
 
 
25
 
26
- Raises:
27
- ValueError: If the environment variable is not set or is empty, or if the
28
- JSON format is invalid.
29
- """
30
 
31
- if not secret_key_json:
32
- raise ValueError("Userdata variable 'GCP_MEDGEMMA_SERVICE_ACCOUNT_KEY' is not set or is empty.")
33
- try:
34
- service_account_info = json.loads(secret_key_json)
35
- except (SyntaxError, ValueError) as e:
36
- raise ValueError("Invalid service account key JSON format.") from e
37
- return service_account.Credentials.from_service_account_info(
38
- service_account_info,
39
- scopes=['https://www.googleapis.com/auth/cloud-platform']
40
- )
41
 
42
- def refresh_credentials(credentials: service_account.Credentials) -> service_account.Credentials:
43
- """Refreshes the provided Google Cloud credentials if they are about to expire
44
- (within 5 minutes) or if they don't have an expiry time set.
 
 
45
 
46
- Args:
47
- credentials: The credentials object to refresh.
48
 
49
- Returns:
50
- service_account.Credentials: The refreshed credentials object.
51
- """
52
- if credentials.expiry:
53
- expiry_time = credentials.expiry.replace(tzinfo=datetime.timezone.utc)
54
- # Calculate the time remaining until expiration
55
- time_remaining = expiry_time - datetime.datetime.now(datetime.timezone.utc)
56
- # Check if the token is about to expire (e.g., within 5 minutes)
57
- if time_remaining < datetime.timedelta(minutes=5):
58
  request = google.auth.transport.requests.Request()
59
  credentials.refresh(request)
60
- else:
61
- # If no expiry is set, always attempt to refresh (e.g., for certain credential types)
62
- request = google.auth.transport.requests.Request()
63
- credentials.refresh(request)
64
- return credentials
65
 
66
- def get_access_token_refresh_if_needed(credentials: service_account.Credentials) -> str:
67
- """Gets the access token from the credentials, refreshing them if needed.
68
 
69
- Args:
70
- credentials: The credentials object.
71
 
72
- Returns:
73
- str: The access token.
74
- """
75
- credentials = refresh_credentials(credentials)
76
- return credentials.token
77
 
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
 
15
  import datetime
16
+ import json
17
+
18
  import google.auth.transport.requests
19
+ from google.oauth2 import service_account
20
+
21
 
22
  def create_credentials(secret_key_json) -> service_account.Credentials:
23
+ """Creates Google Cloud credentials from the provided service account key.
24
+
25
+ Returns:
26
+ service_account.Credentials: The created credentials object.
27
 
28
+ Raises:
29
+ ValueError: If the environment variable is not set or is empty, or if the
30
+ JSON format is invalid.
31
+ """
32
 
33
+ if not secret_key_json:
34
+ raise ValueError(
35
+ "Userdata variable 'GCP_MEDGEMMA_SERVICE_ACCOUNT_KEY' is not set or is empty."
36
+ )
37
+ try:
38
+ service_account_info = json.loads(secret_key_json)
39
+ except (SyntaxError, ValueError) as e:
40
+ raise ValueError("Invalid service account key JSON format.") from e
41
+ return service_account.Credentials.from_service_account_info(
42
+ service_account_info, scopes=["https://www.googleapis.com/auth/cloud-platform"]
43
+ )
44
45
 
46
+ def refresh_credentials(
47
+ credentials: service_account.Credentials,
48
+ ) -> service_account.Credentials:
49
+ """Refreshes the provided Google Cloud credentials if they are about to expire
50
+ (within 5 minutes) or if they don't have an expiry time set.
51
 
52
+ Args:
53
+ credentials: The credentials object to refresh.
54
 
55
+ Returns:
56
+ service_account.Credentials: The refreshed credentials object.
57
+ """
58
+ if credentials.expiry:
59
+ expiry_time = credentials.expiry.replace(tzinfo=datetime.timezone.utc)
60
+ # Calculate the time remaining until expiration
61
+ time_remaining = expiry_time - datetime.datetime.now(datetime.timezone.utc)
62
+ # Check if the token is about to expire (e.g., within 5 minutes)
63
+ if time_remaining < datetime.timedelta(minutes=5):
64
+ request = google.auth.transport.requests.Request()
65
+ credentials.refresh(request)
66
+ else:
67
+ # If no expiry is set, always attempt to refresh (e.g., for certain credential types)
68
  request = google.auth.transport.requests.Request()
69
  credentials.refresh(request)
70
+ return credentials
71
 
 
 
72
 
73
+ def get_access_token_refresh_if_needed(credentials: service_account.Credentials) -> str:
74
+ """Gets the access token from the credentials, refreshing them if needed.
75
 
76
+ Args:
77
+ credentials: The credentials object.
 
 
 
78
 
79
+ Returns:
80
+ str: The access token.
81
+ """
82
+ credentials = refresh_credentials(credentials)
83
+ return credentials.token
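A minimal sketch of how these helpers chain together, mirroring the wiring in medgemma.py later in this commit. The Bearer-header shape is an assumption; the actual request headers are not shown in the diff.

```python
# Sketch: wiring the auth helpers, as medgemma.py does elsewhere in this commit.
import os

from auth import create_credentials, get_access_token_refresh_if_needed

secret_key_json = os.environ.get("GCP_MEDGEMMA_SERVICE_ACCOUNT_KEY")
credentials = create_credentials(secret_key_json)  # raises ValueError if unset or malformed
token = get_access_token_refresh_if_needed(credentials)  # refreshes when <5 minutes remain
headers = {"Authorization": f"Bearer {token}"}  # assumed header shape, not shown in the diff
```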
cache.py CHANGED
@@ -12,12 +12,13 @@
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
15
- from diskcache import Cache
16
  import os
17
  import shutil
18
  import tempfile
19
  import zipfile
20
- import logging
 
21
 
22
  cache = Cache(os.environ.get("CACHE_DIR", "/cache"))
23
  # Print cache statistics after loading
@@ -28,16 +29,17 @@ try:
28
  except Exception as e:
29
  print(f"Could not retrieve cache statistics: {e}")
30
 
 
31
  def create_cache_zip():
32
  temp_dir = tempfile.gettempdir()
33
- base_name = os.path.join(temp_dir, "cache_archive") # A more descriptive name
34
  archive_path = base_name + ".zip"
35
  cache_directory = os.environ.get("CACHE_DIR", "/cache")
36
-
37
  if not os.path.isdir(cache_directory):
38
  logging.error(f"Cache directory not found at {cache_directory}")
39
  return None, f"Cache directory not found on server: {cache_directory}"
40
-
41
  logging.info("Forcing a cache checkpoint for safe backup...")
42
  try:
43
  # Open and immediately close a connection.
@@ -45,15 +47,19 @@ def create_cache_zip():
45
  # into the main .db file, ensuring the on-disk files are consistent.
46
  with Cache(cache_directory) as temp_cache:
47
  temp_cache.close()
48
-
49
  # Clean up temporary files before archiving.
50
- tmp_path = os.path.join(cache_directory, 'tmp')
51
  if os.path.isdir(tmp_path):
52
  logging.info(f"Removing temporary cache directory: {tmp_path}")
53
  shutil.rmtree(tmp_path)
54
 
55
- logging.info(f"Checkpoint complete. Creating zip archive of {cache_directory} to {archive_path}")
56
- with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED, compresslevel=9) as zipf:
57
  for root, _, files in os.walk(cache_directory):
58
  for file in files:
59
  file_path = os.path.join(root, file)
@@ -61,7 +67,9 @@ def create_cache_zip():
61
  zipf.write(file_path, arcname)
62
  logging.info("Zip archive created successfully.")
63
  return archive_path, None
64
-
65
  except Exception as e:
66
- logging.error(f"Error creating zip archive of cache directory: {e}", exc_info=True)
 
 
67
  return None, f"Error creating zip archive: {e}"
 
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
15
+ import logging
16
  import os
17
  import shutil
18
  import tempfile
19
  import zipfile
20
+
21
+ from diskcache import Cache
22
 
23
  cache = Cache(os.environ.get("CACHE_DIR", "/cache"))
24
  # Print cache statistics after loading
 
29
  except Exception as e:
30
  print(f"Could not retrieve cache statistics: {e}")
31
 
32
+
33
  def create_cache_zip():
34
  temp_dir = tempfile.gettempdir()
35
+ base_name = os.path.join(temp_dir, "cache_archive") # A more descriptive name
36
  archive_path = base_name + ".zip"
37
  cache_directory = os.environ.get("CACHE_DIR", "/cache")
38
+
39
  if not os.path.isdir(cache_directory):
40
  logging.error(f"Cache directory not found at {cache_directory}")
41
  return None, f"Cache directory not found on server: {cache_directory}"
42
+
43
  logging.info("Forcing a cache checkpoint for safe backup...")
44
  try:
45
  # Open and immediately close a connection.
 
47
  # into the main .db file, ensuring the on-disk files are consistent.
48
  with Cache(cache_directory) as temp_cache:
49
  temp_cache.close()
50
+
51
  # Clean up temporary files before archiving.
52
+ tmp_path = os.path.join(cache_directory, "tmp")
53
  if os.path.isdir(tmp_path):
54
  logging.info(f"Removing temporary cache directory: {tmp_path}")
55
  shutil.rmtree(tmp_path)
56
 
57
+ logging.info(
58
+ f"Checkpoint complete. Creating zip archive of {cache_directory} to {archive_path}"
59
+ )
60
+ with zipfile.ZipFile(
61
+ archive_path, "w", zipfile.ZIP_DEFLATED, compresslevel=9
62
+ ) as zipf:
63
  for root, _, files in os.walk(cache_directory):
64
  for file in files:
65
  file_path = os.path.join(root, file)
 
67
  zipf.write(file_path, arcname)
68
  logging.info("Zip archive created successfully.")
69
  return archive_path, None
70
+
71
  except Exception as e:
72
+ logging.error(
73
+ f"Error creating zip archive of cache directory: {e}", exc_info=True
74
+ )
75
  return None, f"Error creating zip archive: {e}"
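`create_cache_zip()` keeps its `(archive_path, error)` return contract through the reformat; a caller sketch:

```python
# Sketch of consuming create_cache_zip()'s (path, error) tuple.
from cache import create_cache_zip

archive_path, error = create_cache_zip()
if error:
    print(f"Cache backup failed: {error}")
else:
    print(f"Cache archived at {archive_path}")
```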
evaluation.py CHANGED
@@ -13,6 +13,7 @@
13
  # limitations under the License.
14
 
15
  import re
 
16
  from medgemma import medgemma_get_text_response
17
 
18
 
@@ -40,30 +41,29 @@ REPORT TEMPLATE START
40
  REPORT TEMPLATE END
41
  """
42
 
 
43
  def evaluate_report(report, condition):
44
  """Evaluate the pre-visit report based on the condition using MedGemma LLM."""
45
- evaluation_text = medgemma_get_text_response([
46
- {
47
- "role": "system",
48
- "content": [
49
- {
50
- "type": "text",
51
- "text": f"{evaluation_prompt(condition)}"
52
- }
53
- ]
54
- },
55
- {
56
- "role": "user",
57
- "content": [
58
- {
59
- "type": "text",
60
- "text": f"Here is the report text:\n{report}"
61
- }
62
- ]
63
- },
64
- ])
65
 
66
  # Remove any LLM "thinking" blocks (special tokens sometimes present in output)
67
- evaluation_text = re.sub(r'<unused94>.*?<unused95>', '', evaluation_text, flags=re.DOTALL)
 
 
68
 
69
  return evaluation_text
 
13
  # limitations under the License.
14
 
15
  import re
16
+
17
  from medgemma import medgemma_get_text_response
18
 
19
 
 
41
  REPORT TEMPLATE END
42
  """
43
 
44
+
45
  def evaluate_report(report, condition):
46
  """Evaluate the pre-visit report based on the condition using MedGemma LLM."""
47
+ evaluation_text = medgemma_get_text_response(
48
+ [
49
+ {
50
+ "role": "system",
51
+ "content": [
52
+ {"type": "text", "text": f"{evaluation_prompt(condition)}"}
53
+ ],
54
+ },
55
+ {
56
+ "role": "user",
57
+ "content": [
58
+ {"type": "text", "text": f"Here is the report text:\n{report}"}
59
+ ],
60
+ },
61
+ ]
62
+ )
 
 
 
 
63
 
64
  # Remove any LLM "thinking" blocks (special tokens sometimes present in output)
65
+ evaluation_text = re.sub(
66
+ r"<unused94>.*?<unused95>", "", evaluation_text, flags=re.DOTALL
67
+ )
68
 
69
  return evaluation_text
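The `<unused94>…<unused95>` stripping is unchanged by the reformat; on a toy string (the sample text is invented) it behaves like this:

```python
import re

raw = "<unused94>model reasoning, hidden from users<unused95>Score: 4/5. Clear history section."
clean = re.sub(r"<unused94>.*?<unused95>", "", raw, flags=re.DOTALL)
print(clean)  # -> "Score: 4/5. Clear history section."
```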
gemini.py CHANGED
@@ -13,47 +13,42 @@
13
  # limitations under the License.
14
 
15
  import os
 
16
  import requests
 
17
  from cache import cache # new import replacing duplicate cache initialization
18
 
19
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
20
 
 
21
  # Decorate the function to cache its results indefinitely.
22
  @cache.memoize()
23
- def gemini_get_text_response(prompt: str,
24
- stop_sequences: list = None,
25
- temperature: float = 0.1,
26
- max_output_tokens: int = 4000,
27
- top_p: float = 0.8,
28
- top_k: int = 10):
 
 
29
  """
30
  Makes a text generation request to the Gemini API.
31
  """
32
 
33
  api_url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={GEMINI_API_KEY}"
34
- headers = {
35
- 'Content-Type': 'application/json'
36
- }
37
 
38
  data = {
39
- "contents": [
40
- {
41
- "parts": [
42
- {
43
- "text": prompt
44
- }
45
- ]
46
- }
47
- ],
48
  "generationConfig": {
49
  "stopSequences": stop_sequences or ["Title"],
50
  "temperature": temperature,
51
  "maxOutputTokens": max_output_tokens,
52
  "topP": top_p,
53
- "topK": top_k
54
- }
55
  }
56
 
57
  response = requests.post(api_url, headers=headers, json=data)
58
  response.raise_for_status() # Raise an exception for bad status codes
59
- return response.json()["candidates"][0]["content"]["parts"][0]["text"]
 
13
  # limitations under the License.
14
 
15
  import os
16
+
17
  import requests
18
+
19
  from cache import cache # new import replacing duplicate cache initialization
20
 
21
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
22
 
23
+
24
  # Decorate the function to cache its results indefinitely.
25
  @cache.memoize()
26
+ def gemini_get_text_response(
27
+ prompt: str,
28
+ stop_sequences: list = None,
29
+ temperature: float = 0.1,
30
+ max_output_tokens: int = 4000,
31
+ top_p: float = 0.8,
32
+ top_k: int = 10,
33
+ ):
34
  """
35
  Makes a text generation request to the Gemini API.
36
  """
37
 
38
  api_url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={GEMINI_API_KEY}"
39
+ headers = {"Content-Type": "application/json"}
 
 
40
 
41
  data = {
42
+ "contents": [{"parts": [{"text": prompt}]}],
43
  "generationConfig": {
44
  "stopSequences": stop_sequences or ["Title"],
45
  "temperature": temperature,
46
  "maxOutputTokens": max_output_tokens,
47
  "topP": top_p,
48
+ "topK": top_k,
49
+ },
50
  }
51
 
52
  response = requests.post(api_url, headers=headers, json=data)
53
  response.raise_for_status() # Raise an exception for bad status codes
54
+ return response.json()["candidates"][0]["content"]["parts"][0]["text"]
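Since `gemini_get_text_response` stays wrapped in `@cache.memoize()`, repeated identical calls are served from diskcache. A sketch, assuming `GEMINI_API_KEY` is set and the cache directory is writable:

```python
from gemini import gemini_get_text_response

prompt = "In one sentence, what is a FHIR resource?"
first = gemini_get_text_response(prompt)   # hits the Gemini API
second = gemini_get_text_response(prompt)  # served from the disk cache
assert first == second
```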
gemini_tts.py CHANGED
@@ -12,16 +12,18 @@
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
15
- import google.generativeai as genai
 
16
  import os
17
- import struct
18
  import re
19
- import logging
20
- from cache import cache
 
21
 
22
  # Add these imports for MP3 conversion
23
  from pydub import AudioSegment
24
- import io
 
25
 
26
  # --- Constants ---
27
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
@@ -30,12 +32,16 @@ TTS_MODEL = "gemini-2.5-flash-preview-tts"
30
  DEFAULT_RAW_AUDIO_MIME = "audio/L16;rate=24000"
31
 
32
  # --- Configuration ---
33
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
34
 
35
  genai.configure(api_key=GEMINI_API_KEY)
36
 
 
37
  class TTSGenerationError(Exception):
38
  """Custom exception for TTS generation failures."""
 
39
  pass
40
 
41
 
@@ -46,7 +52,7 @@ def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
46
  e.g., "audio/L16;rate=24000" -> {"bits_per_sample": 16, "rate": 24000}
47
  """
48
  bits_per_sample = 16 # Default
49
- rate = 24000 # Default
50
 
51
  parts = mime_type.split(";")
52
  for param in parts:
@@ -56,15 +62,16 @@ def parse_audio_mime_type(mime_type: str) -> dict[str, int | None]:
56
  rate_str = param.split("=", 1)[1]
57
  rate = int(rate_str)
58
  except (ValueError, IndexError):
59
- pass # Keep default if parsing fails
60
- elif re.match(r"audio/l\d+", param): # Matches audio/L<digits>
61
- try:
62
- bits_str = param.split("l",1)[1]
63
  bits_per_sample = int(bits_str)
64
- except (ValueError, IndexError):
65
- pass # Keep default
66
  return {"bits_per_sample": bits_per_sample, "rate": rate}
67
 
 
68
  def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
69
  """
70
  Generates a WAV file header for the given raw audio data and parameters.
@@ -82,13 +89,26 @@ def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
82
 
83
  header = struct.pack(
84
  "<4sI4s4sIHHIIHH4sI",
85
- b"RIFF", chunk_size, b"WAVE", b"fmt ",
86
- 16, 1, num_channels, sample_rate, byte_rate, block_align,
87
- bits_per_sample, b"data", data_size
  )
89
  return header + audio_data
 
 
90
  # --- End of helper functions ---
91
 
 
92
  def _synthesize_gemini_tts_impl(text: str, gemini_voice_name: str) -> tuple[bytes, str]:
93
  """
94
  Synthesizes English text using the Gemini API via the google-genai library.
@@ -109,11 +129,9 @@ def _synthesize_gemini_tts_impl(text: str, gemini_voice_name: str) -> tuple[byte
109
  "response_modalities": ["AUDIO"],
110
  "speech_config": {
111
  "voice_config": {
112
- "prebuilt_voice_config": {
113
- "voice_name": gemini_voice_name
114
- }
115
  }
116
- }
117
  }
118
 
119
  response = model.generate_content(
@@ -137,8 +155,11 @@ def _synthesize_gemini_tts_impl(text: str, gemini_voice_name: str) -> tuple[byte
137
  # --- Audio processing ---
138
  if final_mime_type:
139
  final_mime_type_lower = final_mime_type.lower()
140
- needs_wav_conversion = any(p in final_mime_type_lower for p in ("audio/l16", "audio/l24", "audio/l8")) or \
141
- not final_mime_type_lower.startswith(("audio/wav", "audio/mpeg", "audio/ogg", "audio/opus"))
 
 
 
142
 
143
  if needs_wav_conversion:
144
  processed_audio_data = convert_to_wav(audio_data_bytes, final_mime_type)
@@ -147,7 +168,10 @@ def _synthesize_gemini_tts_impl(text: str, gemini_voice_name: str) -> tuple[byte
147
  processed_audio_data = audio_data_bytes
148
  processed_audio_mime = final_mime_type
149
  else:
150
- logging.warning("MIME type not determined. Assuming raw audio and attempting WAV conversion (defaulting to %s).", DEFAULT_RAW_AUDIO_MIME)
 
 
 
151
  processed_audio_data = convert_to_wav(audio_data_bytes, DEFAULT_RAW_AUDIO_MIME)
152
  processed_audio_mime = "audio/wav"
153
 
@@ -155,7 +179,9 @@ def _synthesize_gemini_tts_impl(text: str, gemini_voice_name: str) -> tuple[byte
155
  if processed_audio_data:
156
  try:
157
  # Load audio into AudioSegment
158
- audio_segment = AudioSegment.from_file(io.BytesIO(processed_audio_data), format="wav")
 
 
159
  mp3_buffer = io.BytesIO()
160
  audio_segment.export(mp3_buffer, format="mp3")
161
  mp3_bytes = mp3_buffer.getvalue()
@@ -169,11 +195,15 @@ def _synthesize_gemini_tts_impl(text: str, gemini_voice_name: str) -> tuple[byte
169
  logging.error(error_message)
170
  raise TTSGenerationError(error_message)
171
 
 
172
  # Always create the memoized function first, so we can access its .key() method
173
  _memoized_tts_func = cache.memoize()(_synthesize_gemini_tts_impl)
174
 
175
  if GENERATE_SPEECH:
176
- def synthesize_gemini_tts_with_error_handling(*args, **kwargs) -> tuple[bytes | None, str | None]:
 
 
 
177
  """
178
  A wrapper for the memoized TTS function that catches errors and returns (None, None).
179
  This makes the audio generation more resilient to individual failures.
@@ -183,7 +213,10 @@ if GENERATE_SPEECH:
183
  return _memoized_tts_func(*args, **kwargs)
184
  except TTSGenerationError as e:
185
  # If generation fails, log the error and return None, None.
186
- logging.error("Handled TTS Generation Error: %s. Continuing without audio for this segment.", e)
 
 
 
187
  return None, None
188
 
189
  synthesize_gemini_tts = synthesize_gemini_tts_with_error_handling
@@ -206,7 +239,9 @@ else:
206
  return result # Cache hit
207
 
208
  # Cache miss
209
- logging.info("GENERATE_SPEECH is false and no cached result found for key: %s", key)
 
 
210
  return None, None
211
 
212
- synthesize_gemini_tts = read_only_synthesize_gemini_tts
 
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
15
+ import io
16
+ import logging
17
  import os
 
18
  import re
19
+ import struct
20
+
21
+ import google.generativeai as genai
22
 
23
  # Add these imports for MP3 conversion
24
  from pydub import AudioSegment
25
+
26
+ from cache import cache
27
 
28
  # --- Constants ---
29
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 
32
  DEFAULT_RAW_AUDIO_MIME = "audio/L16;rate=24000"
33
 
34
  # --- Configuration ---
35
+ logging.basicConfig(
36
+ level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
37
+ )
38
 
39
  genai.configure(api_key=GEMINI_API_KEY)
40
 
41
+
42
  class TTSGenerationError(Exception):
43
  """Custom exception for TTS generation failures."""
44
+
45
  pass
46
 
47
 
 
52
  e.g., "audio/L16;rate=24000" -> {"bits_per_sample": 16, "rate": 24000}
53
  """
54
  bits_per_sample = 16 # Default
55
+ rate = 24000 # Default
56
 
57
  parts = mime_type.split(";")
58
  for param in parts:
 
62
  rate_str = param.split("=", 1)[1]
63
  rate = int(rate_str)
64
  except (ValueError, IndexError):
65
+ pass # Keep default if parsing fails
66
+ elif re.match(r"audio/l\d+", param): # Matches audio/L<digits>
67
+ try:
68
+ bits_str = param.split("l", 1)[1]
69
  bits_per_sample = int(bits_str)
70
+ except (ValueError, IndexError):
71
+ pass # Keep default
72
  return {"bits_per_sample": bits_per_sample, "rate": rate}
73
 
74
+
75
  def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
76
  """
77
  Generates a WAV file header for the given raw audio data and parameters.
 
89
 
90
  header = struct.pack(
91
  "<4sI4s4sIHHIIHH4sI",
92
+ b"RIFF",
93
+ chunk_size,
94
+ b"WAVE",
95
+ b"fmt ",
96
+ 16,
97
+ 1,
98
+ num_channels,
99
+ sample_rate,
100
+ byte_rate,
101
+ block_align,
102
+ bits_per_sample,
103
+ b"data",
104
+ data_size,
105
  )
106
  return header + audio_data
107
+
108
+
109
  # --- End of helper functions ---
110
 
111
+
112
  def _synthesize_gemini_tts_impl(text: str, gemini_voice_name: str) -> tuple[bytes, str]:
113
  """
114
  Synthesizes English text using the Gemini API via the google-genai library.
 
129
  "response_modalities": ["AUDIO"],
130
  "speech_config": {
131
  "voice_config": {
132
+ "prebuilt_voice_config": {"voice_name": gemini_voice_name}
 
 
133
  }
134
+ },
135
  }
136
 
137
  response = model.generate_content(
 
155
  # --- Audio processing ---
156
  if final_mime_type:
157
  final_mime_type_lower = final_mime_type.lower()
158
+ needs_wav_conversion = any(
159
+ p in final_mime_type_lower for p in ("audio/l16", "audio/l24", "audio/l8")
160
+ ) or not final_mime_type_lower.startswith(
161
+ ("audio/wav", "audio/mpeg", "audio/ogg", "audio/opus")
162
+ )
163
 
164
  if needs_wav_conversion:
165
  processed_audio_data = convert_to_wav(audio_data_bytes, final_mime_type)
 
168
  processed_audio_data = audio_data_bytes
169
  processed_audio_mime = final_mime_type
170
  else:
171
+ logging.warning(
172
+ "MIME type not determined. Assuming raw audio and attempting WAV conversion (defaulting to %s).",
173
+ DEFAULT_RAW_AUDIO_MIME,
174
+ )
175
  processed_audio_data = convert_to_wav(audio_data_bytes, DEFAULT_RAW_AUDIO_MIME)
176
  processed_audio_mime = "audio/wav"
177
 
 
179
  if processed_audio_data:
180
  try:
181
  # Load audio into AudioSegment
182
+ audio_segment = AudioSegment.from_file(
183
+ io.BytesIO(processed_audio_data), format="wav"
184
+ )
185
  mp3_buffer = io.BytesIO()
186
  audio_segment.export(mp3_buffer, format="mp3")
187
  mp3_bytes = mp3_buffer.getvalue()
 
195
  logging.error(error_message)
196
  raise TTSGenerationError(error_message)
197
 
198
+
199
  # Always create the memoized function first, so we can access its .key() method
200
  _memoized_tts_func = cache.memoize()(_synthesize_gemini_tts_impl)
201
 
202
  if GENERATE_SPEECH:
203
+
204
+ def synthesize_gemini_tts_with_error_handling(
205
+ *args, **kwargs
206
+ ) -> tuple[bytes | None, str | None]:
207
  """
208
  A wrapper for the memoized TTS function that catches errors and returns (None, None).
209
  This makes the audio generation more resilient to individual failures.
 
213
  return _memoized_tts_func(*args, **kwargs)
214
  except TTSGenerationError as e:
215
  # If generation fails, log the error and return None, None.
216
+ logging.error(
217
+ "Handled TTS Generation Error: %s. Continuing without audio for this segment.",
218
+ e,
219
+ )
220
  return None, None
221
 
222
  synthesize_gemini_tts = synthesize_gemini_tts_with_error_handling
 
239
  return result # Cache hit
240
 
241
  # Cache miss
242
+ logging.info(
243
+ "GENERATE_SPEECH is false and no cached result found for key: %s", key
244
+ )
245
  return None, None
246
 
247
+ synthesize_gemini_tts = read_only_synthesize_gemini_tts
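A quick sanity check of `convert_to_wav`, which prepends a standard 44-byte RIFF/WAVE header to raw PCM. Importing the module assumes its environment (`GEMINI_API_KEY`, a writable cache directory) is configured, since configuration runs at import time.

```python
from gemini_tts import convert_to_wav

raw_pcm = b"\x00\x00" * 240  # 10 ms of 16-bit mono silence at 24 kHz
wav_bytes = convert_to_wav(raw_pcm, "audio/L16;rate=24000")
assert wav_bytes[:4] == b"RIFF" and wav_bytes[8:12] == b"WAVE"
assert len(wav_bytes) == 44 + len(raw_pcm)  # struct "<4sI4s4sIHHIIHH4sI" packs 44 bytes
```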
interview_simulator.py CHANGED
@@ -12,70 +12,89 @@
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
 
15
  import json
16
- import re
17
  import os
18
- import base64
19
 
20
  from gemini import gemini_get_text_response
21
- from medgemma import medgemma_get_text_response
22
  from gemini_tts import synthesize_gemini_tts
 
23
 
24
  INTERVIEWER_VOICE = "Aoede"
25
 
 
26
  def read_symptoms_json():
27
  # Load the list of symptoms for each condition from a JSON file
28
- with open("symptoms.json", 'r') as f:
29
  return json.load(f)
30
 
 
31
  def read_patient_and_conditions_json():
32
  # Load all patient and condition data from the frontend assets
33
- with open(os.path.join(os.environ.get("FRONTEND_BUILD", "frontend/build"), "assets", "patients_and_conditions.json"), 'r') as f:
34
  return json.load(f)
35
 
 
36
  def get_patient(patient_name):
37
  """Helper function to locate a patient record by name. Raises StopIteration if not found."""
38
  return next(p for p in PATIENTS if p["name"] == patient_name)
39
 
 
40
  def read_fhir_json(patient):
41
  # Load the FHIR (EHR) JSON file for a given patient
42
- with open(os.path.join(os.environ.get("FRONTEND_BUILD", "frontend/build"), patient["fhirFile"].lstrip("/")), 'r') as f:
43
  return json.load(f)
44
 
 
45
  def get_ehr_summary_per_patient(patient_name):
46
  # Returns a concise EHR summary for the patient, using LLM if not already cached
47
  patient = get_patient(patient_name)
48
  if patient.get("ehr_summary"):
49
  return patient["ehr_summary"]
50
  # Use MedGemma to summarize the EHR for the patient
51
- ehr_summary = medgemma_get_text_response([
52
- {
53
- "role": "system",
54
- "content": [
55
- {
56
- "type": "text",
57
- "text": f"""You are a medical assistant summarizing the EHR (FHIR) records for the patient {patient_name}.
 
58
  Provide a concise summary of the patient's medical history, including any existing conditions, medications, and relevant past treatments.
59
- Do not include personal opinions or assumptions, only factual information."""
60
- }
61
- ]
62
- },
63
- {
64
- "role": "user",
65
- "content": [
66
- {
67
- "type": "text",
68
- "text": json.dumps(read_fhir_json(patient))
69
- }
70
- ]
71
- }
72
- ])
73
  patient["ehr_summary"] = ehr_summary
74
  return ehr_summary
75
 
 
76
  PATIENTS = read_patient_and_conditions_json()["patients"]
77
  SYMPTOMS = read_symptoms_json()
78
-
 
79
  def patient_roleplay_instructions(patient_name, condition_name, previous_answers):
80
  """
81
  Generates structured instructions for the LLM to roleplay as a patient, including persona, scenario, and symptom logic.
@@ -120,6 +139,7 @@ def patient_roleplay_instructions(patient_name, condition_name, previous_answers
120
  ---
121
  """
122
 
 
123
  def interviewer_roleplay_instructions(patient_name):
124
  # Returns detailed instructions for the LLM to roleplay as the interviewer/clinical assistant
125
  return f"""
@@ -153,6 +173,7 @@ def interviewer_roleplay_instructions(patient_name):
153
  3. **End Interview:** You MUST continue the interview until you have asked 20 questions OR the patient is unable to provide more information. When the interview is complete, you MUST conclude by printing this exact phrase: "Thank you for answering my questions. I have everything needed to prepare a report for your visit. End interview."
154
  """
155
 
 
156
  def report_writer_instructions(patient_name: str) -> str:
157
  """
158
  Generates the system prompt with clear instructions, role, and constraints for the LLM.
@@ -202,7 +223,10 @@ The final output MUST be ONLY the full, updated Markdown medical report.
202
  DO NOT include any introductory phrases, explanations, or any text other than the report itself.
203
  </output_format>"""
204
 
205
- def write_report(patient_name: str, interview_text: str, existing_report: str = None) -> str:
 
 
 
206
  """
207
  Constructs the full prompt, sends it to the LLM, and processes the response.
208
  This function handles both the initial creation and subsequent updates of a report.
@@ -212,7 +236,7 @@ def write_report(patient_name: str, interview_text: str, existing_report: str =
212
 
213
  # If no existing report is provided, load a default template from a string.
214
  if not existing_report:
215
- with open("report_template.txt", 'r') as f:
216
  existing_report = f.read()
217
 
218
  # Construct the user prompt with the specific task and data
@@ -237,149 +261,149 @@ Now, generate the complete and updated medical report based on all system and us
237
 
238
  # Assemble the full message payload for the LLM API
239
  messages = [
240
- {
241
- "role": "system",
242
- "content": [{"type": "text", "text": instructions}]
243
- },
244
- {
245
- "role": "user",
246
- "content": [{"type": "text", "text": user_prompt}]
247
- }
248
  ]
249
 
250
  report = medgemma_get_text_response(messages)
251
- cleaned_report = re.sub(r'<unused94>.*?</unused95>', '', report, flags=re.DOTALL)
252
  cleaned_report = cleaned_report.strip()
253
 
254
  # The LLM sometimes wraps the markdown report in a markdown code block.
255
  # This regex checks if the entire string is a code block and extracts the content.
256
- match = re.match(r'^\s*```(?:markdown)?\s*(.*?)\s*```\s*$', cleaned_report, re.DOTALL | re.IGNORECASE)
257
  if match:
258
  cleaned_report = match.group(1)
259
 
260
  return cleaned_report.strip()
261
 
262
 
263
-
264
  def stream_interview(patient_name, condition_name):
265
- print(f"Starting interview simulation for patient: {patient_name}, condition: {condition_name}")
 
 
266
  # Prepare roleplay instructions and initial dialog (using existing helper functions)
267
  interviewer_instructions = interviewer_roleplay_instructions(patient_name)
268
-
269
  # Determine voices for TTS
270
  patient = get_patient(patient_name)
271
  patient_voice = patient["voice"]
272
-
273
  dialog = [
274
  {
275
  "role": "system",
276
- "content": [
277
- {
278
- "type": "text",
279
- "text": interviewer_instructions
280
- }
281
- ]
282
  },
283
- {
284
- "role": "user",
285
- "content": [
286
- {
287
- "type": "text",
288
- "text": "start interview"
289
- }
290
- ]
291
- }
292
  ]
293
-
294
  write_report_text = ""
295
  full_interview_q_a = ""
296
  number_of_questions_limit = 30
297
  for i in range(number_of_questions_limit):
298
  # Get the next interviewer question from MedGemma
299
  interviewer_question_text = medgemma_get_text_response(
300
- messages=dialog,
301
- temperature=0.1,
302
- max_tokens=2048,
303
- stream=False
304
  )
305
  # Process optional "thinking" text (if present in the LLM output)
306
- thinking_search = re.search('<unused94>(.+?)<unused95>', interviewer_question_text, re.DOTALL)
 
 
307
  if thinking_search:
308
  thinking_text = thinking_search.group(1)
309
- interviewer_question_text = interviewer_question_text.replace(f'<unused94>{thinking_text}<unused95>', "")
 
 
310
  if i == 0:
311
  # Only yield the "thinking" summary for the first question
312
  thinking_text = gemini_get_text_response(
313
  f"""Provide a summary of up to 100 words containing only the reasoning and planning from this text,
314
- do not include instructions, use first person: {thinking_text}""")
315
- yield json.dumps({
316
- "speaker": "interviewer thinking",
317
- "text": thinking_text
318
- })
319
 
320
  # Clean up the text for TTS and display
321
- clean_interviewer_text = interviewer_question_text.replace("End interview.", "").strip()
 
 
322
 
323
  # Generate audio for the interviewer's question using Gemini TTS
324
- audio_data, mime_type = synthesize_gemini_tts(f"Speak in a slightly upbeat and brisk manner, as a friendly clinician: {clean_interviewer_text}", INTERVIEWER_VOICE)
 
 
 
325
  audio_b64 = None
326
  if audio_data and mime_type:
327
  audio_b64 = f"data:{mime_type};base64,{base64.b64encode(audio_data).decode('utf-8')}"
328
 
329
  # Yield interviewer message (text and audio)
330
- yield json.dumps({
331
- "speaker": "interviewer",
332
- "text": clean_interviewer_text,
333
- "audio": audio_b64
334
- })
335
- dialog.append({
336
- "role": "assistant",
337
- "content": [{
338
- "type": "text",
339
- "text": interviewer_question_text
340
- }]
341
- })
 
342
  if "End interview" in interviewer_question_text:
343
  # End the interview loop if the LLM signals completion
344
  break
345
 
346
  # Get the patient's response from Gemini (roleplay LLM)
347
- patient_response_text = gemini_get_text_response(f"""
 
348
  {patient_roleplay_instructions(patient_name, condition_name, full_interview_q_a)}\n\n
349
- Question: {interviewer_question_text}""")
 
350
 
351
  # Generate audio for the patient's response
352
- audio_data, mime_type = synthesize_gemini_tts(f"Say this in faster speed, using a sick tone: {patient_response_text}", patient_voice)
 
 
 
353
  audio_b64 = None
354
  if audio_data and mime_type:
355
  audio_b64 = f"data:{mime_type};base64,{base64.b64encode(audio_data).decode('utf-8')}"
356
 
357
  # Yield patient message (text and audio)
358
- yield json.dumps({
359
- "speaker": "patient",
360
- "text": patient_response_text,
361
- "audio": audio_b64
362
- })
363
- dialog.append({
364
- "role": "user",
365
- "content": [{
366
- "type": "text",
367
- "text": patient_response_text
368
- }]
369
- })
370
  # Track the full Q&A for context in future LLM calls
371
- most_recent_q_a = f"Q: {interviewer_question_text}\nA: {patient_response_text}\n"
372
- full_interview_q_a_with_new_q_a = "PREVIOUS Q&A:\n" + full_interview_q_a + "\nNEW Q&A:\n" + most_recent_q_a
373
  # Update the report after each Q&A
374
- write_report_text = write_report(patient_name, full_interview_q_a_with_new_q_a, write_report_text)
 
 
375
  full_interview_q_a += most_recent_q_a
376
- yield json.dumps({
377
- "speaker": "report",
378
- "text": write_report_text
379
- })
380
 
381
- print(f"""Interview simulation completed for patient: {patient_name}, condition: {condition_name}.
 
382
  Patient profile used:
383
- {patient_roleplay_instructions(patient_name, condition_name, full_interview_q_a)}""")
 
384
  # Add this at the end to signal end of stream
385
- yield json.dumps({"event": "end"})
 
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
15
+ import base64
16
  import json
 
17
  import os
18
+ import re
19
 
20
  from gemini import gemini_get_text_response
 
21
  from gemini_tts import synthesize_gemini_tts
22
+ from medgemma import medgemma_get_text_response
23
 
24
  INTERVIEWER_VOICE = "Aoede"
25
 
26
+
27
  def read_symptoms_json():
28
  # Load the list of symptoms for each condition from a JSON file
29
+ with open("symptoms.json", "r") as f:
30
  return json.load(f)
31
 
32
+
33
  def read_patient_and_conditions_json():
34
  # Load all patient and condition data from the frontend assets
35
+ with open(
36
+ os.path.join(
37
+ os.environ.get("FRONTEND_BUILD", "frontend/build"),
38
+ "assets",
39
+ "patients_and_conditions.json",
40
+ ),
41
+ "r",
42
+ ) as f:
43
  return json.load(f)
44
 
45
+
46
  def get_patient(patient_name):
47
  """Helper function to locate a patient record by name. Raises StopIteration if not found."""
48
  return next(p for p in PATIENTS if p["name"] == patient_name)
49
 
50
+
51
  def read_fhir_json(patient):
52
  # Load the FHIR (EHR) JSON file for a given patient
53
+ with open(
54
+ os.path.join(
55
+ os.environ.get("FRONTEND_BUILD", "frontend/build"),
56
+ patient["fhirFile"].lstrip("/"),
57
+ ),
58
+ "r",
59
+ ) as f:
60
  return json.load(f)
61
 
62
+
63
  def get_ehr_summary_per_patient(patient_name):
64
  # Returns a concise EHR summary for the patient, using LLM if not already cached
65
  patient = get_patient(patient_name)
66
  if patient.get("ehr_summary"):
67
  return patient["ehr_summary"]
68
  # Use MedGemma to summarize the EHR for the patient
69
+ ehr_summary = medgemma_get_text_response(
70
+ [
71
+ {
72
+ "role": "system",
73
+ "content": [
74
+ {
75
+ "type": "text",
76
+ "text": f"""You are a medical assistant summarizing the EHR (FHIR) records for the patient {patient_name}.
77
  Provide a concise summary of the patient's medical history, including any existing conditions, medications, and relevant past treatments.
78
+ Do not include personal opinions or assumptions, only factual information.""",
79
+ }
80
+ ],
81
+ },
82
+ {
83
+ "role": "user",
84
+ "content": [
85
+ {"type": "text", "text": json.dumps(read_fhir_json(patient))}
86
+ ],
87
+ },
88
+ ]
89
+ )
 
 
90
  patient["ehr_summary"] = ehr_summary
91
  return ehr_summary
92
 
93
+
94
  PATIENTS = read_patient_and_conditions_json()["patients"]
95
  SYMPTOMS = read_symptoms_json()
96
+
97
+
98
  def patient_roleplay_instructions(patient_name, condition_name, previous_answers):
99
  """
100
  Generates structured instructions for the LLM to roleplay as a patient, including persona, scenario, and symptom logic.
 
139
  ---
140
  """
141
 
142
+
143
  def interviewer_roleplay_instructions(patient_name):
144
  # Returns detailed instructions for the LLM to roleplay as the interviewer/clinical assistant
145
  return f"""
 
173
  3. **End Interview:** You MUST continue the interview until you have asked 20 questions OR the patient is unable to provide more information. When the interview is complete, you MUST conclude by printing this exact phrase: "Thank you for answering my questions. I have everything needed to prepare a report for your visit. End interview."
174
  """
175
 
176
+
177
  def report_writer_instructions(patient_name: str) -> str:
178
  """
179
  Generates the system prompt with clear instructions, role, and constraints for the LLM.
 
223
  DO NOT include any introductory phrases, explanations, or any text other than the report itself.
224
  </output_format>"""
225
 
226
+
227
+ def write_report(
228
+ patient_name: str, interview_text: str, existing_report: str = None
229
+ ) -> str:
230
  """
231
  Constructs the full prompt, sends it to the LLM, and processes the response.
232
  This function handles both the initial creation and subsequent updates of a report.
 
236
 
237
  # If no existing report is provided, load a default template from a string.
238
  if not existing_report:
239
+ with open("report_template.txt", "r") as f:
240
  existing_report = f.read()
241
 
242
  # Construct the user prompt with the specific task and data
 
261
 
262
  # Assemble the full message payload for the LLM API
263
  messages = [
264
+ {"role": "system", "content": [{"type": "text", "text": instructions}]},
265
+ {"role": "user", "content": [{"type": "text", "text": user_prompt}]},
266
  ]
267
 
268
  report = medgemma_get_text_response(messages)
269
+ cleaned_report = re.sub(r"<unused94>.*?</unused95>", "", report, flags=re.DOTALL)
270
  cleaned_report = cleaned_report.strip()
271
 
272
  # The LLM sometimes wraps the markdown report in a markdown code block.
273
  # This regex checks if the entire string is a code block and extracts the content.
274
+ match = re.match(
275
+ r"^\s*```(?:markdown)?\s*(.*?)\s*```\s*$",
276
+ cleaned_report,
277
+ re.DOTALL | re.IGNORECASE,
278
+ )
279
  if match:
280
  cleaned_report = match.group(1)
281
 
282
  return cleaned_report.strip()
283
 
284
 
 
285
  def stream_interview(patient_name, condition_name):
286
+ print(
287
+ f"Starting interview simulation for patient: {patient_name}, condition: {condition_name}"
288
+ )
289
  # Prepare roleplay instructions and initial dialog (using existing helper functions)
290
  interviewer_instructions = interviewer_roleplay_instructions(patient_name)
291
+
292
  # Determine voices for TTS
293
  patient = get_patient(patient_name)
294
  patient_voice = patient["voice"]
295
+
296
  dialog = [
297
  {
298
  "role": "system",
299
+ "content": [{"type": "text", "text": interviewer_instructions}],
300
  },
301
+ {"role": "user", "content": [{"type": "text", "text": "start interview"}]},
302
  ]
303
+
304
  write_report_text = ""
305
  full_interview_q_a = ""
306
  number_of_questions_limit = 30
307
  for i in range(number_of_questions_limit):
308
  # Get the next interviewer question from MedGemma
309
  interviewer_question_text = medgemma_get_text_response(
310
+ messages=dialog, temperature=0.1, max_tokens=2048, stream=False
 
 
 
311
  )
312
  # Process optional "thinking" text (if present in the LLM output)
313
+ thinking_search = re.search(
314
+ "<unused94>(.+?)<unused95>", interviewer_question_text, re.DOTALL
315
+ )
316
  if thinking_search:
317
  thinking_text = thinking_search.group(1)
318
+ interviewer_question_text = interviewer_question_text.replace(
319
+ f"<unused94>{thinking_text}<unused95>", ""
320
+ )
321
  if i == 0:
322
  # Only yield the "thinking" summary for the first question
323
  thinking_text = gemini_get_text_response(
324
  f"""Provide a summary of up to 100 words containing only the reasoning and planning from this text,
325
+ do not include instructions, use first person: {thinking_text}"""
326
+ )
327
+ yield json.dumps(
328
+ {"speaker": "interviewer thinking", "text": thinking_text}
329
+ )
330
 
331
  # Clean up the text for TTS and display
332
+ clean_interviewer_text = interviewer_question_text.replace(
333
+ "End interview.", ""
334
+ ).strip()
335
 
336
  # Generate audio for the interviewer's question using Gemini TTS
337
+ audio_data, mime_type = synthesize_gemini_tts(
338
+ f"Speak in a slightly upbeat and brisk manner, as a friendly clinician: {clean_interviewer_text}",
339
+ INTERVIEWER_VOICE,
340
+ )
341
  audio_b64 = None
342
  if audio_data and mime_type:
343
  audio_b64 = f"data:{mime_type};base64,{base64.b64encode(audio_data).decode('utf-8')}"
344
 
345
  # Yield interviewer message (text and audio)
346
+ yield json.dumps(
347
+ {
348
+ "speaker": "interviewer",
349
+ "text": clean_interviewer_text,
350
+ "audio": audio_b64,
351
+ }
352
+ )
353
+ dialog.append(
354
+ {
355
+ "role": "assistant",
356
+ "content": [{"type": "text", "text": interviewer_question_text}],
357
+ }
358
+ )
359
  if "End interview" in interviewer_question_text:
360
  # End the interview loop if the LLM signals completion
361
  break
362
 
363
  # Get the patient's response from Gemini (roleplay LLM)
364
+ patient_response_text = gemini_get_text_response(
365
+ f"""
366
  {patient_roleplay_instructions(patient_name, condition_name, full_interview_q_a)}\n\n
367
+ Question: {interviewer_question_text}"""
368
+ )
369
 
370
  # Generate audio for the patient's response
371
+ audio_data, mime_type = synthesize_gemini_tts(
372
+ f"Say this in faster speed, using a sick tone: {patient_response_text}",
373
+ patient_voice,
374
+ )
375
  audio_b64 = None
376
  if audio_data and mime_type:
377
  audio_b64 = f"data:{mime_type};base64,{base64.b64encode(audio_data).decode('utf-8')}"
378
 
379
  # Yield patient message (text and audio)
380
+ yield json.dumps(
381
+ {"speaker": "patient", "text": patient_response_text, "audio": audio_b64}
382
+ )
383
+ dialog.append(
384
+ {
385
+ "role": "user",
386
+ "content": [{"type": "text", "text": patient_response_text}],
387
+ }
388
+ )
 
 
 
389
  # Track the full Q&A for context in future LLM calls
390
+ most_recent_q_a = (
391
+ f"Q: {interviewer_question_text}\nA: {patient_response_text}\n"
392
+ )
393
+ full_interview_q_a_with_new_q_a = (
394
+ "PREVIOUS Q&A:\n" + full_interview_q_a + "\nNEW Q&A:\n" + most_recent_q_a
395
+ )
396
  # Update the report after each Q&A
397
+ write_report_text = write_report(
398
+ patient_name, full_interview_q_a_with_new_q_a, write_report_text
399
+ )
400
  full_interview_q_a += most_recent_q_a
401
+ yield json.dumps({"speaker": "report", "text": write_report_text})
 
 
 
402
 
403
+ print(
404
+ f"""Interview simulation completed for patient: {patient_name}, condition: {condition_name}.
405
  Patient profile used:
406
+ {patient_roleplay_instructions(patient_name, condition_name, full_interview_q_a)}"""
407
+ )
408
  # Add this at the end to signal end of stream
409
+ yield json.dumps({"event": "end"})
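`stream_interview` still yields one JSON string per event plus a final `{"event": "end"}`; a consumer sketch matching what app.py wraps in SSE (the patient/condition defaults are taken from app.py, and importing the module assumes the Space's assets and credentials are in place):

```python
import json

from interview_simulator import stream_interview

for raw_event in stream_interview("Patient", "unknown condition"):
    event = json.loads(raw_event)
    if event.get("event") == "end":
        break
    # speakers seen above: "interviewer thinking", "interviewer", "patient", "report"
    print(f'{event["speaker"]}: {event.get("text", "")[:80]}')
```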
medgemma.py CHANGED
@@ -12,18 +12,21 @@
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
 
 
15
  # MedGemma endpoint
16
  import requests
 
17
  from auth import create_credentials, get_access_token_refresh_if_needed
18
- import os
19
  from cache import cache
20
 
21
- _endpoint_url = os.environ.get('GCP_MEDGEMMA_ENDPOINT')
22
 
23
  # Create credentials
24
- secret_key_json = os.environ.get('GCP_MEDGEMMA_SERVICE_ACCOUNT_KEY')
25
  medgemma_credentials = create_credentials(secret_key_json)
26
 
 
27
  # https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.endpoints.chat/completions
28
  @cache.memoize()
29
  def medgemma_get_text_response(
@@ -36,7 +39,7 @@ def medgemma_get_text_response(
36
  stop: list[str] | str | None = None,
37
  frequency_penalty: float | None = None,
38
  presence_penalty: float | None = None,
39
- model: str="tgi"
40
  ):
41
  """
42
  Makes a chat completion request to the configured LLM API (OpenAI-compatible).
@@ -47,26 +50,31 @@ def medgemma_get_text_response(
47
  }
48
 
49
  # Based on the openai format
50
- payload = {
51
- "messages": messages,
52
- "max_tokens": max_tokens
53
- }
54
-
55
-
56
- if temperature is not None: payload["temperature"] = temperature
57
- if top_p is not None: payload["top_p"] = top_p
58
- if seed is not None: payload["seed"] = seed
59
- if stop is not None: payload["stop"] = stop
60
- if frequency_penalty is not None: payload["frequency_penalty"] = frequency_penalty
61
- if presence_penalty is not None: payload["presence_penalty"] = presence_penalty
62
 
63
 
64
- response = requests.post(_endpoint_url, headers=headers, json=payload, stream=stream, timeout=60)
 
 
65
  try:
66
  response.raise_for_status()
67
  return response.json()["choices"][0]["message"]["content"]
68
  except requests.exceptions.JSONDecodeError:
69
  # Log the problematic response for easier debugging in the future.
70
- print(f"Error: Failed to decode JSON from MedGemma. Status: {response.status_code}, Response: {response.text}")
 
 
71
  # Re-raise the exception so the caller knows something went wrong.
72
  raise
 
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
15
+ import os
16
+
17
  # MedGemma endpoint
18
  import requests
19
+
20
  from auth import create_credentials, get_access_token_refresh_if_needed
 
21
  from cache import cache
22
 
23
+ _endpoint_url = os.environ.get("GCP_MEDGEMMA_ENDPOINT")
24
 
25
  # Create credentials
26
+ secret_key_json = os.environ.get("GCP_MEDGEMMA_SERVICE_ACCOUNT_KEY")
27
  medgemma_credentials = create_credentials(secret_key_json)
28
 
29
+
30
  # https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.endpoints.chat/completions
31
  @cache.memoize()
32
  def medgemma_get_text_response(
 
39
  stop: list[str] | str | None = None,
40
  frequency_penalty: float | None = None,
41
  presence_penalty: float | None = None,
42
+ model: str = "tgi",
43
  ):
44
  """
45
  Makes a chat completion request to the configured LLM API (OpenAI-compatible).
 
50
  }
51
 
52
  # Based on the openai format
53
+ payload = {"messages": messages, "max_tokens": max_tokens}
54
 
55
+ if temperature is not None:
56
+ payload["temperature"] = temperature
57
+ if top_p is not None:
58
+ payload["top_p"] = top_p
59
+ if seed is not None:
60
+ payload["seed"] = seed
61
+ if stop is not None:
62
+ payload["stop"] = stop
63
+ if frequency_penalty is not None:
64
+ payload["frequency_penalty"] = frequency_penalty
65
+ if presence_penalty is not None:
66
+ payload["presence_penalty"] = presence_penalty
67
 
68
+ response = requests.post(
69
+ _endpoint_url, headers=headers, json=payload, stream=stream, timeout=60
70
+ )
71
  try:
72
  response.raise_for_status()
73
  return response.json()["choices"][0]["message"]["content"]
74
  except requests.exceptions.JSONDecodeError:
75
  # Log the problematic response for easier debugging in the future.
76
+ print(
77
+ f"Error: Failed to decode JSON from MedGemma. Status: {response.status_code}, Response: {response.text}"
78
+ )
79
  # Re-raise the exception so the caller knows something went wrong.
80
  raise
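The call shape is unchanged by the reformat: a list of role/content messages plus optional OpenAI-style sampling knobs. A minimal sketch, with the message structure taken from evaluation.py and interview_simulator.py in this same commit (importing the module requires `GCP_MEDGEMMA_SERVICE_ACCOUNT_KEY` and `GCP_MEDGEMMA_ENDPOINT` to be set):

```python
from medgemma import medgemma_get_text_response

messages = [
    {"role": "system", "content": [{"type": "text", "text": "You are a concise medical assistant."}]},
    {"role": "user", "content": [{"type": "text", "text": "List two red-flag headache symptoms."}]},
]
print(medgemma_get_text_response(messages, temperature=0.1, max_tokens=256))
```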