Spaces:

nazdridoy
/

inferoxy-hub

Running

App Files Community

nazdridoy committed on Aug 21

Commit

c40f3d0

verified ·

1 Parent(s): 4ddd600

feat(network): add timeouts and retry logic

Browse files

- [feat] Apply `INFERENCE_TIMEOUT` and token inactivity timeout to chat completion (chat_handler.py:L62-76)
- [feat] Implement `IMAGE_GENERATION_TIMEOUT` for image creation (image_handler.py:L70-75)
- [feat] Add retry and timeout logic to `get_proxy_token()` and `report_token_status()` requests (hf_token_utils.py:L29-66, L84-110)
- [feat] Enhance network error handling in chat and image handlers (chat_handler.py:L91-131, image_handler.py:L82-117)
- [update] Update `report_token_status()` to handle "timeout" and "connection_error" types (hf_token_utils.py:L70-71)

Files changed (3) hide show

chat_handler.py +85 -21
hf_token_utils.py +102 -21
image_handler.py +65 -13

chat_handler.py CHANGED Viewed

@@ -4,8 +4,12 @@ Handles chat completion requests with streaming responses.
 """
 import os
 from huggingface_hub import InferenceClient
 from huggingface_hub.errors import HfHubHTTPError
 from hf_token_utils import get_proxy_token, report_token_status
 from utils import (
     validate_proxy_key,
@@ -13,6 +17,9 @@ from utils import (
     format_error_message
 )
 def chat_respond(
     message,
@@ -34,8 +41,9 @@ def chat_respond(
     proxy_api_key = os.getenv("PROXY_KEY")
     try:
-        # Get token from HF-Inferoxy proxy server
         print(f"🔑 Chat: Requesting token from proxy...")
         token, token_id = get_proxy_token(api_key=proxy_api_key)
         print(f"✅ Chat: Got token: {token_id}")
@@ -58,7 +66,7 @@ def chat_respond(
             api_key=token
         )
-        print(f"🚀 Chat: Client created, starting inference...")
         chat_completion_kwargs = {
             "model": model,
@@ -71,33 +79,89 @@ def chat_respond(
         response = ""
-        print(f"📡 Chat: Making streaming request...")
-        stream = client.chat_completion(**chat_completion_kwargs)
-        print(f"🔄 Chat: Got stream, starting to iterate...")
-        for message in stream:
-            choices = message.choices
-            token_content = ""
-            if len(choices) and choices[0].delta.content:
-                token_content = choices[0].delta.content
-            response += token_content
-            yield response
         # Report successful token usage
-        report_token_status(token_id, "success", api_key=proxy_api_key)
     except HfHubHTTPError as e:
-        # Report HF Hub errors
-        if 'token_id' in locals():
-            report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
-        yield format_error_message("HuggingFace API Error", str(e))
     except Exception as e:
-        # Report other errors
-        if 'token_id' in locals():
-            report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
-        yield format_error_message("Unexpected Error", str(e))
 def handle_chat_submit(message, history, system_msg, model_name, max_tokens, temperature, top_p):

 """
 import os
+import time
+import threading
+from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
 from huggingface_hub import InferenceClient
 from huggingface_hub.errors import HfHubHTTPError
+from requests.exceptions import ConnectionError, Timeout, RequestException
 from hf_token_utils import get_proxy_token, report_token_status
 from utils import (
     validate_proxy_key,
     format_error_message
 )
+# Timeout configuration for inference requests
+INFERENCE_TIMEOUT = 120  # 2 minutes max for inference
 def chat_respond(
     message,
     proxy_api_key = os.getenv("PROXY_KEY")
+    token_id = None
     try:
+        # Get token from HF-Inferoxy proxy server with timeout handling
         print(f"🔑 Chat: Requesting token from proxy...")
         token, token_id = get_proxy_token(api_key=proxy_api_key)
         print(f"✅ Chat: Got token: {token_id}")
             api_key=token
         )
+        print(f"🚀 Chat: Client created, starting inference with timeout...")
         chat_completion_kwargs = {
             "model": model,
         response = ""
+        print(f"📡 Chat: Making streaming request with {INFERENCE_TIMEOUT}s timeout...")
+        # Create streaming function for timeout handling
+        def create_stream():
+            return client.chat_completion(**chat_completion_kwargs)
+        # Execute with timeout using ThreadPoolExecutor
+        with ThreadPoolExecutor(max_workers=1) as executor:
+            future = executor.submit(create_stream)
+            try:
+                # Get the stream with timeout
+                stream = future.result(timeout=INFERENCE_TIMEOUT)
+                print(f"🔄 Chat: Got stream, starting to iterate...")
+                # Track streaming time to detect hangs
+                last_token_time = time.time()
+                token_timeout = 30  # 30 seconds between tokens
+                for message in stream:
+                    current_time = time.time()
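+                    # Check the gap since the last content token. NOTE: this runs only after
+                    # the next chunk arrives, so a stream that stalls completely is not interrupted here.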
+                    if current_time - last_token_time > token_timeout:
+                        raise TimeoutError(f"No response received for {token_timeout} seconds during streaming")
+                    choices = message.choices
+                    token_content = ""
+                    if len(choices) and choices[0].delta.content:
+                        token_content = choices[0].delta.content
+                        last_token_time = current_time  # Reset timer when we get content
+                    response += token_content
+                    yield response
+            except FutureTimeoutError:
+                future.cancel()  # Best effort only: cancel() cannot stop a call that is already running
+                raise TimeoutError(f"Chat request timed out after {INFERENCE_TIMEOUT} seconds")
         # Report successful token usage
+        if token_id:
+            report_token_status(token_id, "success", api_key=proxy_api_key)
+    except ConnectionError as e:
+        # Handle proxy connection errors
+        error_msg = f"Cannot connect to HF-Inferoxy server: {str(e)}"
+        print(f"🔌 Chat connection error: {error_msg}")
+        if token_id:
+            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key)
+        yield format_error_message("Connection Error", "Unable to connect to the proxy server. Please check if it's running.")
+    except TimeoutError as e:
+        # Handle timeout errors
+        error_msg = f"Request timed out: {str(e)}"
+        print(f"⏰ Chat timeout: {error_msg}")
+        if token_id:
+            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key)
+        yield format_error_message("Timeout Error", "The request took too long. The server may be overloaded. Please try again.")
     except HfHubHTTPError as e:
+        # Handle HuggingFace API errors
+        error_msg = str(e)
+        print(f"🤗 Chat HF error: {error_msg}")
+        if token_id:
+            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key)
+        # Provide more user-friendly error messages
+        if "401" in error_msg:
+            yield format_error_message("Authentication Error", "Invalid or expired API token. The proxy will provide a new token on retry.")
+        elif "402" in error_msg:
+            yield format_error_message("Quota Exceeded", "API quota exceeded. The proxy will try alternative providers.")
+        elif "429" in error_msg:
+            yield format_error_message("Rate Limited", "Too many requests. Please wait a moment and try again.")
+        else:
+            yield format_error_message("HuggingFace API Error", error_msg)
     except Exception as e:
+        # Handle all other errors
+        error_msg = str(e)
+        print(f"❌ Chat unexpected error: {error_msg}")
+        if token_id:
+            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key)
+        yield format_error_message("Unexpected Error", f"An unexpected error occurred: {error_msg}")
 def handle_chat_submit(message, history, system_msg, model_name, max_tokens, temperature, top_p):

hf_token_utils.py CHANGED Viewed

@@ -2,11 +2,18 @@
 import os
 import requests
 import json
 from typing import Dict, Optional, Any, Tuple
 def get_proxy_token(proxy_url: str = "http://scw.nazdev.tech:11155", api_key: str = None) -> Tuple[str, str]:
     """
-    Get a valid token from the proxy server.
     Args:
         proxy_url: URL of the HF-Inferoxy server
@@ -16,24 +23,68 @@ def get_proxy_token(proxy_url: str = "http://scw.nazdev.tech:11155", api_key: st
         Tuple of (token, token_id)
     Raises:
         Exception: If token provisioning fails
     """
     headers = {}
     if api_key:
         headers["Authorization"] = f"Bearer {api_key}"
-    response = requests.get(f"{proxy_url}/keys/provision", headers=headers)
-    if response.status_code != 200:
-        raise Exception(f"Failed to provision token: {response.text}")
-    data = response.json()
-    token = data["token"]
-    token_id = data["token_id"]
-    # For convenience, also set environment variable
-    os.environ["HF_TOKEN"] = token
-    return token, token_id
 def report_token_status(
     token_id: str,
@@ -43,7 +94,7 @@ def report_token_status(
     api_key: str = None
 ) -> bool:
     """
-    Report token usage status back to the proxy server.
     Args:
         token_id: ID of the token to report (from get_proxy_token)
@@ -66,6 +117,10 @@ def report_token_status(
             error_type = "invalid_credentials"
         elif "402 Client Error" in error and "exceeded your monthly included credits" in error:
             error_type = "credits_exceeded"
         if error_type:
             payload["error_type"] = error_type
@@ -73,11 +128,37 @@ def report_token_status(
     headers = {"Content-Type": "application/json"}
     if api_key:
         headers["Authorization"] = f"Bearer {api_key}"
-    try:
-        response = requests.post(f"{proxy_url}/keys/report", json=payload, headers=headers)
-        return response.status_code == 200
-    except Exception as e:
-        # Silently fail to avoid breaking the client application
-        # In production, consider logging this error
-        return False

 import os
 import requests
 import json
+import time
 from typing import Dict, Optional, Any, Tuple
+from requests.exceptions import ConnectionError, Timeout, RequestException
+# Timeout and retry configuration
+REQUEST_TIMEOUT = 30  # 30 seconds timeout
+RETRY_ATTEMPTS = 2
+RETRY_DELAY = 1  # 1 second delay between retries
 def get_proxy_token(proxy_url: str = "http://scw.nazdev.tech:11155", api_key: str = None) -> Tuple[str, str]:
     """
+    Get a valid token from the proxy server with timeout and retry logic.
     Args:
         proxy_url: URL of the HF-Inferoxy server
         Tuple of (token, token_id)
     Raises:
+        ConnectionError: If unable to connect to proxy server
+        TimeoutError: If request times out
         Exception: If token provisioning fails
     """
     headers = {}
     if api_key:
         headers["Authorization"] = f"Bearer {api_key}"
+    print(f"🔗 Connecting to proxy: {proxy_url}")
+    for attempt in range(RETRY_ATTEMPTS):
+        try:
+            print(f"🔄 Token provision attempt {attempt + 1}/{RETRY_ATTEMPTS}")
+            response = requests.get(
+                f"{proxy_url}/keys/provision",
+                headers=headers,
+                timeout=REQUEST_TIMEOUT
+            )
+            if response.status_code == 200:
+                data = response.json()
+                token = data["token"]
+                token_id = data["token_id"]
+                # For convenience, also set environment variable
+                os.environ["HF_TOKEN"] = token
+                print(f"✅ Token provisioned successfully: {token_id}")
+                return token, token_id
+            else:
+                error_msg = f"HTTP {response.status_code}: {response.text}"
+                print(f"❌ Provision failed: {error_msg}")
+                if attempt == RETRY_ATTEMPTS - 1:  # Last attempt
+                    raise Exception(f"Failed to provision token: {error_msg}")
+        except ConnectionError as e:
+            error_msg = f"Connection failed to proxy server: {str(e)}"
+            print(f"🔌 {error_msg}")
+            if attempt == RETRY_ATTEMPTS - 1:  # Last attempt
+                raise ConnectionError(f"Cannot connect to HF-Inferoxy at {proxy_url}. Please check if the server is running.")
+        except Timeout as e:
+            error_msg = f"Request timeout after {REQUEST_TIMEOUT}s: {str(e)}"
+            print(f"⏰ {error_msg}")
+            if attempt == RETRY_ATTEMPTS - 1:  # Last attempt
+                raise TimeoutError(f"Timeout connecting to HF-Inferoxy. Server may be overloaded.")
+        except RequestException as e:
+            error_msg = f"Request error: {str(e)}"
+            print(f"🚫 {error_msg}")
+            if attempt == RETRY_ATTEMPTS - 1:  # Last attempt
+                raise Exception(f"Network error connecting to proxy: {str(e)}")
+        # Wait before retry
+        if attempt < RETRY_ATTEMPTS - 1:
+            print(f"⏱️ Retrying in {RETRY_DELAY}s...")
+            time.sleep(RETRY_DELAY)
 def report_token_status(
     token_id: str,
     api_key: str = None
 ) -> bool:
     """
+    Report token usage status back to the proxy server with timeout handling.
     Args:
         token_id: ID of the token to report (from get_proxy_token)
             error_type = "invalid_credentials"
         elif "402 Client Error" in error and "exceeded your monthly included credits" in error:
             error_type = "credits_exceeded"
+        elif "timeout" in error.lower() or "timed out" in error.lower():
+            error_type = "timeout"
+        elif "connection" in error.lower():
+            error_type = "connection_error"
         if error_type:
             payload["error_type"] = error_type
     headers = {"Content-Type": "application/json"}
     if api_key:
         headers["Authorization"] = f"Bearer {api_key}"
+    print(f"📊 Reporting {status} for token {token_id}")
+    for attempt in range(RETRY_ATTEMPTS):
+        try:
+            response = requests.post(
+                f"{proxy_url}/keys/report",
+                json=payload,
+                headers=headers,
+                timeout=REQUEST_TIMEOUT
+            )
+            if response.status_code == 200:
+                print(f"✅ Status reported successfully")
+                return True
+            else:
+                print(f"⚠️ Report failed: HTTP {response.status_code}")
+        except ConnectionError as e:
+            print(f"🔌 Report connection error: {str(e)}")
+        except Timeout as e:
+            print(f"⏰ Report timeout: {str(e)}")
+        except RequestException as e:
+            print(f"🚫 Report request error: {str(e)}")
+        # Wait before the next attempt (no delay after the last one)
+        if attempt < RETRY_ATTEMPTS - 1:
+            print(f"⏱️ Retrying report in {RETRY_DELAY}s...")
+            time.sleep(RETRY_DELAY)
+    print(f"❌ Failed to report status after {RETRY_ATTEMPTS} attempts")
+    return False

image_handler.py CHANGED Viewed

@@ -4,8 +4,12 @@ Handles text-to-image generation with multiple providers.
 """
 import os
 from huggingface_hub import InferenceClient
 from huggingface_hub.errors import HfHubHTTPError
 from hf_token_utils import get_proxy_token, report_token_status
 from utils import (
     IMAGE_CONFIG,
@@ -14,6 +18,9 @@ from utils import (
     format_success_message
 )
 def validate_dimensions(width, height):
     """Validate that dimensions are divisible by 8 (required by most diffusion models)"""
@@ -43,8 +50,9 @@ def generate_image(
     proxy_api_key = os.getenv("PROXY_KEY")
     try:
-        # Get token from HF-Inferoxy proxy server
         print(f"🔑 Image: Requesting token from proxy...")
         token, token_id = get_proxy_token(api_key=proxy_api_key)
         print(f"✅ Image: Got token: {token_id}")
@@ -76,29 +84,73 @@ def generate_image(
             generation_params["seed"] = seed
         print(f"📐 Image: Dimensions: {width}x{height}, steps: {num_inference_steps}, guidance: {guidance_scale}")
-        print(f"📡 Image: Making generation request...")
-        # Generate image
-        image = client.text_to_image(**generation_params)
         print(f"🖼️ Image: Generation completed! Image type: {type(image)}")
         # Report successful token usage
-        report_token_status(token_id, "success", api_key=proxy_api_key)
         return image, format_success_message("Image generated", f"using {model_name} on {provider}")
     except HfHubHTTPError as e:
-        # Report HF Hub errors
-        if 'token_id' in locals():
-            report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
-        return None, format_error_message("HuggingFace API Error", str(e))
     except Exception as e:
-        # Report other errors
-        if 'token_id' in locals():
-            report_token_status(token_id, "error", str(e), api_key=proxy_api_key)
-        return None, format_error_message("Unexpected Error", str(e))
 def handle_image_generation(prompt_val, model_val, provider_val, negative_prompt_val, width_val, height_val, steps_val, guidance_val, seed_val):

 """
 import os
+import time
+import threading
+from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
 from huggingface_hub import InferenceClient
 from huggingface_hub.errors import HfHubHTTPError
+from requests.exceptions import ConnectionError, Timeout, RequestException
 from hf_token_utils import get_proxy_token, report_token_status
 from utils import (
     IMAGE_CONFIG,
     format_success_message
 )
+# Timeout configuration for image generation
+IMAGE_GENERATION_TIMEOUT = 300  # 5 minutes max for image generation
 def validate_dimensions(width, height):
     """Validate that dimensions are divisible by 8 (required by most diffusion models)"""
     proxy_api_key = os.getenv("PROXY_KEY")
+    token_id = None
     try:
+        # Get token from HF-Inferoxy proxy server with timeout handling
         print(f"🔑 Image: Requesting token from proxy...")
         token, token_id = get_proxy_token(api_key=proxy_api_key)
         print(f"✅ Image: Got token: {token_id}")
             generation_params["seed"] = seed
         print(f"📐 Image: Dimensions: {width}x{height}, steps: {num_inference_steps}, guidance: {guidance_scale}")
+        print(f"📡 Image: Making generation request with {IMAGE_GENERATION_TIMEOUT}s timeout...")
+        # Create generation function for timeout handling
+        def generate_image_task():
+            return client.text_to_image(**generation_params)
+        # Execute with timeout using ThreadPoolExecutor
+        with ThreadPoolExecutor(max_workers=1) as executor:
+            future = executor.submit(generate_image_task)
+            try:
+                # Generate image with timeout
+                image = future.result(timeout=IMAGE_GENERATION_TIMEOUT)
+            except FutureTimeoutError:
+                future.cancel()  # Best effort only: cancel() cannot stop a call that is already running
+                raise TimeoutError(f"Image generation timed out after {IMAGE_GENERATION_TIMEOUT} seconds")
         print(f"🖼️ Image: Generation completed! Image type: {type(image)}")
         # Report successful token usage
+        if token_id:
+            report_token_status(token_id, "success", api_key=proxy_api_key)
         return image, format_success_message("Image generated", f"using {model_name} on {provider}")
+    except ConnectionError as e:
+        # Handle proxy connection errors
+        error_msg = f"Cannot connect to HF-Inferoxy server: {str(e)}"
+        print(f"🔌 Image connection error: {error_msg}")
+        if token_id:
+            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key)
+        return None, format_error_message("Connection Error", "Unable to connect to the proxy server. Please check if it's running.")
+    except TimeoutError as e:
+        # Handle timeout errors
+        error_msg = f"Image generation timed out: {str(e)}"
+        print(f"⏰ Image timeout: {error_msg}")
+        if token_id:
+            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key)
+        return None, format_error_message("Timeout Error", f"Image generation took too long (>{IMAGE_GENERATION_TIMEOUT//60} minutes). Try reducing image size or steps.")
     except HfHubHTTPError as e:
+        # Handle HuggingFace API errors
+        error_msg = str(e)
+        print(f"🤗 Image HF error: {error_msg}")
+        if token_id:
+            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key)
+        # Provide more user-friendly error messages
+        if "401" in error_msg:
+            return None, format_error_message("Authentication Error", "Invalid or expired API token. The proxy will provide a new token on retry.")
+        elif "402" in error_msg:
+            return None, format_error_message("Quota Exceeded", "API quota exceeded. The proxy will try alternative providers.")
+        elif "429" in error_msg:
+            return None, format_error_message("Rate Limited", "Too many requests. Please wait a moment and try again.")
+        elif "content policy" in error_msg.lower() or "safety" in error_msg.lower():
+            return None, format_error_message("Content Policy", "Image prompt was rejected by content policy. Please try a different prompt.")
+        else:
+            return None, format_error_message("HuggingFace API Error", error_msg)
     except Exception as e:
+        # Handle all other errors
+        error_msg = str(e)
+        print(f"❌ Image unexpected error: {error_msg}")
+        if token_id:
+            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key)
+        return None, format_error_message("Unexpected Error", f"An unexpected error occurred: {error_msg}")
 def handle_image_generation(prompt_val, model_val, provider_val, negative_prompt_val, width_val, height_val, steps_val, guidance_val, seed_val):