# Spaces: utarn/ai_ocr (Sleeping) - File size: 17,788 Bytes
import gradio as gr
import requests
import json
import base64
import os
from typing import List, Optional, Tuple, Any
import mimetypes

class OmniAPIClient:
    """Client for interacting with the Omni API.

    The client derives two endpoints from ``base_url``:
    ``/v1/chat/completions`` for chat requests and ``/v1/models`` for model
    discovery. Network-facing methods never raise; they report failure
    through the boolean in their ``(success, payload)`` return value.
    """

    # Model offered whenever the model list cannot be fetched or is empty.
    DEFAULT_MODEL = "qwen/qwen3-235b-a22b-instruct-2507"
    # Request timeouts, in seconds.
    MODELS_TIMEOUT = 10
    CHAT_TIMEOUT = 60

    def __init__(self, base_url: str = "https://api.modelharbor.com"):
        # Strip trailing slashes so the endpoint URLs never contain "//".
        self.base_url = base_url.rstrip('/')
        self.chat_endpoint = f"{self.base_url}/v1/chat/completions"
        self.models_endpoint = f"{self.base_url}/v1/models"

    def encode_file_to_base64(self, file_path: str) -> str:
        """Return the contents of ``file_path`` as a base64 ASCII string."""
        with open(file_path, "rb") as file:
            return base64.b64encode(file.read()).decode('utf-8')

    def get_mime_type(self, file_path: str) -> str:
        """Guess the MIME type from the file name.

        Falls back to ``application/octet-stream`` when the extension is
        unknown, so the result is never empty.
        """
        mime_type, _ = mimetypes.guess_type(file_path)
        return mime_type or "application/octet-stream"

    def create_file_content(self, file_path: str, file_type: str = "file") -> dict:
        """Build the message part for one file.

        Images become an ``image_url`` part; every other file becomes a
        ``file`` part carrying its base name. Both embed the file as a
        base64 ``data:`` URL.

        Args:
            file_path: Path of the file to embed.
            file_type: Unused; kept so existing callers keep working.

        Returns:
            A dict ready to be placed in a message's ``content`` list.

        Raises:
            OSError: If the file cannot be read.
        """
        mime_type = self.get_mime_type(file_path)
        data_url = f"data:{mime_type};base64,{self.encode_file_to_base64(file_path)}"

        if mime_type.startswith('image/'):
            return {
                "type": "image_url",
                "image_url": {"url": data_url},
            }

        return {
            "type": "file",
            "file": {
                "filename": os.path.basename(file_path),
                "file_data": data_url,
            },
        }

    def build_message_content(self, text: str, files: List[str]) -> List[dict]:
        """Build the message ``content`` list: text first, then files in order.

        ``None`` is accepted for either argument and treated as empty.
        Blank text, empty entries and paths that are not regular files are
        skipped, so the result may be an empty list.
        """
        content_parts = []

        if text and text.strip():
            content_parts.append({
                "type": "text",
                "text": text
            })

        for file_path in files or []:
            # isfile (not exists): a directory cannot be opened and encoded.
            if file_path and os.path.isfile(file_path):
                content_parts.append(self.create_file_content(file_path, "file"))

        return content_parts

    @staticmethod
    def _parse_model_list(data: Any) -> List[str]:
        """Extract model names from a decoded ``/v1/models`` response.

        Recognized shapes, checked in this order:

        * ``{"data": [...]}``   - OpenAI-style list
        * ``{"models": [...]}`` - custom list
        * any other dict        - every list-valued entry is flattened
        * ``[...]``             - bare list

        Each entry may be a string or a dict with an ``"id"`` key; empty
        names are dropped. Any other shape yields an empty list.
        """
        if isinstance(data, dict):
            if isinstance(data.get("data"), list):
                entries = data["data"]
            elif isinstance(data.get("models"), list):
                entries = data["models"]
            else:
                entries = [
                    item
                    for value in data.values()
                    if isinstance(value, list)
                    for item in value
                ]
        elif isinstance(data, list):
            entries = data
        else:
            entries = []

        names = []
        for entry in entries:
            name = entry.get("id") if isinstance(entry, dict) else entry
            if name:
                names.append(str(name))
        return names

    def get_available_models(self, api_key: str = "") -> Tuple[bool, List[str]]:
        """Fetch the model names the API advertises.

        Args:
            api_key: Optional bearer token; no ``Authorization`` header is
                sent when it is empty.

        Returns:
            ``(True, names)`` when the endpoint answered with HTTP 200 and
            valid JSON (``names`` is ``[DEFAULT_MODEL]`` if nothing could be
            extracted), otherwise ``(False, [DEFAULT_MODEL])``.
        """
        headers = {"Content-Type": "application/json"}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"

        try:
            response = requests.get(
                self.models_endpoint,
                headers=headers,
                timeout=self.MODELS_TIMEOUT
            )
            if response.status_code != 200:
                return False, [self.DEFAULT_MODEL]
            models = self._parse_model_list(response.json())
        except Exception:
            # Deliberately best-effort: timeouts, connection failures and
            # undecodable bodies all degrade to the default model.
            return False, [self.DEFAULT_MODEL]

        return True, models if models else [self.DEFAULT_MODEL]

    def send_chat_completion(self, text: str, files: List[str], api_key: str = "", model: str = DEFAULT_MODEL, max_tokens: int = 16384, stream: bool = False) -> Tuple[bool, Any]:
        """Send a single-turn chat completion request.

        Args:
            text: User message text; may be empty when files are given.
            files: Paths of files to attach.
            api_key: Optional bearer token.
            model: Model identifier placed in the payload.
            max_tokens: Completion token limit placed in the payload.
            stream: Forwarded in the payload. The response is still decoded
                as one JSON document.

        Returns:
            ``(True, response_json)`` on HTTP 200 with a JSON body,
            otherwise ``(False, error_dict)`` where ``error_dict`` always
            has an ``"error"`` key.
        """
        try:
            content_parts = self.build_message_content(text, files)

            if not content_parts:
                return False, {"error": "No text or valid files provided"}

            # Numeric UI widgets may hand over 16384.0; serialize it as the
            # integer it represents.
            if isinstance(max_tokens, float) and max_tokens.is_integer():
                max_tokens = int(max_tokens)

            payload = {
                "model": model,
                "messages": [
                    {
                        "role": "user",
                        "content": content_parts
                    }
                ],
                "max_tokens": max_tokens,
                "stream": stream
            }

            headers = {
                "Content-Type": "application/json"
            }
            if api_key:
                headers["Authorization"] = f"Bearer {api_key}"

            response = requests.post(
                self.chat_endpoint,
                json=payload,
                headers=headers,
                timeout=self.CHAT_TIMEOUT
            )
            succeeded = response.status_code == 200

            try:
                body = response.json()
            except ValueError:
                # ValueError is the common base of every JSON decode error
                # that response.json() can raise.
                label = "Invalid JSON response" if succeeded else f"HTTP {response.status_code}"
                return False, {"error": label, "raw_response": response.text}

            if succeeded:
                return True, body
            return False, {"error": f"API Error ({response.status_code})", "details": body}

        except requests.exceptions.Timeout:
            return False, {"error": "Request timeout"}
        except requests.exceptions.ConnectionError:
            return False, {"error": "Connection error"}
        except Exception as e:
            return False, {"error": f"Unexpected error: {str(e)}"}


def create_ui():
    """Create the Gradio UI.

    Builds a Blocks app with a configuration panel (base URL, API key,
    model, max tokens), an input panel (message text and file upload) and
    an output panel (status text and raw JSON response), then wires the
    buttons and field events to the handlers defined below.

    Returns:
        The assembled ``gr.Blocks`` interface, not yet launched.
    """

    # Model shown whenever the model list cannot be fetched.
    default_model = "qwen/qwen3-235b-a22b-instruct-2507"

    def default_dropdown():
        """Return a dropdown update offering only the default model."""
        return gr.Dropdown(choices=[default_model], value=default_model)

    def fetch_models(base_url, api_key):
        """Return a dropdown update listing the models the API advertises.

        Falls back to the default model when the base URL is empty, the
        lookup fails, or no models are returned.
        """
        if not base_url:
            return default_dropdown()

        try:
            client = OmniAPIClient(base_url)
            success, models = client.get_available_models(api_key)
            if success and models:
                return gr.Dropdown(choices=models, value=models[0])
        except Exception:
            # Best-effort: any failure leaves the default model selectable.
            pass
        return default_dropdown()

    def collect_file_paths(files):
        """Normalize the upload widget's value to a list of file paths.

        Accepts ``None``, a single upload or a list of uploads. Each upload
        may be an object exposing the path as ``.name`` or a plain path
        string; anything else is skipped.
        """
        if files is None:
            return []
        if not isinstance(files, (list, tuple)):
            files = [files]

        paths = []
        for item in files:
            path = getattr(item, "name", None)
            if not path and isinstance(item, str):
                path = item
            if path:
                paths.append(path)
        return paths

    def build_success_status(response, model):
        """Return the status text for a successful request.

        Includes the first choice's message content when present. For
        models whose name contains "typhoon", the ``natural_text`` field of
        the content is shown instead when it can be found.
        """
        try:
            content = response["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            return "✅ Request successful"

        assistant_reply = content
        if "typhoon" in (model or "").lower():
            structured = content
            if isinstance(content, str):
                # NOTE(review): assumes these models return their structured
                # result as a JSON string - confirm against the model's
                # actual output. Non-JSON text falls back to the raw content.
                try:
                    structured = json.loads(content)
                except ValueError:
                    structured = None
            if isinstance(structured, dict) and "natural_text" in structured:
                assistant_reply = structured["natural_text"]

        return f"✅ Request successful\n\n**Assistant Reply:**\n{assistant_reply}"

    def send_request(base_url, api_key, model, max_tokens, text, files):
        """Validate the form, send the request and format the outcome.

        Returns:
            ``(status_text, raw_json_text)`` for the two output widgets.
            Never raises; unexpected failures are reported in the status.
        """
        try:
            if not base_url:
                return "❌ Base URL is required", ""

            text = text or ""
            if not text.strip() and not files:
                return "❌ Please provide either text or upload files", ""

            client = OmniAPIClient(base_url)

            # The number widget may hand over a float; the API field is an
            # integer count. A cleared widget (None) is passed through.
            if max_tokens is not None:
                max_tokens = int(max_tokens)

            success, response = client.send_chat_completion(
                text=text,
                files=collect_file_paths(files),
                api_key=api_key,
                model=model,
                max_tokens=max_tokens
            )

            # ensure_ascii=False keeps non-ASCII text readable instead of
            # rendering it as \uXXXX escapes in the raw response pane.
            raw_response = json.dumps(response, indent=2, ensure_ascii=False)

            if not success:
                return "❌ Request failed", raw_response
            return build_success_status(response, model), raw_response

        except Exception as e:
            return f"❌ Error: {str(e)}", ""

    def clear_form():
        """Reset the message, status, raw response and uploaded files."""
        return "", "", "", None

    # Custom CSS for better layout
    css = """
    .gradio-container {
        max-width: 1200px;
    }
    .config-panel {
        background-color: #f8f9fa;
        border-radius: 8px;
        padding: 15px;
        margin-bottom: 20px;
    }
    .input-panel {
        border-right: 1px solid #e0e0e0;
        padding-right: 20px;
    }
    .output-panel {
        padding-left: 20px;
    }
    """

    with gr.Blocks(css=css, title="Omni API Chat Interface") as interface:
        gr.Markdown("# 🤖 Omni API Chat Interface")
        gr.Markdown("Interact with the Omni API using text, PDFs, images, and audio files")

        # Configuration section
        with gr.Group(elem_classes=["config-panel"]):
            gr.Markdown("## ⚙️ Configuration")
            with gr.Row():
                base_url = gr.Textbox(
                    label="API Base URL",
                    value="https://api.modelharbor.com",
                    placeholder="https://api.modelharbor.com"
                )
                api_key = gr.Textbox(
                    label="API Key (Optional)",
                    type="password",
                    placeholder="Enter your API key if required"
                )

            with gr.Row():
                with gr.Column(scale=3):
                    model = gr.Dropdown(
                        label="Model",
                        choices=[default_model],
                        value=default_model,
                        interactive=True
                    )
                with gr.Column(scale=1):
                    refresh_models_btn = gr.Button("🔄", size="sm")
                with gr.Column(scale=2):
                    max_tokens = gr.Number(
                        label="Max Tokens",
                        value=16384,
                        minimum=1,
                        maximum=32000
                    )

        # Main interface
        with gr.Row():
            # Input panel (left side)
            with gr.Column(scale=1, elem_classes=["input-panel"]):
                gr.Markdown("## 📝 Input")

                text_input = gr.Textbox(
                    label="Your Message",
                    placeholder="Type your message here...",
                    lines=5
                )

                file_upload = gr.File(
                    label="Upload Files",
                    file_count="multiple",
                    file_types=[
                        ".pdf", ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp",
                        ".mp3", ".wav", ".m4a", ".flac", ".ogg"
                    ]
                )

                with gr.Row():
                    send_btn = gr.Button("🚀 Send Request", variant="primary", size="lg")
                    clear_btn = gr.Button("🗑️ Clear", variant="secondary")

            # Output panel (right side)
            with gr.Column(scale=1, elem_classes=["output-panel"]):
                gr.Markdown("## 📤 Response")

                status_output = gr.Textbox(
                    label="Status",
                    placeholder="Response status will appear here...",
                    lines=8,
                    max_lines=15,
                    interactive=False
                )

                response_output = gr.Code(
                    label="Raw Response",
                    language="json",
                    interactive=False
                )

        # Example section
        with gr.Accordion("📚 Usage Examples", open=False):
            gr.Markdown("""
            ### Example Requests:

            **Text Only:**
            - Message: "Hello, how are you?"
            - Files: None

            **PDF Analysis:**
            - Message: "Please summarize this document"
            - Files: document.pdf

            **Image OCR:**
            - Message: "Extract text from this image"
            - Files: receipt.jpg

            **Audio Transcription:**
            - Message: "Transcribe this audio file"
            - Files: meeting.mp3

            **Multi-modal:**
            - Message: "Analyze these files and provide insights"
            - Files: report.pdf, chart.png, recording.wav

            ### Supported File Types:
            - **PDFs**: .pdf
            - **Images**: .jpg, .jpeg, .png, .gif, .bmp, .webp
            - **Audio**: .mp3, .wav, .m4a, .flac, .ogg
            """)

        # Event handlers
        send_btn.click(
            fn=send_request,
            inputs=[base_url, api_key, model, max_tokens, text_input, file_upload],
            outputs=[status_output, response_output]
        )

        clear_btn.click(
            fn=clear_form,
            outputs=[text_input, status_output, response_output, file_upload]
        )

        # Refresh models when button is clicked
        refresh_models_btn.click(
            fn=fetch_models,
            inputs=[base_url, api_key],
            outputs=[model]
        )

        # Auto-refresh models when the base URL field loses focus
        base_url.blur(
            fn=fetch_models,
            inputs=[base_url, api_key],
            outputs=[model]
        )

        # Auto-refresh models when the API key field loses focus
        api_key.blur(
            fn=fetch_models,
            inputs=[base_url, api_key],
            outputs=[model]
        )

        # Submit from the keyboard while the message box is focused
        text_input.submit(
            fn=send_request,
            inputs=[base_url, api_key, model, max_tokens, text_input, file_upload],
            outputs=[status_output, response_output]
        )

        # Preload models when interface loads
        interface.load(
            fn=fetch_models,
            inputs=[base_url, api_key],
            outputs=[model]
        )

    return interface


if __name__ == "__main__":
    # Build the interface, then serve it locally.
    demo = create_ui()

    # Launch settings, gathered in one place and passed as keyword arguments.
    launch_options = {
        "server_name": "127.0.0.1",    # localhost only, rather than 0.0.0.0
        "server_port": 7890,           # fixed port for this app
        "share": False,                # set to True to create a public link
        "debug": True,                 # debug mode is switched on
        "show_error": True,            # show detailed error messages
        "inbrowser": True,             # auto-open in the browser
        # NOTE(review): with False, launch() presumably blocks until the
        # server stops - confirm against the Gradio launch() docs.
        "prevent_thread_lock": False,
    }
    demo.launch(**launch_options)