Spaces:

utarn
/

ai_ocr

Sleeping

App Files Files Community

ai_ocr / app.py

utarn

init

0168600 about 2 months ago

raw

history blame

17.8 kB

	import gradio as gr
	import requests
	import json
	import base64
	import os
	from typing import List, Optional, Tuple, Any
	import mimetypes

	class OmniAPIClient:
	    """Small HTTP client for an API exposing chat-completion and model-list endpoints.

	    The client talks to ``{base_url}/v1/chat/completions`` and
	    ``{base_url}/v1/models`` and never raises to its callers: every public
	    request method returns a ``(success, payload)`` tuple instead.
	    """

	    # Model id returned/used whenever the model list cannot be obtained.
	    DEFAULT_MODEL = "qwen/qwen3-235b-a22b-instruct-2507"

	    def __init__(self, base_url: str = "https://api.modelharbor.com"):
	        """Store the base URL (without trailing slash) and derive endpoint URLs."""
	        self.base_url = base_url.rstrip('/')
	        self.chat_endpoint = f"{self.base_url}/v1/chat/completions"
	        self.models_endpoint = f"{self.base_url}/v1/models"

	    @staticmethod
	    def _build_headers(api_key: str = "") -> dict:
	        """Return JSON request headers, adding a Bearer token when a key is given."""
	        headers = {"Content-Type": "application/json"}
	        if api_key:
	            headers["Authorization"] = f"Bearer {api_key}"
	        return headers

	    @staticmethod
	    def _extract_model_ids(data: Any) -> List[str]:
	        """Pull model ids out of a decoded ``/v1/models`` response.

	        Accepted shapes:
	          * ``{"data": [{"id": "m1"}, ...]}``
	          * ``{"models": ["m1", ...]}``
	          * ``["m1", ...]``
	          * any other dict: every list value is flattened into the result

	        Entries may be plain values or dicts carrying an ``"id"`` key; empty
	        entries are dropped and everything is returned as ``str``.
	        """
	        if isinstance(data, dict):
	            if isinstance(data.get("data"), list):
	                entries = data["data"]
	            elif isinstance(data.get("models"), list):
	                entries = data["models"]
	            else:
	                entries = [
	                    item
	                    for value in data.values()
	                    if isinstance(value, list)
	                    for item in value
	                ]
	        elif isinstance(data, list):
	            entries = data
	        else:
	            entries = []

	        model_ids = []
	        for entry in entries:
	            # Dict entries contribute their "id"; anything else is used as is.
	            candidate = entry.get("id") if isinstance(entry, dict) else entry
	            if candidate:
	                model_ids.append(str(candidate))
	        return model_ids

	    def encode_file_to_base64(self, file_path: str) -> str:
	        """Read ``file_path`` as bytes and return its base64 text representation."""
	        with open(file_path, "rb") as file:
	            return base64.b64encode(file.read()).decode('utf-8')

	    def get_mime_type(self, file_path: str) -> str:
	        """Guess the MIME type from the file name; default to a generic binary type."""
	        mime_type, _ = mimetypes.guess_type(file_path)
	        return mime_type or "application/octet-stream"

	    def create_file_content(self, file_path: str, file_type: str) -> dict:
	        """Build the message part for one file.

	        Images become ``image_url`` parts; every other file becomes a ``file``
	        part carrying its base name. Both embed the file as a base64 data URL.

	        Args:
	            file_path: Path of the file to embed.
	            file_type: Unused; kept so existing callers keep working.
	        """
	        mime_type = self.get_mime_type(file_path)
	        data_url = f"data:{mime_type};base64,{self.encode_file_to_base64(file_path)}"

	        if mime_type.startswith('image/'):
	            return {
	                "type": "image_url",
	                "image_url": {"url": data_url},
	            }
	        return {
	            "type": "file",
	            "file": {
	                "filename": os.path.basename(file_path),
	                "file_data": data_url,
	            },
	        }

	    def build_message_content(self, text: str, files: List[str]) -> List[dict]:
	        """Assemble the ``content`` list of a user message.

	        The text part (if non-blank) comes first, followed by one part per
	        existing file, in the order given. ``None`` for either argument is
	        treated as "nothing provided"; missing paths are skipped.
	        """
	        content_parts = []

	        if text and text.strip():
	            content_parts.append({"type": "text", "text": text})

	        for file_path in files or []:
	            if file_path and os.path.exists(file_path):
	                content_parts.append(self.create_file_content(file_path, "file"))

	        return content_parts

	    def get_available_models(self, api_key: str = "") -> Tuple[bool, List[str]]:
	        """Fetch the model ids offered by the API.

	        Returns:
	            ``(True, ids)`` on an HTTP 200 with a decodable body (``ids`` falls
	            back to ``[DEFAULT_MODEL]`` when the body lists no models), and
	            ``(False, [DEFAULT_MODEL])`` on any HTTP, network or decoding failure.
	        """
	        fallback = [self.DEFAULT_MODEL]
	        try:
	            response = requests.get(
	                self.models_endpoint,
	                headers=self._build_headers(api_key),
	                timeout=10,
	            )
	            if response.status_code != 200:
	                return False, fallback
	            models = self._extract_model_ids(response.json())
	        except Exception:
	            # Deliberate best-effort: the UI must always get a usable model list,
	            # whether the failure is a timeout, a refused connection or bad JSON.
	            return False, fallback

	        return True, models or fallback

	    def send_chat_completion(self, text: str, files: List[str], api_key: str = "", model: str = DEFAULT_MODEL, max_tokens: int = 16384, stream: bool = False) -> Tuple[bool, Any]:
	        """Send one user message (text and/or files) to the chat endpoint.

	        Args:
	            text: Prompt text; may be empty when files are supplied.
	            files: Paths of files to attach.
	            api_key: Optional Bearer token.
	            model: Model id to request.
	            max_tokens: Completion token limit; coerced to ``int`` so that a
	                float coming from a UI widget is not sent as ``16384.0``.
	            stream: Forwarded as is. NOTE: the reply is always decoded as one
	                JSON document, so a streamed reply will likely be reported
	                as "Invalid JSON response".

	        Returns:
	            ``(True, decoded_json)`` on HTTP 200 with a JSON body, otherwise
	            ``(False, {"error": ..., ...})`` describing the failure.
	        """
	        try:
	            content_parts = self.build_message_content(text, files)
	            if not content_parts:
	                return False, {"error": "No text or valid files provided"}

	            payload = {
	                "model": model,
	                "messages": [
	                    {
	                        "role": "user",
	                        "content": content_parts,
	                    }
	                ],
	                "max_tokens": int(max_tokens),
	                "stream": stream,
	            }

	            response = requests.post(
	                self.chat_endpoint,
	                json=payload,
	                headers=self._build_headers(api_key),
	                timeout=60,
	            )

	            # ValueError is the common base of every JSON decode error that
	            # requests may raise (stdlib json or simplejson backed).
	            if response.status_code == 200:
	                try:
	                    return True, response.json()
	                except ValueError:
	                    return False, {"error": "Invalid JSON response", "raw_response": response.text}

	            try:
	                error_data = response.json()
	            except ValueError:
	                return False, {"error": f"HTTP {response.status_code}", "raw_response": response.text}
	            return False, {"error": f"API Error ({response.status_code})", "details": error_data}

	        except requests.exceptions.Timeout:
	            return False, {"error": "Request timeout"}
	        except requests.exceptions.ConnectionError:
	            return False, {"error": "Connection error"}
	        except Exception as e:
	            return False, {"error": f"Unexpected error: {str(e)}"}


	def _collect_file_paths(files):
	    """Normalize the value delivered by the file-upload widget to a list of paths.

	    Accepts ``None``, a single item or a list of items. An item contributes its
	    ``.name`` attribute when it has one; a bare path string is used as is.
	    Empty or unusable items are skipped.
	    """
	    if files is None:
	        return []
	    if not isinstance(files, (list, tuple)):
	        files = [files]

	    paths = []
	    for item in files:
	        path = getattr(item, "name", None)
	        if not path and isinstance(item, str):
	            path = item
	        if path:
	            paths.append(path)
	    return paths


	def _format_success_status(response, model):
	    """Build the status text for a successful chat-completion response.

	    When the first choice carries ``message.content`` the reply is appended to
	    the status. For models whose name contains "typhoon" the ``natural_text``
	    field is preferred; the content may be a dict or a JSON-encoded string
	    holding that field, and is used unchanged when the field is absent.
	    """
	    status = "✅ Request successful"
	    try:
	        assistant_reply = response["choices"][0]["message"]["content"]
	    except (KeyError, IndexError, TypeError):
	        return status

	    if "typhoon" in (model or "").lower():
	        structured = assistant_reply
	        if isinstance(assistant_reply, str):
	            # The content arrives as text; it may itself be a JSON document.
	            try:
	                structured = json.loads(assistant_reply)
	            except ValueError:
	                structured = None
	        if isinstance(structured, dict) and "natural_text" in structured:
	            assistant_reply = structured["natural_text"]

	    return f"{status}\n\nAssistant Reply:\n{assistant_reply}"


	def create_ui():
	    """Create the Gradio UI and return the ``gr.Blocks`` interface (not launched)."""
	    default_model = "qwen/qwen3-235b-a22b-instruct-2507"

	    def fetch_models(base_url, api_key):
	        """Return a model dropdown populated from the API, or the default model."""
	        choices = [default_model]
	        if base_url:
	            try:
	                success, models = OmniAPIClient(base_url).get_available_models(api_key)
	                if success and models:
	                    choices = models
	            except Exception:
	                # Best-effort refresh: on any failure keep the default choice.
	                choices = [default_model]
	        return gr.Dropdown(choices=choices, value=choices[0])

	    def send_request(base_url, api_key, model, max_tokens, text, files):
	        """Validate the form, call the API and return ``(status, raw_json)``."""
	        try:
	            if not base_url:
	                return "❌ Base URL is required", ""

	            text = text or ""
	            if not text.strip() and not files:
	                return "❌ Please provide either text or upload files", ""

	            request_kwargs = {
	                "text": text,
	                "files": _collect_file_paths(files),
	                "api_key": api_key,
	                "model": model,
	            }
	            if max_tokens is not None:
	                # The number widget may deliver a float; the API expects an integer.
	                request_kwargs["max_tokens"] = int(max_tokens)

	            success, response = OmniAPIClient(base_url).send_chat_completion(**request_kwargs)

	            # Keep non-ASCII text (e.g. OCR output) readable instead of \uXXXX escapes.
	            raw_response = json.dumps(response, indent=2, ensure_ascii=False)
	            if not success:
	                return "❌ Request failed", raw_response
	            return _format_success_status(response, model), raw_response

	        except Exception as e:
	            # UI boundary: surface any unexpected failure in the status box.
	            return f"❌ Error: {str(e)}", ""

	    def clear_form():
	        """Reset message, status, raw response and uploaded files."""
	        return "", "", "", None

	    # Custom CSS for better layout
	    css = """
	.gradio-container {
	max-width: 1200px;
	}
	.config-panel {
	background-color: #f8f9fa;
	border-radius: 8px;
	padding: 15px;
	margin-bottom: 20px;
	}
	.input-panel {
	border-right: 1px solid #e0e0e0;
	padding-right: 20px;
	}
	.output-panel {
	padding-left: 20px;
	}
	"""

	    with gr.Blocks(css=css, title="Omni API Chat Interface") as interface:
	        gr.Markdown("# 🤖 Omni API Chat Interface")
	        gr.Markdown("Interact with the Omni API using text, PDFs, images, and audio files")

	        # Configuration section
	        with gr.Group(elem_classes=["config-panel"]):
	            gr.Markdown("## ⚙️ Configuration")
	            with gr.Row():
	                base_url = gr.Textbox(
	                    label="API Base URL",
	                    value="https://api.modelharbor.com",
	                    placeholder="https://api.modelharbor.com"
	                )
	                api_key = gr.Textbox(
	                    label="API Key (Optional)",
	                    type="password",
	                    placeholder="Enter your API key if required"
	                )

	            with gr.Row():
	                with gr.Column(scale=3):
	                    model = gr.Dropdown(
	                        label="Model",
	                        choices=[default_model],
	                        value=default_model,
	                        interactive=True
	                    )
	                with gr.Column(scale=1):
	                    refresh_models_btn = gr.Button("🔄", size="sm")
	                with gr.Column(scale=2):
	                    max_tokens = gr.Number(
	                        label="Max Tokens",
	                        value=16384,
	                        minimum=1,
	                        maximum=32000
	                    )

	        # Main interface
	        with gr.Row():
	            # Input panel (left side)
	            with gr.Column(scale=1, elem_classes=["input-panel"]):
	                gr.Markdown("## 📝 Input")

	                text_input = gr.Textbox(
	                    label="Your Message",
	                    placeholder="Type your message here...",
	                    lines=5
	                )

	                file_upload = gr.File(
	                    label="Upload Files",
	                    file_count="multiple",
	                    file_types=[
	                        ".pdf", ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp",
	                        ".mp3", ".wav", ".m4a", ".flac", ".ogg"
	                    ]
	                )

	                with gr.Row():
	                    send_btn = gr.Button("🚀 Send Request", variant="primary", size="lg")
	                    clear_btn = gr.Button("🗑️ Clear", variant="secondary")

	            # Output panel (right side)
	            with gr.Column(scale=1, elem_classes=["output-panel"]):
	                gr.Markdown("## 📤 Response")

	                status_output = gr.Textbox(
	                    label="Status",
	                    placeholder="Response status will appear here...",
	                    lines=8,
	                    max_lines=15,
	                    interactive=False
	                )

	                response_output = gr.Code(
	                    label="Raw Response",
	                    language="json",
	                    interactive=False
	                )

	        # Example section
	        with gr.Accordion("📚 Usage Examples", open=False):
	            gr.Markdown("""
	### Example Requests:

	Text Only:
	- Message: "Hello, how are you?"
	- Files: None

	PDF Analysis:
	- Message: "Please summarize this document"
	- Files: document.pdf

	Image OCR:
	- Message: "Extract text from this image"
	- Files: receipt.jpg

	Audio Transcription:
	- Message: "Transcribe this audio file"
	- Files: meeting.mp3

	Multi-modal:
	- Message: "Analyze these files and provide insights"
	- Files: report.pdf, chart.png, recording.wav

	### Supported File Types:
	- PDFs: .pdf
	- Images: .jpg, .jpeg, .png, .gif, .bmp, .webp
	- Audio: .mp3, .wav, .m4a, .flac, .ogg
	""")

	        # Component lists shared by the event handlers below.
	        request_inputs = [base_url, api_key, model, max_tokens, text_input, file_upload]
	        request_outputs = [status_output, response_output]
	        model_inputs = [base_url, api_key]
	        model_outputs = [model]

	        # Event handlers
	        send_btn.click(fn=send_request, inputs=request_inputs, outputs=request_outputs)

	        clear_btn.click(
	            fn=clear_form,
	            outputs=[text_input, status_output, response_output, file_upload]
	        )

	        # Refresh models when button is clicked
	        refresh_models_btn.click(fn=fetch_models, inputs=model_inputs, outputs=model_outputs)

	        # Auto-refresh models when base URL changes
	        base_url.blur(fn=fetch_models, inputs=model_inputs, outputs=model_outputs)

	        # Auto-refresh models when API key changes
	        api_key.blur(fn=fetch_models, inputs=model_inputs, outputs=model_outputs)

	        # Allow Enter key to submit (when text input is focused)
	        text_input.submit(fn=send_request, inputs=request_inputs, outputs=request_outputs)

	        # Preload models when interface loads
	        interface.load(fn=fetch_models, inputs=model_inputs, outputs=model_outputs)

	    return interface


	if __name__ == "__main__":
	    # Settings handed to Gradio's launch(); values are unchanged, only grouped.
	    launch_options = {
	        "server_name": "127.0.0.1",       # bind to localhost instead of 0.0.0.0
	        "server_port": 7890,              # non-default port to avoid conflicts
	        "share": False,                   # set to True to create a public link
	        "debug": True,                    # debug mode is enabled
	        "show_error": True,               # show detailed error messages
	        "inbrowser": True,                # auto-open in the browser
	        "prevent_thread_lock": False,     # keep launch() blocking the main thread
	    }

	    # Build the interface, then serve it.
	    demo = create_ui()
	    demo.launch(**launch_options)