Spaces:

algoryn
/

dots-ocr-idcard

Paused

dots-ocr-idcard / scripts /test_production.py

feat(api): fast FastAPI app + model loader refactor; add mock mode for tests

- Add pyproject + setuptools config and console entrypoint
- Implement enhanced field extraction + MRZ heuristics
- Add response builder with compatibility for legacy MRZ fields
- New preprocessing pipeline for PDFs/images
- HF Spaces GPU: cache ENV, optional flash-attn, configurable base image
- Add Make targets for Spaces GPU and local CPU
- Add httpx for TestClient; tests pass in mock mode
- Remove embedded model files and legacy app/modules

211e423 about 2 months ago

raw

history blame contribute delete

2.97 kB

	#!/usr/bin/env python3
	"""Production API Test Script

	Quick test script specifically for the production Dots.OCR API.
	"""

	import requests
	import json
	import sys
	from pathlib import Path

	def test_production_api():
	    """Smoke-test the production Dots.OCR API.

	    Performs a GET on ``/health`` and then uploads the sample image
	    ``tom_id_card_front.jpg`` (expected next to this script) to
	    ``/v1/id/ocr``, printing a short summary of the JSON response.

	    Returns:
	        bool: True when the health check and the OCR request both succeed
	        and the response could be summarized; False on any failure
	        (including a missing sample image).
	    """
	    api_url = "https://algoryn-dots-ocr-idcard.hf.space"
	    print(f"🔍 Testing Production API at {api_url}")

	    # Health check
	    try:
	        print("📡 Checking API health...")
	        health_response = requests.get(f"{api_url}/health", timeout=10)
	        health_response.raise_for_status()
	        health_data = health_response.json()
	        print(f"✅ Health check passed: {health_data}")
	    except Exception as e:
	        # Script-level boundary: any failure here means the API is unusable.
	        print(f"❌ Health check failed: {e}")
	        return False

	    # Test with front image
	    front_image = Path(__file__).parent / "tom_id_card_front.jpg"
	    if not front_image.exists():
	        print(f"❌ Test image not found: {front_image}")
	        return False

	    print(f"📸 Testing OCR with {front_image.name}")

	    try:
	        # Keep the file open only for the duration of the upload.
	        with open(front_image, 'rb') as image_file:
	            response = requests.post(
	                f"{api_url}/v1/id/ocr",
	                files={'file': image_file},
	                timeout=60,  # Longer timeout for production
	            )
	        response.raise_for_status()
	        result = response.json()

	        # A missing key and an explicit JSON null are treated the same way.
	        detections = result.get('detections') or []

	        print(f"✅ OCR test passed")
	        print(f" Request ID: {result.get('request_id')}")
	        print(f" Media type: {result.get('media_type')}")

	        # Only apply the float format spec to real numbers; formatting None
	        # with ':.2f' raises TypeError and would misreport a passing test.
	        processing_time = result.get('processing_time')
	        if isinstance(processing_time, (int, float)):
	            print(f" Processing time: {processing_time:.2f}s")
	        else:
	            print(f" Processing time: N/A")
	        print(f" Detections: {len(detections)}")

	        # Show extracted fields
	        key_fields = ['document_number', 'surname', 'given_names', 'nationality']
	        for i, detection in enumerate(detections):
	            fields = detection.get('extracted_fields') or {}
	            field_count = sum(1 for entry in fields.values() if entry is not None)
	            print(f" Page {i+1}: {field_count} fields extracted")

	            # Show some key fields
	            for field in key_fields:
	                entry = fields.get(field)
	                if entry is None:
	                    continue
	                if isinstance(entry, dict):
	                    value = entry.get('value', 'N/A')
	                    confidence = entry.get('confidence', 'N/A')
	                else:
	                    # Plain (non-dict) field values carry no confidence score.
	                    value = str(entry)
	                    confidence = 'N/A'
	                print(f" {field}: {value} (confidence: {confidence})")

	        return True

	    except Exception as e:
	        print(f"❌ OCR test failed: {e}")
	        # HTTP errors raised by raise_for_status() carry the server response.
	        if hasattr(e, 'response') and e.response is not None:
	            print(f" Status code: {e.response.status_code}")
	            print(f" Response: {e.response.text}")
	        return False

	if __name__ == "__main__":
	    # Translate the boolean test outcome into a process exit status:
	    # 0 when the API test succeeded, 1 otherwise.
	    exit_code = 1 if not test_production_api() else 0
	    sys.exit(exit_code)