dots-ocr-idcard / scripts /test_production.py
tommulder's picture
feat(api): fast FastAPI app + model loader refactor; add mock mode for tests\n\n- Add pyproject + setuptools config and console entrypoint\n- Implement enhanced field extraction + MRZ heuristics\n- Add response builder with compatibility for legacy MRZ fields\n- New preprocessing pipeline for PDFs/images\n- HF Spaces GPU: cache ENV, optional flash-attn, configurable base image\n- Add Make targets for Spaces GPU and local CPU\n- Add httpx for TestClient; tests pass in mock mode\n- Remove embedded model files and legacy app/modules
211e423
#!/usr/bin/env python3
"""Production API Test Script
Quick test script specifically for the production Dots.OCR API.
"""
import requests
import json
import sys
from pathlib import Path
def _field_summary(entry):
    """Return ``(value, confidence)`` for one extracted-field entry.

    Dict entries are read through their 'value' and 'confidence' keys,
    each falling back to 'N/A'. Any other entry is rendered with ``str()``
    and reported with an 'N/A' confidence.
    """
    if isinstance(entry, dict):
        return entry.get('value', 'N/A'), entry.get('confidence', 'N/A')
    return str(entry), 'N/A'


def _print_ocr_summary(result):
    """Print a human-readable summary of a decoded ``/v1/id/ocr`` response.

    Args:
        result: The decoded JSON body of the OCR response (a dict).
    """
    print(f" Request ID: {result.get('request_id')}")
    print(f" Media type: {result.get('media_type')}")

    # A missing or null processing time cannot take the ``.2f`` format spec;
    # formatting it blindly would raise TypeError and make a successful OCR
    # call look like a failure.
    processing_time = result.get('processing_time')
    if isinstance(processing_time, (int, float)):
        print(f" Processing time: {processing_time:.2f}s")
    else:
        print(" Processing time: N/A")

    # ``or []`` / ``or {}`` also cover keys that are present but null.
    detections = result.get('detections') or []
    print(f" Detections: {len(detections)}")

    # Show extracted fields
    key_fields = ('document_number', 'surname', 'given_names', 'nationality')
    for page_number, detection in enumerate(detections, start=1):
        fields = detection.get('extracted_fields') or {}
        field_count = sum(entry is not None for entry in fields.values())
        print(f" Page {page_number}: {field_count} fields extracted")

        # Show some key fields
        for field in key_fields:
            entry = fields.get(field)
            if entry is None:
                continue
            value, confidence = _field_summary(entry)
            print(f" {field}: {value} (confidence: {confidence})")


def test_production_api() -> bool:
    """Test the production API endpoint.

    Sends a GET to ``/health``, then POSTs ``tom_id_card_front.jpg`` (looked
    up in this script's directory) to ``/v1/id/ocr`` and prints a summary of
    the JSON response.

    Returns:
        True when the health check and the OCR request both succeed and the
        response summary is printed. False when the health check fails, the
        test image is missing, or the OCR request or the handling of its
        response raises.
    """
    api_url = "https://algoryn-dots-ocr-idcard.hf.space"
    print(f"πŸ” Testing Production API at {api_url}")

    # Health check
    try:
        print("πŸ“‘ Checking API health...")
        health_response = requests.get(f"{api_url}/health", timeout=10)
        health_response.raise_for_status()
        health_data = health_response.json()
        print(f"βœ… Health check passed: {health_data}")
    except Exception as e:
        print(f"❌ Health check failed: {e}")
        return False

    # Test with front image
    front_image = Path(__file__).parent / "tom_id_card_front.jpg"
    if not front_image.exists():
        print(f"❌ Test image not found: {front_image}")
        return False

    print(f"πŸ“Έ Testing OCR with {front_image.name}")
    try:
        # Keep the upload handle open only for the duration of the request.
        with open(front_image, 'rb') as image_file:
            response = requests.post(
                f"{api_url}/v1/id/ocr",
                files={'file': image_file},
                timeout=60,  # Longer timeout for production
            )
        response.raise_for_status()
        result = response.json()

        print("βœ… OCR test passed")
        _print_ocr_summary(result)
        return True
    except Exception as e:
        print(f"❌ OCR test failed: {e}")
        # Only exceptions that carry an HTTP response have more to report.
        failed_response = getattr(e, 'response', None)
        if failed_response is not None:
            print(f" Status code: {failed_response.status_code}")
            print(f" Response: {failed_response.text}")
        return False
if __name__ == "__main__":
    # Report the outcome to the calling shell: exit status 0 when the
    # production test succeeds, 1 otherwise.
    exit_code = 0 if test_production_api() else 1
    sys.exit(exit_code)