Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import AutoProcessor, AutoModelForVision2Seq | |
| import torch | |
| from PIL import Image | |
| # Check if we have enough memory, otherwise use CPU | |
| device = "cuda" if torch.cuda.is_available() else "cpu" | |
| torch_dtype = torch.float16 if device == "cuda" else torch.float32 | |
| def load_model(): | |
| try: | |
| print("Loading OLM OCR model...") | |
| # Load with optimizations for limited resources | |
| processor = AutoProcessor.from_pretrained("allenai/olmOCR-2-7B-1025-FP8") | |
| model = AutoModelForVision2Seq.from_pretrained( | |
| "allenai/olmOCR-2-7B-1025-FP8", | |
| torch_dtype=torch_dtype, | |
| device_map="auto" if device == "cuda" else None, | |
| low_cpu_mem_usage=True | |
| ) | |
| if device == "cpu": | |
| model = model.to(device) | |
| print("Model loaded successfully!") | |
| return processor, model | |
| except Exception as e: | |
| print(f"Error loading model: {e}") | |
| return None, None | |
| processor, model = load_model() | |
| def extract_text_from_image(image): | |
| if processor is None or model is None: | |
| return "Model failed to load. The model might be too large for this environment." | |
| try: | |
| if image is None: | |
| return "Please upload an image first." | |
| # Convert and process image | |
| image = image.convert('RGB') | |
| inputs = processor(images=image, return_tensors="pt").to(device) | |
| # Generate with optimizations | |
| with torch.no_grad(): | |
| outputs = model.generate( | |
| **inputs, | |
| max_new_tokens=256, # Reduced for faster processing | |
| do_sample=False, | |
| num_beams=1 # Faster but less accurate | |
| ) | |
| text = processor.decode(outputs[0], skip_special_tokens=True) | |
| return text | |
| except Exception as e: | |
| return f"Error: {str(e)}" | |
| demo = gr.Interface( | |
| extract_text_from_image, | |
| gr.Image(type="pil"), | |
| gr.Textbox(lines=5), | |
| title="OLM OCR" | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch() |