img / app.py
Unique00225's picture
Update app.py
f4d5db9 verified
import gradio as gr
from transformers import AutoProcessor, AutoModelForVision2Seq
import torch
from PIL import Image
# Check if we have enough memory, otherwise use CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32
@gr.cache_resource
def load_model():
try:
print("Loading OLM OCR model...")
# Load with optimizations for limited resources
processor = AutoProcessor.from_pretrained("allenai/olmOCR-2-7B-1025-FP8")
model = AutoModelForVision2Seq.from_pretrained(
"allenai/olmOCR-2-7B-1025-FP8",
torch_dtype=torch_dtype,
device_map="auto" if device == "cuda" else None,
low_cpu_mem_usage=True
)
if device == "cpu":
model = model.to(device)
print("Model loaded successfully!")
return processor, model
except Exception as e:
print(f"Error loading model: {e}")
return None, None
processor, model = load_model()
def extract_text_from_image(image):
if processor is None or model is None:
return "Model failed to load. The model might be too large for this environment."
try:
if image is None:
return "Please upload an image first."
# Convert and process image
image = image.convert('RGB')
inputs = processor(images=image, return_tensors="pt").to(device)
# Generate with optimizations
with torch.no_grad():
outputs = model.generate(
**inputs,
max_new_tokens=256, # Reduced for faster processing
do_sample=False,
num_beams=1 # Faster but less accurate
)
text = processor.decode(outputs[0], skip_special_tokens=True)
return text
except Exception as e:
return f"Error: {str(e)}"
demo = gr.Interface(
extract_text_from_image,
gr.Image(type="pil"),
gr.Textbox(lines=5),
title="OLM OCR"
)
if __name__ == "__main__":
demo.launch()