import gradio as gr
from transformers import AutoProcessor, AutoModelForVision2Seq
import torch
from PIL import Image

# Use the GPU if available, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Note: the FP8 checkpoint is primarily targeted at vLLM-style serving;
# loading it through transformers may require extra quantization dependencies.
MODEL_ID = "allenai/olmOCR-2-7B-1025-FP8"


# The model is loaded once at import time, so no caching decorator is needed.
def load_model():
    try:
        print("Loading olmOCR model...")
        # Load with optimizations for limited resources
        processor = AutoProcessor.from_pretrained(MODEL_ID)
        model = AutoModelForVision2Seq.from_pretrained(
            MODEL_ID,
            torch_dtype=torch_dtype,
            device_map="auto" if device == "cuda" else None,
            low_cpu_mem_usage=True,
        )
        if device == "cpu":
            model = model.to(device)
        print("Model loaded successfully!")
        return processor, model
    except Exception as e:
        print(f"Error loading model: {e}")
        return None, None


processor, model = load_model()


def extract_text_from_image(image):
    if processor is None or model is None:
        return "Model failed to load. The model might be too large for this environment."
    if image is None:
        return "Please upload an image first."
    try:
        image = image.convert("RGB")

        # olmOCR is a Qwen2.5-VL-based model, so its processor expects a chat-style
        # prompt alongside the image. The instruction below is a generic placeholder,
        # not the official olmOCR prompt.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": "Extract all of the text in this image."},
                ],
            }
        ]
        prompt = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)

        # Greedy decoding with a small token budget keeps latency down on limited hardware
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,  # reduced for faster processing
                do_sample=False,
                num_beams=1,  # faster, but can be less accurate
            )

        # Decode only the newly generated tokens, not the prompt
        generated = outputs[0][inputs["input_ids"].shape[1]:]
        return processor.decode(generated, skip_special_tokens=True)
    except Exception as e:
        return f"Error: {e}"


demo = gr.Interface(
    fn=extract_text_from_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(lines=5),
    title="olmOCR",
)

if __name__ == "__main__":
    demo.launch()