Spaces:
Paused
Paused
| import gradio as gr | |
| from transformers import ViTForImageClassification | |
| import torch | |
| from PIL import Image | |
| import torchvision.transforms as transforms | |
# Pretrained ViT image classifier, switched to inference mode
# (eval() returns the module itself, so the call can be chained).
_CHECKPOINT = "google/vit-base-patch16-224"
model = ViTForImageClassification.from_pretrained(_CHECKPOINT).eval()
# Preprocessing applied to every input image before it reaches the model:
# resize to 224x224, convert to a tensor, then normalize each channel
# with mean 0.5 and std 0.5.
_resize = transforms.Resize((224, 224))
_to_tensor = transforms.ToTensor()
_normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
transform = transforms.Compose([_resize, _to_tensor, _normalize])
def predict_image(img):
    """Classify an image and return the predicted label.

    Args:
        img: The image to classify, either a ``PIL.Image.Image`` or an
            array accepted by ``PIL.Image.fromarray`` (such as the NumPy
            array a ``gr.Image`` input passes by default). ``None`` means
            no image was supplied.

    Returns:
        The entry of ``model.config.id2label`` for the highest-scoring
        class, or an empty string when ``img`` is ``None``.
    """
    # Gradio may invoke the callback with None when the image is cleared.
    if img is None:
        return ""

    # torchvision's Resize only accepts PIL images or tensors, so wrap
    # raw arrays in a PIL image first.
    if not isinstance(img, Image.Image):
        img = Image.fromarray(img)

    # Normalize is configured for exactly three channels; this drops an
    # alpha channel and expands grayscale input.
    img = img.convert("RGB")

    # Apply the transformations and add the batch dimension.
    tensor_img = transform(img).unsqueeze(0)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(tensor_img)

    predicted_idx = outputs.logits.argmax(-1).item()
    return model.config.id2label[predicted_idx]
# Create the interface and start serving it.
iface = gr.Interface(
    fn=predict_image,
    # type="pil" hands predict_image a PIL image, which the torchvision
    # preprocessing pipeline accepts; the default NumPy array is rejected
    # by transforms.Resize.
    # NOTE(review): `shape` is a Gradio 3.x argument - confirm the pinned
    # Gradio version still supports it.
    inputs=gr.Image(shape=(224, 224), type="pil"),
    outputs="text",
    live=True,  # re-run the prediction whenever the input changes
    title="Image recognition",
    description="Upload an image you want to categorize.",
    theme="Monochrome",
)
iface.launch()