|
|
|
|
|
import streamlit as st |
|
|
from transformers import BlipProcessor, BlipForConditionalGeneration |
|
|
from PIL import Image |
|
|
import requests |
|
|
from io import BytesIO |
|
|
from deep_translator import GoogleTranslator |
|
|
import torch |
|
|
|
|
|
|
|
|
# --- Page setup ---
# The emoji in the original title/icon were mojibake ("πΌοΈ") left over from an
# encoding mix-up; restored to the intended picture-frame glyph.
st.set_page_config(page_title="AI Image Caption Generator", page_icon="🖼️")

st.title("🖼️ AI Image Caption Generator")

st.write("Upload an image or paste a Google Image URL to get multiple captions generated in your preferred language!")
|
|
|
|
|
|
|
|
# Display name -> ISO-639-1 code for every translation target offered in the
# language selector. Insertion order here is the order shown in the UI.
languages = dict(
    English="en",
    Urdu="ur",
    Hindi="hi",
    French="fr",
    Spanish="es",
    Arabic="ar",
)
|
|
|
|
|
|
|
|
# --- Input selection UI ---
# Let the user choose between a local file upload and a direct image URL.
upload_option = st.radio("Choose Image Input Method:", ("Upload from Computer", "Paste Image URL"))

# Exactly one of these is populated depending on the chosen input method.
uploaded_file = None
image_url = None

if upload_option == "Upload from Computer":
    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
else:
    image_url = st.text_input("Paste Image URL (must be direct link ending with .jpg/.png/.jpeg)")

# Target language for the generated captions (keys of the `languages` mapping).
# The label's globe emoji was mojibake ("π") in the original; restored to 🌐.
selected_language = st.selectbox("🌐 Choose Output Language", list(languages.keys()))
|
|
|
|
|
|
|
|
# --- Image acquisition ---
# Resolve the user's input (upload or URL) into a PIL image, or leave `image`
# as None so the UI below can prompt for valid input.
image = None

if uploaded_file:
    # Normalize to RGB: BLIP expects 3-channel input and uploads may be
    # grayscale or RGBA (e.g. PNG with transparency).
    image = Image.open(uploaded_file).convert('RGB')
elif image_url:
    try:
        # Timeout prevents the app from hanging indefinitely on a dead or
        # very slow host (the original call had no timeout at all).
        response = requests.get(image_url, timeout=10)
        if response.status_code == 200:
            image = Image.open(BytesIO(response.content)).convert('RGB')
        else:
            st.error("Failed to fetch image. Please check the URL.")
    except Exception as e:
        # Covers network errors and non-image content that PIL cannot decode.
        st.error(f"Error fetching image: {e}")
|
|
|
|
|
@st.cache_resource(show_spinner=False)
def _load_blip():
    """Load and cache the BLIP captioning processor and model once per server.

    Streamlit reruns the whole script on every widget interaction; without
    caching, both `from_pretrained` calls re-instantiated the model on each
    rerun, making every interaction extremely slow.
    """
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    return processor, model


if image:
    st.image(image, caption="Selected Image", width=300)

    with st.spinner("Generating captions... please wait ⏳"):
        # BLIP-base operates on 384x384 inputs.
        resized_image = image.resize((384, 384))

        processor, model = _load_blip()

        inputs = processor(resized_image, return_tensors="pt")

        # Inference only — no_grad avoids building the autograd graph,
        # saving memory and time. Beam search returning 3 sequences gives
        # the user 3 distinct candidate captions.
        with torch.no_grad():
            out = model.generate(
                **inputs,
                num_beams=5,
                num_return_sequences=3,
                max_length=50,
                early_stopping=True,
            )

        captions = [processor.decode(o, skip_special_tokens=True) for o in out]

    st.success("🎉 Captions Generated Successfully!")

    st.subheader(f"Here are the captions in {selected_language}:")

    for idx, cap in enumerate(captions):
        try:
            translated_caption = GoogleTranslator(source='auto', target=languages[selected_language]).translate(cap)
        except Exception:
            # Best-effort translation: if the service fails, fall back to
            # showing the untranslated English caption rather than crashing.
            translated_caption = f"(Translation Error: Showing English) {cap}"

        st.text_area(f"✏️ Caption {idx+1}", value=translated_caption, height=80)

    st.caption("Tip: You can edit, copy, or download captions easily! ✂️")
else:
    st.info("Please upload an image or paste a valid URL to proceed.")
|
|
|