File size: 3,029 Bytes
cd0dc09
3ec9940
cd0dc09
 
acf65e0
 
2bad46b
cd0dc09
3ec9940
d193001
acf65e0
cd0dc09
acf65e0
ccc1c04
acf65e0
d193001
 
 
 
 
 
 
 
 
 
acf65e0
 
 
 
 
 
 
 
 
 
cd0dc09
d193001
acf65e0
 
 
 
2a206a6
cd0dc09
 
acf65e0
 
 
 
 
 
 
 
 
cd0dc09
acf65e0
 
2a206a6
acf65e0
 
 
cd0dc09
acf65e0
 
cd0dc09
acf65e0
2a206a6
acf65e0
 
 
 
 
 
 
 
2a206a6
acf65e0
 
 
 
 
cd0dc09
2a206a6
acf65e0
d193001
acf65e0
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# app.py
import streamlit as st
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import requests
from io import BytesIO
from deep_translator import GoogleTranslator
import torch

# Streamlit page config.
# The icon/title emoji is U+1F5BC (framed picture) followed by U+FE0F; the
# file must be saved and read as UTF-8 or these literals turn into mojibake.
st.set_page_config(page_title="AI Image Caption Generator", page_icon="🖼️")

# Page heading and a one-line description of what the app does.
st.title("🖼️ AI Image Caption Generator")
st.write("Upload an image or paste a Google Image URL to get multiple captions generated in your preferred language!")

# Output languages offered to the user: display name -> language code.
# The codes are passed as the translation target further down, and the
# insertion order is the order in which the names are listed in the UI.
languages = dict(
    English="en",
    Urdu="ur",
    Hindi="hi",
    French="fr",
    Spanish="es",
    Arabic="ar",
)

# Ask where the image comes from. Only the widget for the chosen method is
# rendered; the variable for the other source is left as None.
upload_option = st.radio(
    "Choose Image Input Method:",
    ("Upload from Computer", "Paste Image URL"),
)

if upload_option == "Upload from Computer":
    uploaded_file = st.file_uploader(
        "Upload an image",
        type=["jpg", "png", "jpeg"],
    )
    image_url = None
else:
    uploaded_file = None
    image_url = st.text_input(
        "Paste Image URL (must be direct link ending with .jpg/.png/.jpeg)"
    )

# Output language for the captions, picked from the names in `languages`.
selected_language = st.selectbox(
    "🌐 Choose Output Language",
    list(languages),
)

# Load image based on input.
# `image` stays None when no source was provided or when loading failed; in
# the failure case an error message has already been shown to the user.
image = None

# Upper bound (seconds) on the remote fetch. `requests` applies no timeout by
# default, so without this an unresponsive host would hang the app.
_FETCH_TIMEOUT_SECONDS = 10

# Pasted URLs frequently carry leading/trailing whitespace; a value that is
# only whitespace is treated the same as no URL at all.
_clean_url = image_url.strip() if image_url else ""

if uploaded_file:
    try:
        image = Image.open(uploaded_file).convert('RGB')
    except Exception as e:
        # UI boundary: a corrupt or mislabeled upload should produce a
        # readable message instead of a traceback on the page.
        st.error(f"Error reading uploaded image: {e}")
elif _clean_url:
    # NOTE(security): this performs a server-side request to a user-supplied
    # URL. If the app is deployed where internal hosts are reachable, restrict
    # the allowed schemes/hosts before fetching.
    try:
        response = requests.get(_clean_url, timeout=_FETCH_TIMEOUT_SECONDS)
        if response.status_code == 200:
            image = Image.open(BytesIO(response.content)).convert('RGB')
        else:
            st.error("Failed to fetch image. Please check the URL.")
    except Exception as e:
        # UI boundary: covers network errors, timeouts, malformed URLs and
        # responses that are not decodable images.
        st.error(f"Error fetching image: {e}")

@st.cache_resource(show_spinner=False)
def _load_captioning_model():
    """Load the BLIP processor and model once and reuse them across reruns.

    Streamlit re-executes the script on every widget interaction; caching the
    loaded objects avoids re-instantiating the model on each rerun.

    Returns:
        A ``(processor, model)`` tuple for the
        "Salesforce/blip-image-captioning-base" checkpoint.
    """
    checkpoint = "Salesforce/blip-image-captioning-base"
    processor = BlipProcessor.from_pretrained(checkpoint)
    model = BlipForConditionalGeneration.from_pretrained(checkpoint)
    return processor, model


def _translate_caption(caption, target_code):
    """Return *caption* translated to *target_code*, best effort.

    Args:
        caption: Caption text produced by the model.
        target_code: Language code taken from the ``languages`` mapping.

    Returns:
        The translated text, the caption unchanged when the target is "en",
        or the caption prefixed with a translation-error notice when the
        translation call fails.
    """
    if target_code == "en":
        # The raw captions are treated as English (see the fallback message
        # below), so a remote translation call would be a no-op.
        return caption
    try:
        translated = GoogleTranslator(source='auto', target=target_code).translate(caption)
    except Exception:
        # Deliberate best effort: translation is a remote call, and a failure
        # there should not hide the caption from the user.
        return f"(Translation Error: Showing English) {caption}"
    # Guard against an empty/None result so the text area always has content.
    return translated or caption


if image is not None:
    st.image(image, caption="Selected Image", width=300)

    with st.spinner("Generating captions... please wait ⏳"):
        # Resize for model input
        resized_image = image.resize((384, 384))

        processor, model = _load_captioning_model()

        inputs = processor(resized_image, return_tensors="pt")

        # Generate multiple captions: beam search over 5 beams, returning
        # 3 sequences of at most 50 tokens each.
        out = model.generate(
            **inputs,
            num_beams=5,
            num_return_sequences=3,
            max_length=50,
            early_stopping=True,
        )

        captions = [processor.decode(o, skip_special_tokens=True) for o in out]

    st.success("📝 Captions Generated Successfully!")

    st.subheader(f"Here are the captions in {selected_language}:")

    target_code = languages[selected_language]
    for idx, cap in enumerate(captions, start=1):
        st.text_area(
            f"✏️ Caption {idx}",
            value=_translate_caption(cap, target_code),
            height=80,
        )

    st.caption("Tip: You can edit, copy, or download captions easily! ✍️")
else:
    st.info("Please upload an image or paste a valid URL to proceed.")