Update app.py
app.py CHANGED
@@ -2,24 +2,60 @@
 import streamlit as st
 from transformers import BlipProcessor, BlipForConditionalGeneration
 from PIL import Image
+from googletrans import Translator
 import torch
 
-
-
+# Initialize Translator
+translator = Translator()
+
+st.title("🖼️ AI Image Caption Generator - Advanced Version")
+st.write("Upload an image and get multiple captions generated by AI, in your preferred language!")
 
 uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
 
+# Language options
+languages = {
+    "English": "en",
+    "Urdu": "ur",
+    "Hindi": "hi",
+    "French": "fr",
+    "Spanish": "es",
+    "Arabic": "ar"
+}
+
+selected_language = st.selectbox("Choose Output Language", list(languages.keys()))
+
 if uploaded_file:
+    # Display image
     image = Image.open(uploaded_file).convert('RGB')
-    st.image(image, caption="Uploaded Image",
+    st.image(image, caption="Uploaded Image", width=300)
 
-    st.write("Generating
+    st.write("Generating captions... please wait ⏳")
+
+    # Resize for model input
+    image = image.resize((384, 384))
 
     processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
     model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 
     inputs = processor(image, return_tensors="pt")
-
-
+
+    # Generate multiple captions
+    out = model.generate(
+        **inputs,
+        num_beams=5,              # Use beam search
+        num_return_sequences=3,   # Generate 3 different captions
+        max_length=50,
+        early_stopping=True
+    )
+
+    captions = [processor.decode(o, skip_special_tokens=True) for o in out]
+
+    st.success("📝 Captions Generated!")
+
+    st.subheader("Here are the captions:")
 
-
+    for idx, cap in enumerate(captions):
+        # Translate the caption based on selected language
+        translated_caption = translator.translate(cap, dest=languages[selected_language]).text
+        st.text_input(f"Caption {idx+1} ({selected_language})", value=translated_caption)
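One side note on the new code: Streamlit reruns the whole script on every widget interaction, so the BLIP processor and model above are re-created (and potentially re-downloaded) each time a caption is generated. A minimal sketch of one way to avoid that, assuming a Streamlit version that provides st.cache_resource (1.18+); the load_blip name is illustrative, not part of this commit:

import streamlit as st
from transformers import BlipProcessor, BlipForConditionalGeneration

@st.cache_resource
def load_blip():
    # Loaded once per process, then reused across script reruns.
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    return processor, model

processor, model = load_blip()

With this in place, the rest of the script can use processor and model exactly as in the diff.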