amnakhan1122 committed on
Commit
2a206a6
·
verified ·
1 Parent(s): e1b1112

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -7
app.py CHANGED
import streamlit as st
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
from googletrans import Translator
import torch

# NOTE(review): googletrans is an unofficial Google Translate client and can
# fail at runtime; translation below is therefore best-effort with a fallback.
translator = Translator()

st.title("🖼️ AI Image Caption Generator - Advanced Version")
st.write("Upload an image and get multiple captions generated by AI, in your preferred language!")

uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

# Language options: display name -> ISO 639-1 code understood by googletrans.
languages = {
    "English": "en",
    "Urdu": "ur",
    "Hindi": "hi",
    "French": "fr",
    "Spanish": "es",
    "Arabic": "ar"
}

selected_language = st.selectbox("Choose Output Language", list(languages.keys()))


@st.cache_resource
def _load_blip():
    """Load the BLIP captioning processor and model once per server process.

    Without caching, Streamlit re-instantiates the large model on every
    script rerun (i.e. on every widget interaction), which is very slow.
    """
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    model.eval()  # inference only
    return processor, model


if uploaded_file:
    # Show the uploaded image before any model-side resizing.
    image = Image.open(uploaded_file).convert('RGB')
    st.image(image, caption="Uploaded Image", width=300)

    st.write("Generating captions... please wait ⏳")

    # Resize for model input (BLIP-base expects 384x384; the processor would
    # resize anyway, so this just makes the choice explicit).
    image = image.resize((384, 384))

    processor, model = _load_blip()

    inputs = processor(image, return_tensors="pt")

    # Generate multiple candidate captions with beam search. no_grad() avoids
    # building an autograd graph during pure inference.
    with torch.no_grad():
        out = model.generate(
            **inputs,
            num_beams=5,              # Use beam search
            num_return_sequences=3,   # Generate 3 different captions
            max_length=50,
            early_stopping=True
        )

    captions = [processor.decode(o, skip_special_tokens=True) for o in out]

    st.success("📝 Captions Generated!")

    st.subheader("Here are the captions:")

    for idx, cap in enumerate(captions):
        # Translate the caption based on the selected language. English is the
        # model's native output language, so skip the network round-trip then.
        if languages[selected_language] == "en":
            translated_caption = cap
        else:
            try:
                translated_caption = translator.translate(
                    cap, dest=languages[selected_language]
                ).text
            except Exception:
                # Best-effort: googletrans hits an unofficial endpoint and can
                # fail; fall back to the untranslated caption rather than crash.
                translated_caption = cap
        st.text_input(f"Caption {idx+1} ({selected_language})", value=translated_caption)