amnakhan1122 committed on
Commit
acf65e0
·
verified ·
1 Parent(s): 5e2816a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -28
app.py CHANGED
@@ -2,16 +2,29 @@
2
  import streamlit as st
3
  from transformers import BlipProcessor, BlipForConditionalGeneration
4
  from PIL import Image
 
 
5
  from googletrans import Translator
6
  import torch
7
 
8
  # Initialize Translator
9
  translator = Translator()
10
 
11
- st.title("๐Ÿ–ผ๏ธ AI Image Caption Generator - Advanced Version")
12
- st.write("Upload an image and get multiple captions generated by AI, in your preferred language!")
13
 
14
- uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # Language options
17
  languages = {
@@ -23,39 +36,58 @@ languages = {
23
  "Arabic": "ar"
24
  }
25
 
26
- selected_language = st.selectbox("Choose Output Language", list(languages.keys()))
 
 
 
27
 
28
  if uploaded_file:
29
- # Display image
30
  image = Image.open(uploaded_file).convert('RGB')
31
- st.image(image, caption="Uploaded Image", width=300)
 
 
 
 
 
 
 
 
32
 
33
- st.write("Generating captions... please wait โณ")
 
34
 
35
- # Resize for model input
36
- image = image.resize((384, 384))
 
37
 
38
- processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
39
- model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
40
 
41
- inputs = processor(image, return_tensors="pt")
42
-
43
- # Generate multiple captions
44
- out = model.generate(
45
- **inputs,
46
- num_beams=5, # Use beam search
47
- num_return_sequences=3, # Generate 3 different captions
48
- max_length=50,
49
- early_stopping=True
50
- )
51
 
52
- captions = [processor.decode(o, skip_special_tokens=True) for o in out]
 
 
 
 
 
 
 
53
 
54
- st.success("๐Ÿ“ Captions Generated!")
55
-
56
- st.subheader("Here are the captions:")
 
 
57
 
58
  for idx, cap in enumerate(captions):
59
- # Translate the caption based on selected language
60
- translated_caption = translator.translate(cap, dest=languages[selected_language]).text
61
- st.text_input(f"Caption {idx+1} ({selected_language})", value=translated_caption)
 
 
 
 
 
 
 
 
"""Streamlit app: AI image caption generator.

Accepts an image uploaded from disk or fetched from a direct URL, generates
three candidate captions with the BLIP base model (beam search), and
translates each caption into the user's selected language via googletrans.
"""

import streamlit as st
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import requests
from io import BytesIO
from googletrans import Translator
import torch

# Initialize the translator once at module level; reused across reruns.
translator = Translator()

st.set_page_config(page_title="AI Image Caption Generator", page_icon="🖼️")

st.title("🖼️ AI Image Caption Generator")
st.write("Upload an image or paste a Google Image URL to get multiple captions generated by AI, in your preferred language!")

# Image input options
upload_option = st.radio("Choose Image Input Method:", ("Upload from Computer", "Paste Image URL"))

uploaded_file = None
image_url = None

if upload_option == "Upload from Computer":
    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
else:
    image_url = st.text_input("Paste Image URL (must be direct link ending with .jpg/.png/.jpeg)")

# Language options
# NOTE(review): the diff context hid several entries of this dict; restore the
# full language mapping from the original file — only the last entry is visible here.
languages = {
    "Arabic": "ar"
}

selected_language = st.selectbox("🌍 Choose Output Language", list(languages.keys()))


@st.cache_resource(show_spinner=False)
def _load_blip():
    """Load the BLIP processor and model once; cached across Streamlit reruns.

    Without caching, `from_pretrained` re-downloads/re-instantiates the model
    on every widget interaction, which makes the app unusably slow.
    """
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    return processor, model


# Load the image from whichever input the user chose.
image = None

if uploaded_file:
    image = Image.open(uploaded_file).convert('RGB')
elif image_url:
    try:
        # Timeout so the app cannot hang forever on an unreachable URL.
        response = requests.get(image_url, timeout=10)
        if response.status_code == 200:
            image = Image.open(BytesIO(response.content)).convert('RGB')
        else:
            st.error("Failed to fetch image. Please check the URL.")
    except Exception as e:
        st.error(f"Error fetching image: {e}")

if image:
    st.image(image, caption="Selected Image", width=300)

    with st.spinner("Generating captions... please wait ⏳"):
        # Resize for model input (BLIP base was trained at 384x384).
        resized_image = image.resize((384, 384))

        processor, model = _load_blip()

        inputs = processor(resized_image, return_tensors="pt")

        # Generate multiple captions via beam search. no_grad avoids
        # accumulating autograd state during pure inference.
        with torch.no_grad():
            out = model.generate(
                **inputs,
                num_beams=5,
                num_return_sequences=3,
                max_length=50,
                early_stopping=True,
            )

        captions = [processor.decode(o, skip_special_tokens=True) for o in out]

    st.success("📝 Captions Generated Successfully!")

    st.subheader(f"Here are the captions in {selected_language}:")

    for idx, cap in enumerate(captions):
        try:
            translated_caption = translator.translate(cap, dest=languages[selected_language]).text
        except Exception:
            # Best-effort: fall back to the untranslated English caption.
            translated_caption = f"(Translation Error: Showing English) {cap}"

        st.text_area(f"✍️ Caption {idx+1}", value=translated_caption, height=80)

    st.caption("Tip: You can edit, copy, or download captions easily! ✍️")
else:
    st.info("Please upload an image or paste a valid URL to proceed.")