SandaAbhishekSagar commited on
Commit
add94a1
·
1 Parent(s): 1c1d558

pushing experiment code

Browse files
Files changed (3) hide show
  1. app.py +135 -25
  2. image_generator.py +0 -50
  3. translate.py +0 -27
app.py CHANGED
@@ -26,34 +26,144 @@
26
  # interface.launch()
27
 
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  import gradio as gr
30
- from translate import translate_text
31
- from image_generator import generate_image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
def chatbot(input_text, src_lang="auto"):
    """Translate the user's text to English and render an image for it."""
    english = translate_text(input_text, src_lang, "en")
    picture = generate_image(f"A scene depicting: {english}")
    return english, picture


# Gradio UI wiring for the translate-then-draw pipeline.
interface = gr.Interface(
    fn=chatbot,
    inputs=[
        gr.Textbox(label="Enter text in any language"),
        gr.Textbox(label="Source Language (optional, e.g., 'es' for Spanish)", value="auto"),
    ],
    outputs=[
        gr.Textbox(label="Translated Text"),
        gr.Image(label="Generated Image"),
    ],
    title="LinguaVision - Multilingual Chatbot with Image Generation",
    description="Enter text in any language to translate it into English and generate an image based on the text.\n NOTE: This tool takes approximately 12 minutes to execute.",
)

if __name__ == "__main__":
    interface.launch(share=True)
 
 
26
  # interface.launch()
27
 
28
 
29
+ # import gradio as gr
30
+ # from translate import translate_text
31
+ # from image_generator import generate_image
32
+
33
+ # def chatbot(input_text, src_lang="auto"):
34
+ # """Process user input, translate it, and generate an image."""
35
+ # # Translate input to English
36
+ # translated_text = translate_text(input_text, src_lang, "en")
37
+
38
+ # # Generate an image based on the translated text
39
+ # image_path = generate_image(f"A scene depicting: {translated_text}")
40
+
41
+ # return translated_text, image_path
42
+
43
+ # # Gradio Interface
44
+ # interface = gr.Interface(
45
+ # fn=chatbot,
46
+ # inputs=[
47
+ # gr.Textbox(label="Enter text in any language"),
48
+ # gr.Textbox(label="Source Language (optional, e.g., 'es' for Spanish)", value="auto"),
49
+ # ],
50
+ # outputs=[
51
+ # gr.Textbox(label="Translated Text"),
52
+ # gr.Image(label="Generated Image"),
53
+ # ],
54
+ # title="LinguaVision - Multilingual Chatbot with Image Generation",
55
+ # description="Enter text in any language to translate it into English and generate an image based on the text.\n NOTE: This tool takes approximately 12 minutes to execute.",
56
+ # )
57
+
58
+ # if __name__ == "__main__":
59
+ # interface.launch(share=True)
60
+
61
+ import torch
62
+ from transformers import MarianMTModel, MarianTokenizer
63
+ from diffusers import StableDiffusionPipeline, DDIMScheduler
64
  import gradio as gr
65
+ from typing import Tuple, Optional
66
+ import logging
67
+
68
class LinguaVisionSystem:
    """Translate arbitrary-language text to English and render an image for it.

    Bundles a MarianMT multilingual->English translator and a Stable Diffusion
    pipeline behind one object so the UI can call a single ``process_input``
    entry point.
    """

    def __init__(self, device: str = "cuda" if torch.cuda.is_available() else "cpu"):
        # NOTE: the default device is evaluated once at class-definition time;
        # pass `device` explicitly if CUDA availability can change at runtime.
        self.device = device
        self.logger = logging.getLogger(__name__)

        # Translation settings (Helsinki-NLP multilingual -> English model).
        self.translation_config = {
            "model_name": "Helsinki-NLP/opus-mt-mul-en",
            "max_length": 128,
            "num_beams": 4
        }
        self._init_translation_pipeline()

        # Image-generation settings.
        self.image_config = {
            "model_id": "stabilityai/stable-diffusion-2-1-base",
            "safety_checker": None,  # Disable for performance
            "scheduler": DDIMScheduler
        }
        self._init_image_pipeline()

    def _init_translation_pipeline(self) -> None:
        """Load the MarianMT tokenizer and model onto ``self.device``.

        Raises: re-raises any load failure after logging it.
        """
        try:
            self.tokenizer = MarianTokenizer.from_pretrained(
                self.translation_config["model_name"]
            )
            self.translation_model = MarianMTModel.from_pretrained(
                self.translation_config["model_name"]
            ).to(self.device)
        except Exception as e:
            self.logger.error(f"Translation pipeline initialization failed: {e}")
            raise

    def _init_image_pipeline(self) -> None:
        """Load the Stable Diffusion pipeline onto ``self.device``.

        Raises: re-raises any load failure after logging it.
        """
        try:
            # NOTE(review): `scheduler()` instantiates DDIMScheduler with its
            # library defaults rather than the model's shipped scheduler
            # config — confirm this is intended.
            self.image_pipeline = StableDiffusionPipeline.from_pretrained(
                self.image_config["model_id"],
                scheduler=self.image_config["scheduler"](),
                safety_checker=self.image_config["safety_checker"]
            ).to(self.device)
        except Exception as e:
            self.logger.error(f"Image pipeline initialization failed: {e}")
            raise

    @torch.inference_mode()
    def translate_text(self, text: str) -> Optional[str]:
        """Translate ``text`` to English.

        Returns the decoded translation, or None if tokenization/generation
        fails (the error is logged, not raised).
        """
        try:
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=self.translation_config["max_length"]
            ).to(self.device)

            translated = self.translation_model.generate(
                **inputs,
                num_beams=self.translation_config["num_beams"],
                early_stopping=True
            )

            return self.tokenizer.decode(translated[0], skip_special_tokens=True)
        except Exception as e:
            self.logger.error(f"Translation failed: {e}")
            return None

    def process_input(self, text: str) -> Tuple[str, Optional[str]]:
        """Translate ``text`` and generate a matching image.

        Returns (translated_text, path_to_saved_png) on success, or
        ("Translation failed", None) when translation errors out.
        (Fix: the annotation previously claimed Tuple[str, str] although the
        failure path returns None for the image.)
        """
        translated_text = self.translate_text(text)
        if translated_text:
            image = self.image_pipeline(
                prompt=f"A photorealistic scene depicting: {translated_text}",
                num_inference_steps=50,
                guidance_scale=7.5
            ).images[0]

            # Saved to the working directory; callers receive the file path.
            image_path = "output.png"
            image.save(image_path)
            return translated_text, image_path
        return "Translation failed", None
147
 
148
def create_interface() -> gr.Interface:
    """Build the Gradio UI bound to a freshly constructed LinguaVisionSystem."""
    pipeline = LinguaVisionSystem()

    text_input = gr.Textbox(
        label="Enter text in any language",
        placeholder="Type your text here..."
    )
    return gr.Interface(
        fn=pipeline.process_input,
        inputs=text_input,
        outputs=[
            gr.Textbox(label="English Translation"),
            gr.Image(label="Generated Visualization")
        ],
        title="LinguaVision: AI-Powered Language Learning Assistant",
        description="Transform text into visuals for enhanced language learning"
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
if __name__ == "__main__":
    # Serve on all interfaces at port 7860 (the conventional Spaces port).
    app = create_interface()
    app.launch(server_name="0.0.0.0", server_port=7860)
image_generator.py DELETED
@@ -1,50 +0,0 @@
1
- # from diffusers import StableDiffusionPipeline
2
-
3
- # def generate_image(prompt):
4
- # model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
5
- # model.to("cuda") # Use GPU for faster generation
6
- # image = model(prompt).images[0]
7
- # image.save("output.png")
8
- # return "output.png"
9
-
10
- # if __name__ == "__main__":
11
- # prompt = "A friendly person saying 'How are you?'"
12
- # print("Generated Image Path:", generate_image(prompt))
13
-
14
-
15
- # from diffusers import StableDiffusionPipeline
16
- # import torch
17
-
18
- # def generate_image(prompt):
19
- # model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
20
-
21
- # # Use GPU if available, otherwise fallback to CPU
22
- # device = "cuda" if torch.cuda.is_available() else "cpu"
23
- # model.to(device)
24
-
25
- # image = model(prompt).images[0]
26
- # image.save("output.png")
27
- # return "output.png"
28
-
29
- # if __name__ == "__main__":
30
- # prompt = "A friendly person saying 'How are you?'"
31
- # print("Generated Image Path:", generate_image(prompt))
32
-
33
-
34
from diffusers import StableDiffusionPipeline
import torch

# Load Stable Diffusion once at import time so every call reuses the weights.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = StableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-1-base",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
)
model.to(device)


def generate_image(prompt):
    """Run the preloaded pipeline on `prompt`; save and return the PNG path."""
    rendered = model(prompt).images[0]
    destination = "output.png"
    rendered.save(destination)
    return destination
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
translate.py DELETED
@@ -1,27 +0,0 @@
1
- # from transformers import MarianMTModel, MarianTokenizer
2
-
3
- # def translate_text(text, src_lang="es", tgt_lang="en"):
4
- # model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
5
- # tokenizer = MarianTokenizer.from_pretrained(model_name)
6
- # model = MarianMTModel.from_pretrained(model_name)
7
- # inputs = tokenizer(text, return_tensors="pt", padding=True)
8
- # translated = model.generate(**inputs)
9
- # return tokenizer.decode(translated[0], skip_special_tokens=True)
10
-
11
- # if __name__ == "__main__":
12
- # input_text = "¿Cómo estás?"
13
- # print("Translated Text:", translate_text(input_text, src_lang="es", tgt_lang="en"))
14
-
15
-
16
from transformers import MarianMTModel, MarianTokenizer

# Load the multilingual -> English MarianMT model once at import time so
# repeated translate_text calls reuse it.
model_name = "Helsinki-NLP/opus-mt-mul-en"
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)


def translate_text(text, src_lang="auto", tgt_lang="en"):
    """Translate `text` into English.

    `src_lang` and `tgt_lang` are accepted for signature compatibility but
    are not consulted: the preloaded opus-mt-mul-en model always produces
    English output.
    """
    batch = tokenizer(text, return_tensors="pt", padding=True)
    token_ids = translation_model.generate(**batch)
    return tokenizer.decode(token_ids[0], skip_special_tokens=True)