SandaAbhishekSagar commited on
Commit
6c3f4f2
·
1 Parent(s): add94a1

pushing original code

Browse files
Files changed (3) hide show
  1. app.py +25 -135
  2. image_generator.py +50 -0
  3. translate.py +27 -0
app.py CHANGED
@@ -26,144 +26,34 @@
26
  # interface.launch()
27
 
28
 
29
- # import gradio as gr
30
- # from translate import translate_text
31
- # from image_generator import generate_image
32
-
33
- # def chatbot(input_text, src_lang="auto"):
34
- # """Process user input, translate it, and generate an image."""
35
- # # Translate input to English
36
- # translated_text = translate_text(input_text, src_lang, "en")
37
-
38
- # # Generate an image based on the translated text
39
- # image_path = generate_image(f"A scene depicting: {translated_text}")
40
-
41
- # return translated_text, image_path
42
-
43
- # # Gradio Interface
44
- # interface = gr.Interface(
45
- # fn=chatbot,
46
- # inputs=[
47
- # gr.Textbox(label="Enter text in any language"),
48
- # gr.Textbox(label="Source Language (optional, e.g., 'es' for Spanish)", value="auto"),
49
- # ],
50
- # outputs=[
51
- # gr.Textbox(label="Translated Text"),
52
- # gr.Image(label="Generated Image"),
53
- # ],
54
- # title="LinguaVision - Multilingual Chatbot with Image Generation",
55
- # description="Enter text in any language to translate it into English and generate an image based on the text.\n NOTE: This tool takes approximately 12 minutes to execute.",
56
- # )
57
-
58
- # if __name__ == "__main__":
59
- # interface.launch(share=True)
60
-
61
- import torch
62
- from transformers import MarianMTModel, MarianTokenizer
63
- from diffusers import StableDiffusionPipeline, DDIMScheduler
64
  import gradio as gr
65
- from typing import Tuple, Optional
66
- import logging
67
-
68
- class LinguaVisionSystem:
69
- def __init__(self, device: str = "cuda" if torch.cuda.is_available() else "cpu"):
70
- self.device = device
71
- self.logger = logging.getLogger(__name__)
72
-
73
- # Initialize translation pipeline
74
- self.translation_config = {
75
- "model_name": "Helsinki-NLP/opus-mt-mul-en",
76
- "max_length": 128,
77
- "num_beams": 4
78
- }
79
- self._init_translation_pipeline()
80
-
81
- # Initialize image generation pipeline
82
- self.image_config = {
83
- "model_id": "stabilityai/stable-diffusion-2-1-base",
84
- "safety_checker": None, # Disable for performance
85
- "scheduler": DDIMScheduler
86
- }
87
- self._init_image_pipeline()
88
-
89
- def _init_translation_pipeline(self) -> None:
90
- try:
91
- self.tokenizer = MarianTokenizer.from_pretrained(
92
- self.translation_config["model_name"]
93
- )
94
- self.translation_model = MarianMTModel.from_pretrained(
95
- self.translation_config["model_name"]
96
- ).to(self.device)
97
- except Exception as e:
98
- self.logger.error(f"Translation pipeline initialization failed: {e}")
99
- raise
100
-
101
- def _init_image_pipeline(self) -> None:
102
- try:
103
- self.image_pipeline = StableDiffusionPipeline.from_pretrained(
104
- self.image_config["model_id"],
105
- scheduler=self.image_config["scheduler"](),
106
- safety_checker=self.image_config["safety_checker"]
107
- ).to(self.device)
108
- except Exception as e:
109
- self.logger.error(f"Image pipeline initialization failed: {e}")
110
- raise
111
-
112
- @torch.inference_mode()
113
- def translate_text(self, text: str) -> Optional[str]:
114
- try:
115
- inputs = self.tokenizer(
116
- text,
117
- return_tensors="pt",
118
- padding=True,
119
- truncation=True,
120
- max_length=self.translation_config["max_length"]
121
- ).to(self.device)
122
-
123
- translated = self.translation_model.generate(
124
- **inputs,
125
- num_beams=self.translation_config["num_beams"],
126
- early_stopping=True
127
- )
128
-
129
- return self.tokenizer.decode(translated[0], skip_special_tokens=True)
130
- except Exception as e:
131
- self.logger.error(f"Translation failed: {e}")
132
- return None
133
-
134
- def process_input(self, text: str) -> Tuple[str, str]:
135
- translated_text = self.translate_text(text)
136
- if translated_text:
137
- image = self.image_pipeline(
138
- prompt=f"A photorealistic scene depicting: {translated_text}",
139
- num_inference_steps=50,
140
- guidance_scale=7.5
141
- ).images[0]
142
-
143
- image_path = "output.png"
144
- image.save(image_path)
145
- return translated_text, image_path
146
- return "Translation failed", None
147
 
148
- def create_interface() -> gr.Interface:
149
- system = LinguaVisionSystem()
 
 
150
 
151
- interface = gr.Interface(
152
- fn=system.process_input,
153
- inputs=gr.Textbox(
154
- label="Enter text in any language",
155
- placeholder="Type your text here..."
156
- ),
157
- outputs=[
158
- gr.Textbox(label="English Translation"),
159
- gr.Image(label="Generated Visualization")
160
- ],
161
- title="LinguaVision: AI-Powered Language Learning Assistant",
162
- description="Transform text into visuals for enhanced language learning"
163
- )
164
 
165
- return interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  if __name__ == "__main__":
168
- interface = create_interface()
169
- interface.launch(server_name="0.0.0.0", server_port=7860)
 
26
  # interface.launch()
27
 
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  import gradio as gr
30
+ from translate import translate_text
31
+ from image_generator import generate_image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
def chatbot(input_text, src_lang="auto"):
    """Translate user input to English and render an illustrative image.

    Args:
        input_text: Text in any language.
        src_lang: Source language code forwarded to translate_text()
            ("auto" by default).

    Returns:
        A (translated_text, image_path) pair: the English translation and
        the filesystem path of the generated image.
    """
    # First translate the input into English...
    translated_text = translate_text(input_text, src_lang, "en")
    # ...then illustrate the translated sentence.
    image_path = generate_image(f"A scene depicting: {translated_text}")
    return translated_text, image_path
42
+
43
# Gradio Interface: build each component up front, then wire them together.
text_input = gr.Textbox(label="Enter text in any language")
lang_input = gr.Textbox(
    label="Source Language (optional, e.g., 'es' for Spanish)", value="auto"
)
translation_output = gr.Textbox(label="Translated Text")
image_output = gr.Image(label="Generated Image")

interface = gr.Interface(
    fn=chatbot,
    inputs=[text_input, lang_input],
    outputs=[translation_output, image_output],
    title="LinguaVision - Multilingual Chatbot with Image Generation",
    description=(
        "Enter text in any language to translate it into English and "
        "generate an image based on the text.\n NOTE: This tool takes "
        "approximately 20 minutes to execute."
    ),
)

if __name__ == "__main__":
    interface.launch(share=True)
 
image_generator.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from diffusers import StableDiffusionPipeline
2
+
3
+ # def generate_image(prompt):
4
+ # model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
5
+ # model.to("cuda") # Use GPU for faster generation
6
+ # image = model(prompt).images[0]
7
+ # image.save("output.png")
8
+ # return "output.png"
9
+
10
+ # if __name__ == "__main__":
11
+ # prompt = "A friendly person saying 'How are you?'"
12
+ # print("Generated Image Path:", generate_image(prompt))
13
+
14
+
15
+ # from diffusers import StableDiffusionPipeline
16
+ # import torch
17
+
18
+ # def generate_image(prompt):
19
+ # model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
20
+
21
+ # # Use GPU if available, otherwise fallback to CPU
22
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
23
+ # model.to(device)
24
+
25
+ # image = model(prompt).images[0]
26
+ # image.save("output.png")
27
+ # return "output.png"
28
+
29
+ # if __name__ == "__main__":
30
+ # prompt = "A friendly person saying 'How are you?'"
31
+ # print("Generated Image Path:", generate_image(prompt))
32
+
33
+
34
from diffusers import StableDiffusionPipeline
import torch

# Preload the model globally so each generate_image() call reuses it
# instead of paying the (very large) model-load cost per request.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = StableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-1-base",
    # fp16 halves GPU memory use; fall back to fp32 on CPU.  Derive the
    # dtype from the already-computed `device` rather than calling
    # torch.cuda.is_available() a second time.
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
model.to(device)

def generate_image(prompt):
    """Generate an image from a text prompt.

    Args:
        prompt: Text description of the scene to render.

    Returns:
        Path of the saved PNG ("output.png").

    NOTE(review): every call overwrites the same "output.png", so
    concurrent requests will clobber each other's output — confirm this
    is acceptable for the deployment.
    """
    image = model(prompt).images[0]
    output_path = "output.png"
    image.save(output_path)
    return output_path
translate.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from transformers import MarianMTModel, MarianTokenizer
2
+
3
+ # def translate_text(text, src_lang="es", tgt_lang="en"):
4
+ # model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
5
+ # tokenizer = MarianTokenizer.from_pretrained(model_name)
6
+ # model = MarianMTModel.from_pretrained(model_name)
7
+ # inputs = tokenizer(text, return_tensors="pt", padding=True)
8
+ # translated = model.generate(**inputs)
9
+ # return tokenizer.decode(translated[0], skip_special_tokens=True)
10
+
11
+ # if __name__ == "__main__":
12
+ # input_text = "¿Cómo estás?"
13
+ # print("Translated Text:", translate_text(input_text, src_lang="es", tgt_lang="en"))
14
+
15
+
16
from transformers import MarianMTModel, MarianTokenizer

# Preload the translation model globally so repeated translate_text()
# calls do not re-load the weights.
model_name = "Helsinki-NLP/opus-mt-mul-en"
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)

def translate_text(text, src_lang="auto", tgt_lang="en"):
    """Translate text from any language to English.

    Args:
        text: Input text in any language supported by opus-mt-mul-en.
        src_lang: Accepted for caller compatibility but ignored — the
            multilingual model handles the source language itself.
        tgt_lang: Accepted for caller compatibility but ignored — this
            model only targets English.

    Returns:
        The English translation of ``text``.
    """
    # truncation=True prevents a hard failure when the input exceeds the
    # model's maximum sequence length; over-long input is truncated instead.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = translation_model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)