nazdridoy committed on
Commit
43a0ca3
·
verified ·
1 Parent(s): 8b82f5a

feat(image-to-image): add image-to-image generation

Browse files

- [feat] Implement `generate_image_to_image` for core generation logic and error handling (image_handler.py:153-296)
- [feat] Add `handle_image_to_image_generation` function for UI input processing (image_handler.py:299-318)
- [add] Define `DEFAULT_IMAGE_TO_IMAGE_MODEL`, `PROVIDER`, `MODEL_PRESETS`, and `EXAMPLE_PROMPTS` (utils.py:12-13,56-61,71-79)
- [feat] Create `create_image_to_image_tab` for Gradio UI, including presets and examples (ui_components.py:7-8,282-459)
- [feat] Integrate new image-to-image tab into the main Gradio interface (app.py:6,9,create_app():33-34)
- [docs] Update main header and footer with image-to-image feature description and tips (ui_components.py:create_main_header():470-471,create_footer():487-495)

Files changed (4) hide show
  1. app.py +5 -1
  2. image_handler.py +141 -0
  3. ui_components.py +158 -1
  4. utils.py +22 -0
app.py CHANGED
@@ -5,11 +5,12 @@ A comprehensive AI platform with chat and image generation capabilities.
5
 
6
  import gradio as gr
7
  from chat_handler import handle_chat_submit, handle_chat_retry
8
- from image_handler import handle_image_generation
9
  from ui_components import (
10
  create_main_header,
11
  create_chat_tab,
12
  create_image_tab,
 
13
  create_footer
14
  )
15
  from utils import get_gradio_theme
@@ -31,6 +32,9 @@ def create_app():
31
 
32
  # Image generation tab
33
  create_image_tab(handle_image_generation)
 
 
 
34
 
35
  # Footer with helpful information
36
  create_footer()
 
5
 
6
  import gradio as gr
7
  from chat_handler import handle_chat_submit, handle_chat_retry
8
+ from image_handler import handle_image_generation, handle_image_to_image_generation
9
  from ui_components import (
10
  create_main_header,
11
  create_chat_tab,
12
  create_image_tab,
13
+ create_image_to_image_tab,
14
  create_footer
15
  )
16
  from utils import get_gradio_theme
 
32
 
33
  # Image generation tab
34
  create_image_tab(handle_image_generation)
35
+
36
+ # Image-to-image tab
37
+ create_image_to_image_tab(handle_image_to_image_generation)
38
 
39
  # Footer with helpful information
40
  create_footer()
image_handler.py CHANGED
@@ -153,6 +153,147 @@ def generate_image(
153
  return None, format_error_message("Unexpected Error", f"An unexpected error occurred: {error_msg}")
154
 
155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  def handle_image_generation(prompt_val, model_val, provider_val, negative_prompt_val, width_val, height_val, steps_val, guidance_val, seed_val):
157
  """
158
  Handle image generation request with validation.
 
153
  return None, format_error_message("Unexpected Error", f"An unexpected error occurred: {error_msg}")
154
 
155
 
156
def generate_image_to_image(
    input_image,
    prompt: str,
    model_name: str,
    provider: str,
    negative_prompt: str = "",
    num_inference_steps: int = IMAGE_CONFIG["num_inference_steps"],
    guidance_scale: float = IMAGE_CONFIG["guidance_scale"],
    seed: int = IMAGE_CONFIG["seed"],
):
    """
    Generate an image using image-to-image generation with the specified model
    and provider through HF-Inferoxy.

    Args:
        input_image: Source image to transform (PIL image, file path, or bytes).
        prompt: Text description of the desired modification.
        model_name: HuggingFace model id, e.g. "Qwen/Qwen-Image-Edit".
        provider: Inference provider name, e.g. "fal-ai".
        negative_prompt: Things to avoid in the result; omitted when empty.
        num_inference_steps: Diffusion steps; more steps = better quality, slower.
        guidance_scale: How closely the model follows the prompt.
        seed: RNG seed; -1 means "let the provider pick a random seed".

    Returns:
        Tuple of (image, status_message). `image` is None on any failure and
        `status_message` then carries a user-facing error description.
    """
    # Validate proxy API key before doing any network work
    is_valid, error_msg = validate_proxy_key()
    if not is_valid:
        return None, error_msg

    proxy_api_key = os.getenv("PROXY_KEY")

    token_id = None
    try:
        # Get token from HF-Inferoxy proxy server with timeout handling
        print(f"🔑 Image-to-Image: Requesting token from proxy...")
        token, token_id = get_proxy_token(api_key=proxy_api_key)
        print(f"✅ Image-to-Image: Got token: {token_id}")

        print(f"🎨 Image-to-Image: Using model='{model_name}', provider='{provider}'")

        # Create client with specified provider
        client = InferenceClient(
            provider=provider,
            api_key=token
        )

        print(f"🚀 Image-to-Image: Client created, preparing generation params...")

        # Prepare generation parameters.
        # FIX: InferenceClient.image_to_image() takes the source image as
        # `image` (not `input_image`), and the model must be passed explicitly —
        # previously model_name was only printed, so the provider's default
        # model was silently used instead of the user's selection.
        generation_params = {
            "image": input_image,
            "prompt": prompt,
            "model": model_name,
            "num_inference_steps": num_inference_steps,
            "guidance_scale": guidance_scale,
        }

        # Add optional parameters only when provided
        if negative_prompt:
            generation_params["negative_prompt"] = negative_prompt
        if seed != -1:
            # NOTE(review): seed support varies per provider/task — confirm the
            # installed huggingface_hub version forwards it for image_to_image.
            generation_params["seed"] = seed

        print(f"📡 Image-to-Image: Making generation request with {IMAGE_GENERATION_TIMEOUT}s timeout...")

        # Create generation function for timeout handling
        def generate_image_task():
            return client.image_to_image(**generation_params)

        # Execute with timeout using ThreadPoolExecutor
        with ThreadPoolExecutor(max_workers=1) as executor:
            future = executor.submit(generate_image_task)

            try:
                # Generate image with timeout
                image = future.result(timeout=IMAGE_GENERATION_TIMEOUT)
            except FutureTimeoutError:
                # cancel() is best-effort: it cannot stop a task that already
                # started running, but it prevents a queued one from starting.
                future.cancel()
                raise TimeoutError(f"Image-to-image generation timed out after {IMAGE_GENERATION_TIMEOUT} seconds")

        print(f"🖼️ Image-to-Image: Generation completed! Image type: {type(image)}")

        # Report successful token usage so the proxy can track token health
        if token_id:
            report_token_status(token_id, "success", api_key=proxy_api_key)

        return image, format_success_message("Image-to-image generated", f"using {model_name} on {provider}")

    except ConnectionError as e:
        # Handle proxy connection errors
        error_msg = f"Cannot connect to HF-Inferoxy server: {str(e)}"
        print(f"🔌 Image-to-Image connection error: {error_msg}")
        if token_id:
            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key)
        return None, format_error_message("Connection Error", "Unable to connect to the proxy server. Please check if it's running.")

    except TimeoutError as e:
        # Handle timeout errors
        error_msg = f"Image-to-image generation timed out: {str(e)}"
        print(f"⏰ Image-to-Image timeout: {error_msg}")
        if token_id:
            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key)
        return None, format_error_message("Timeout Error", f"Image-to-image generation took too long (>{IMAGE_GENERATION_TIMEOUT//60} minutes). Try reducing steps.")

    except HfHubHTTPError as e:
        # Handle HuggingFace API errors
        error_msg = str(e)
        print(f"🤗 Image-to-Image HF error: {error_msg}")
        if token_id:
            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key)

        # Provide more user-friendly error messages keyed off the HTTP status
        if "401" in error_msg:
            return None, format_error_message("Authentication Error", "Invalid or expired API token. The proxy will provide a new token on retry.")
        elif "402" in error_msg:
            return None, format_error_message("Quota Exceeded", "API quota exceeded. The proxy will try alternative providers.")
        elif "429" in error_msg:
            return None, format_error_message("Rate Limited", "Too many requests. Please wait a moment and try again.")
        elif "content policy" in error_msg.lower() or "safety" in error_msg.lower():
            return None, format_error_message("Content Policy", "Image prompt was rejected by content policy. Please try a different prompt.")
        else:
            return None, format_error_message("HuggingFace API Error", error_msg)

    except Exception as e:
        # Handle all other errors
        error_msg = str(e)
        print(f"❌ Image-to-Image unexpected error: {error_msg}")
        if token_id:
            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key)
        return None, format_error_message("Unexpected Error", f"An unexpected error occurred: {error_msg}")
274
+
275
+
276
def handle_image_to_image_generation(input_image_val, prompt_val, model_val, provider_val, negative_prompt_val, steps_val, guidance_val, seed_val):
    """
    Handle image-to-image generation request with validation.

    Thin UI-facing wrapper: checks the inputs coming from the Gradio widgets
    and delegates to generate_image_to_image().

    Returns:
        Tuple of (image, status_message); image is None when validation fails.
    """
    # An input image is mandatory for image-to-image generation
    if input_image_val is None:
        return None, format_error_message("Validation Error", "Please upload an input image")

    # FIX: also reject empty/whitespace prompts up front — previously they
    # fell through to a guaranteed API error downstream.
    if not prompt_val or not prompt_val.strip():
        return None, format_error_message("Validation Error", "Please enter a prompt describing the modification")

    # Generate image-to-image
    return generate_image_to_image(
        input_image=input_image_val,
        prompt=prompt_val,
        model_name=model_val,
        provider=provider_val,
        negative_prompt=negative_prompt_val,
        num_inference_steps=steps_val,
        guidance_scale=guidance_val,
        seed=seed_val
    )
295
+
296
+
297
  def handle_image_generation(prompt_val, model_val, provider_val, negative_prompt_val, width_val, height_val, steps_val, guidance_val, seed_val):
298
  """
299
  Handle image generation request with validation.
ui_components.py CHANGED
@@ -6,8 +6,9 @@ Contains functions to create different sections of the Gradio interface.
6
  import gradio as gr
7
  from utils import (
8
  DEFAULT_CHAT_MODEL, DEFAULT_IMAGE_MODEL, DEFAULT_IMAGE_PROVIDER,
 
9
  CHAT_CONFIG, IMAGE_CONFIG, IMAGE_PROVIDERS, IMAGE_MODEL_PRESETS,
10
- IMAGE_EXAMPLE_PROMPTS
11
  )
12
 
13
 
@@ -282,6 +283,154 @@ def create_image_tab(handle_image_generation_fn):
282
  gen_event.then(lambda: gr.update(visible=False), None, [stop_generate_btn], queue=False)
283
 
284
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  def create_image_presets(img_model_name, img_provider):
286
  """Create quick model presets for image generation."""
287
  with gr.Group():
@@ -315,6 +464,7 @@ def create_main_header():
315
  **Features:**
316
  - πŸ’¬ **Smart Chat**: Conversational AI with streaming responses
317
  - 🎨 **Image Generation**: Text-to-image creation with multiple providers
 
318
  - πŸ”„ **Intelligent Token Management**: Automatic token rotation and error handling
319
  - 🌐 **Multi-Provider Support**: Works with HF Inference, Cerebras, Cohere, Groq, Together, Fal.ai, and more
320
  """)
@@ -337,6 +487,13 @@ def create_footer():
337
  - Experiment with different models and providers for varied styles
338
  - Higher inference steps = better quality but slower generation
339
 
 
 
 
 
 
 
 
340
  **Supported Providers:**
341
  - **fal-ai**: High-quality image generation (default for images)
342
  - **hf-inference**: Core API with comprehensive model support
 
6
  import gradio as gr
7
  from utils import (
8
  DEFAULT_CHAT_MODEL, DEFAULT_IMAGE_MODEL, DEFAULT_IMAGE_PROVIDER,
9
+ DEFAULT_IMAGE_TO_IMAGE_MODEL, DEFAULT_IMAGE_TO_IMAGE_PROVIDER,
10
  CHAT_CONFIG, IMAGE_CONFIG, IMAGE_PROVIDERS, IMAGE_MODEL_PRESETS,
11
+ IMAGE_TO_IMAGE_MODEL_PRESETS, IMAGE_EXAMPLE_PROMPTS, IMAGE_TO_IMAGE_EXAMPLE_PROMPTS
12
  )
13
 
14
 
 
283
  gen_event.then(lambda: gr.update(visible=False), None, [stop_generate_btn], queue=False)
284
 
285
 
286
def create_image_to_image_tab(handle_image_to_image_generation_fn):
    """
    Create the image-to-image tab interface.

    Args:
        handle_image_to_image_generation_fn: Callback wired to the Generate
            button; receives (input_image, prompt, model, provider,
            negative_prompt, steps, guidance, seed) and must return
            (image, status_message).
    """
    with gr.Tab("🖼️ Image-to-Image", id="image-to-image"):
        with gr.Row():
            with gr.Column(scale=1):
                # Input image uploaded by the user (delivered as a PIL image)
                input_image = gr.Image(
                    label="Input Image",
                    type="pil",
                    height=400,
                    show_download_button=True
                )

                # Model and provider inputs
                with gr.Group():
                    gr.Markdown("**🤖 Model & Provider**")
                    img2img_model_name = gr.Textbox(
                        value=DEFAULT_IMAGE_TO_IMAGE_MODEL,
                        label="Model Name",
                        placeholder="e.g., Qwen/Qwen-Image-Edit or black-forest-labs/FLUX.1-Kontext-dev"
                    )
                    img2img_provider = gr.Dropdown(
                        choices=IMAGE_PROVIDERS,
                        value=DEFAULT_IMAGE_TO_IMAGE_PROVIDER,
                        label="Provider",
                        interactive=True
                    )

            with gr.Column(scale=1):
                # Output image produced by the generation callback
                output_image = gr.Image(
                    label="Generated Image",
                    type="pil",
                    height=400,
                    show_download_button=True
                )
                # Read-only status line fed by the callback's second return value
                status_text = gr.Textbox(
                    label="Generation Status",
                    interactive=False,
                    lines=2
                )

            with gr.Column(scale=1):
                # Generation parameters
                with gr.Group():
                    gr.Markdown("**📝 Prompts**")
                    img2img_prompt = gr.Textbox(
                        value=IMAGE_TO_IMAGE_EXAMPLE_PROMPTS[0],  # Use first example as default
                        label="Prompt",
                        lines=3,
                        placeholder="Describe how you want to modify the image..."
                    )
                    img2img_negative_prompt = gr.Textbox(
                        value=IMAGE_CONFIG["negative_prompt"],
                        label="Negative Prompt",
                        lines=2,
                        placeholder="Describe what you DON'T want in the modified image..."
                    )

                with gr.Group():
                    gr.Markdown("**⚙️ Generation Settings**")
                    with gr.Row():
                        img2img_steps = gr.Slider(
                            minimum=10, maximum=100, value=IMAGE_CONFIG["num_inference_steps"], step=1,
                            label="Inference Steps", info="More steps = better quality"
                        )
                        img2img_guidance = gr.Slider(
                            minimum=1.0, maximum=20.0, value=IMAGE_CONFIG["guidance_scale"], step=0.5,
                            label="Guidance Scale", info="How closely to follow prompt"
                        )

                    img2img_seed = gr.Slider(
                        minimum=-1, maximum=999999, value=IMAGE_CONFIG["seed"], step=1,
                        label="Seed", info="-1 for random"
                    )

                # Generate and Stop buttons (Stop starts hidden and is only
                # shown while a generation is in flight)
                with gr.Row():
                    generate_btn = gr.Button(
                        "🖼️ Generate Image-to-Image",
                        variant="primary",
                        size="lg",
                        scale=2
                    )
                    stop_generate_btn = gr.Button("⏹ Stop", variant="secondary", visible=False)

        # Quick model presets
        create_image_to_image_presets(img2img_model_name, img2img_provider)

        # Examples for image-to-image generation
        create_image_to_image_examples(img2img_prompt)

        # Connect image-to-image generation events.
        # Show stop immediately when starting generation (queue=False so the
        # button appears without waiting behind the generation job itself).
        generate_btn.click(
            fn=lambda: gr.update(visible=True),
            inputs=None,
            outputs=[stop_generate_btn],
            queue=False
        )

        # The actual generation event; kept in a variable so Stop can cancel it.
        gen_event = generate_btn.click(
            fn=handle_image_to_image_generation_fn,
            inputs=[
                input_image, img2img_prompt, img2img_model_name, img2img_provider, img2img_negative_prompt,
                img2img_steps, img2img_guidance, img2img_seed
            ],
            outputs=[output_image, status_text]
        )

        # Stop current image-to-image generation (cancels=[gen_event]) and
        # hide the Stop button again.
        stop_generate_btn.click(
            fn=lambda: gr.update(visible=False),
            inputs=None,
            outputs=[stop_generate_btn],
            cancels=[gen_event],
            queue=False
        )

        # Hide stop after generation completes normally
        gen_event.then(lambda: gr.update(visible=False), None, [stop_generate_btn], queue=False)
409
+
410
+
411
def create_image_to_image_presets(img2img_model_name, img2img_provider):
    """Render one quick-select button per image-to-image model preset.

    Clicking a button writes that preset's model id and provider into the
    supplied textbox/dropdown components.
    """
    with gr.Group():
        gr.Markdown("**🎯 Popular Presets**")

        for preset in IMAGE_TO_IMAGE_MODEL_PRESETS:
            label, preset_model, preset_provider = preset
            preset_btn = gr.Button(label, size="sm")
            # Bind the pair as lambda defaults so each closure keeps its own
            # values (avoids the classic late-binding loop pitfall).
            preset_btn.click(
                lambda m=preset_model, p=preset_provider: (m, p),
                outputs=[img2img_model_name, img2img_provider]
            )
422
+
423
+
424
def create_image_to_image_examples(img2img_prompt):
    """Create clickable example prompts for image-to-image generation.

    Selecting an example fills the given prompt textbox.

    Args:
        img2img_prompt: The gr.Textbox component to populate on selection.
    """
    with gr.Group():
        gr.Markdown("**🌟 Example Prompts**")
        # gr.Examples registers itself with the enclosing Blocks context;
        # the return value was previously bound to an unused local, dropped here.
        gr.Examples(
            examples=[[prompt] for prompt in IMAGE_TO_IMAGE_EXAMPLE_PROMPTS],
            inputs=img2img_prompt
        )
432
+
433
+
434
  def create_image_presets(img_model_name, img_provider):
435
  """Create quick model presets for image generation."""
436
  with gr.Group():
 
464
  **Features:**
465
  - πŸ’¬ **Smart Chat**: Conversational AI with streaming responses
466
  - 🎨 **Image Generation**: Text-to-image creation with multiple providers
467
+ - πŸ–ΌοΈ **Image-to-Image**: Transform and modify existing images with AI
468
  - πŸ”„ **Intelligent Token Management**: Automatic token rotation and error handling
469
  - 🌐 **Multi-Provider Support**: Works with HF Inference, Cerebras, Cohere, Groq, Together, Fal.ai, and more
470
  """)
 
487
  - Experiment with different models and providers for varied styles
488
  - Higher inference steps = better quality but slower generation
489
 
490
+ **Image-to-Image Tab:**
491
+ - Upload an input image you want to modify
492
+ - Describe the changes you want to make to the image
493
+ - Use negative prompts to avoid unwanted modifications
494
+ - Perfect for style transfers, object additions, and image transformations
495
+ - Works great with models like Qwen Image Edit and FLUX.1 Kontext
496
+
497
  **Supported Providers:**
498
  - **fal-ai**: High-quality image generation (default for images)
499
  - **hf-inference**: Core API with comprehensive model support
utils.py CHANGED
@@ -10,6 +10,8 @@ import os
10
  DEFAULT_CHAT_MODEL = "openai/gpt-oss-20b"
11
  DEFAULT_IMAGE_MODEL = "Qwen/Qwen-Image"
12
  DEFAULT_IMAGE_PROVIDER = "fal-ai"
 
 
13
 
14
  # Chat configuration
15
  CHAT_CONFIG = {
@@ -56,6 +58,14 @@ IMAGE_MODEL_PRESETS = [
56
  ("SDXL (HF)", "stabilityai/stable-diffusion-xl-base-1.0", "hf-inference"),
57
  ]
58
 
 
 
 
 
 
 
 
 
59
  # Example prompts for image generation
60
  IMAGE_EXAMPLE_PROMPTS = [
61
  "A majestic dragon flying over a medieval castle, epic fantasy art, detailed, 8k",
@@ -68,6 +78,18 @@ IMAGE_EXAMPLE_PROMPTS = [
68
  "An astronaut floating in space with Earth in background, photorealistic, stunning"
69
  ]
70
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  def get_proxy_key():
73
  """Get the proxy API key from environment variables."""
 
10
  DEFAULT_CHAT_MODEL = "openai/gpt-oss-20b"
11
  DEFAULT_IMAGE_MODEL = "Qwen/Qwen-Image"
12
  DEFAULT_IMAGE_PROVIDER = "fal-ai"
13
# Defaults for the image-to-image tab; mirrors the text-to-image defaults
# above (Qwen model family on the fal-ai provider).
DEFAULT_IMAGE_TO_IMAGE_MODEL = "Qwen/Qwen-Image-Edit"
DEFAULT_IMAGE_TO_IMAGE_PROVIDER = "fal-ai"
15
 
16
  # Chat configuration
17
  CHAT_CONFIG = {
 
58
  ("SDXL (HF)", "stabilityai/stable-diffusion-xl-base-1.0", "hf-inference"),
59
  ]
60
 
61
# Model presets for image-to-image generation.
# Each entry is (button label, HuggingFace model id, inference provider),
# consumed by create_image_to_image_presets() in ui_components.py.
IMAGE_TO_IMAGE_MODEL_PRESETS = [
    ("Qwen Image Edit (Fal.ai)", "Qwen/Qwen-Image-Edit", "fal-ai"),
    ("Qwen Image Edit (Replicate)", "Qwen/Qwen-Image-Edit", "replicate"),
    ("FLUX.1 Kontext (Nebius)", "black-forest-labs/FLUX.1-Kontext-dev", "nebius"),
    ("SDXL (HF)", "stabilityai/stable-diffusion-xl-base-1.0", "hf-inference"),
]
68
+
69
  # Example prompts for image generation
70
  IMAGE_EXAMPLE_PROMPTS = [
71
  "A majestic dragon flying over a medieval castle, epic fantasy art, detailed, 8k",
 
78
  "An astronaut floating in space with Earth in background, photorealistic, stunning"
79
  ]
80
 
81
# Example prompts for image-to-image generation.
# Shown as clickable examples in the UI; the first entry also serves as the
# prompt textbox default in create_image_to_image_tab().
IMAGE_TO_IMAGE_EXAMPLE_PROMPTS = [
    "Turn the cat into a tiger with stripes and fierce expression",
    "Make the background a magical forest with glowing mushrooms",
    "Change the style to vintage comic book with bold colors",
    "Add a superhero cape and mask to the person",
    "Transform the building into a futuristic skyscraper",
    "Make the flowers bloom and add butterflies around them",
    "Change the weather to a stormy night with lightning",
    "Add a magical portal in the background with sparkles"
]
92
+
93
 
94
  def get_proxy_key():
95
  """Get the proxy API key from environment variables."""