import streamlit as st
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from diffusers import UniPCMultistepScheduler
import torch
from PIL import Image
import numpy as np
import cv2
import time
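# Dependency note: these imports assume a Spaces requirements.txt roughly
# along the lines of streamlit, torch, diffusers, transformers, accelerate,
# opencv-python-headless, Pillow and numpy (accelerate is needed by
# enable_model_cpu_offload() below; exact version pins are left to the deployer).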

# App title and config
st.set_page_config(
    page_title="AI Image Generator with ControlNet",
    page_icon="🎨",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS for styling
st.markdown("""
    <style>
    .main {
        background-color: #f5f5f5;
    }
    .stButton>button {
        background-color: #4CAF50;
        color: white;
        border-radius: 8px;
        padding: 10px 24px;
        font-weight: bold;
    }
    .stButton>button:hover {
        background-color: #45a049;
    }
    .stSelectbox, .stSlider, .stTextInput {
        margin-bottom: 20px;
    }
    .header {
        color: #4CAF50;
        text-align: center;
    }
    .footer {
        text-align: center;
        margin-top: 30px;
        color: #777;
        font-size: 0.9em;
    }
    .image-container {
        display: flex;
        justify-content: space-around;
        flex-wrap: wrap;
        gap: 20px;
        margin-top: 20px;
    }
    .image-card {
        border-radius: 10px;
        box-shadow: 0 4px 8px rgba(0,0,0,0.1);
        padding: 15px;
        background: white;
    }
    </style>
    """, unsafe_allow_html=True)

# Header
st.markdown("<h1 class='header'>🎨 AI Image Generator with ControlNet</h1>", unsafe_allow_html=True)
st.markdown("Generate stunning images guided by Stable Diffusion and ControlNet. Upload a reference image or use edge detection to control the output.")

# Sidebar for controls
with st.sidebar:
    st.image("https://huggingface.co/front/assets/huggingface_logo-noborder.svg", width=200)
    st.markdown("### Configuration")
    
    # Model selection
    model_choice = st.selectbox(
        "Select ControlNet Type",
        ("Canny Edge", "Depth Map", "OpenPose (Human Pose)"),
        index=0
    )
    
    # Parameters
    prompt = st.text_area("Prompt", "a beautiful landscape with mountains and lake, highly detailed, digital art")
    negative_prompt = st.text_area("Negative Prompt", "blurry, low quality, distorted")
    num_images = st.slider("Number of images to generate", 1, 4, 1)
    steps = st.slider("Number of inference steps", 20, 100, 50)
    guidance_scale = st.slider("Guidance scale", 1.0, 20.0, 7.5)
    seed = st.number_input("Seed", value=42, min_value=0, max_value=1000000)
    
    # Upload control image
    uploaded_file = st.file_uploader("Upload control image", type=["jpg", "png", "jpeg"])
    
    # Advanced options
    with st.expander("Advanced Options"):
        strength = st.slider("Control strength", 0.1, 2.0, 1.0)
        low_threshold = st.slider("Canny low threshold", 1, 255, 100)
        high_threshold = st.slider("Canny high threshold", 1, 255, 200)

# Initialize models (cached)
@st.cache_resource
def load_models(model_type):
    if model_type == "Canny Edge":
        controlnet = ControlNetModel.from_pretrained(
            "lllyasviel/sd-controlnet-canny", 
            torch_dtype=torch.float16
        )
    elif model_type == "Depth Map":
        controlnet = ControlNetModel.from_pretrained(
            "lllyasviel/sd-controlnet-depth", 
            torch_dtype=torch.float16
        )
    else:  # OpenPose
        controlnet = ControlNetModel.from_pretrained(
            "lllyasviel/sd-controlnet-openpose", 
            torch_dtype=torch.float16
        )
    
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        controlnet=controlnet,
        torch_dtype=torch.float16,
        safety_checker=None
    )
    
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    # enable_model_cpu_offload() manages device placement itself (it requires
    # accelerate); pairing it with an explicit .to("cuda") wastes GPU memory
    pipe.enable_model_cpu_offload()
    return pipe
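
# Hedged sketch: load_models() assumes a CUDA device is available. On CPU-only
# hardware, a fallback along these lines may work (float32 and much slower;
# the names below are illustrative and not wired into the app):
#
#     device = "cuda" if torch.cuda.is_available() else "cpu"
#     dtype = torch.float16 if device == "cuda" else torch.float32
#     pipe = StableDiffusionControlNetPipeline.from_pretrained(
#         "runwayml/stable-diffusion-v1-5", controlnet=controlnet,
#         torch_dtype=dtype, safety_checker=None,
#     ).to(device)
#     pipe.enable_attention_slicing()  # standard diffusers memory saver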

# Process control image based on model type
def process_control_image(image, model_type):
    # PIL gives RGB arrays, so all OpenCV conversions below are RGB-based
    image = np.array(image)
    
    if model_type == "Canny Edge":
        # Thresholds come from the sidebar's Advanced Options expander
        image = cv2.Canny(image, low_threshold, high_threshold)
        image = image[:, :, None]
        image = np.concatenate([image, image, image], axis=2)
    elif model_type == "Depth Map":
        # Placeholder: a real depth map needs an estimator such as MiDaS
        # (see the sketch below); plain grayscale only approximates one
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        image = np.stack([image] * 3, axis=-1)
    else:  # OpenPose
        # Placeholder: real pose conditioning needs an OpenPose detector
        # (see the sketch below); the array is already RGB, so pass it through
        pass
    
    return Image.fromarray(image)
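
# Hedged sketch: real preprocessors for the Depth Map and OpenPose branches
# above, assuming the transformers and controlnet_aux packages are installed
# (not wired into the app; the helper names are illustrative):
#
#     from transformers import pipeline
#     from controlnet_aux import OpenposeDetector
#
#     depth_estimator = pipeline("depth-estimation")
#     pose_detector = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
#
#     def estimate_depth(img):
#         depth = depth_estimator(img)["depth"]            # PIL grayscale map
#         arr = np.array(depth)[:, :, None]
#         return Image.fromarray(np.concatenate([arr] * 3, axis=2))
#
#     def detect_pose(img):
#         return pose_detector(img)                        # pose skeleton image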

# Main content
col1, col2 = st.columns([1, 1])

with col1:
    st.markdown("### Control Image")
    if uploaded_file is not None:
        # Convert to RGB so RGBA or paletted uploads don't break cv2 processing
        control_image = Image.open(uploaded_file).convert("RGB")
        processed_image = process_control_image(control_image, model_choice)
        st.image(processed_image, caption="Processed Control Image", use_column_width=True)
    else:
        st.info("Please upload an image to use as control")

with col2:
    st.markdown("### Generated Images")
    if st.button("Generate Images"):
        if uploaded_file is None:
            st.warning("Please upload a control image first")
        else:
            with st.spinner("Generating images... Please wait"):
                start_time = time.time()
                
                # Load models
                pipe = load_models(model_choice)
                
                # Generator for reproducibility
                generator = torch.Generator(device="cuda").manual_seed(seed)
                
                # Generate images
                images = pipe(
                    [prompt] * num_images,
                    negative_prompt=[negative_prompt] * num_images,
                    image=processed_image,
                    num_inference_steps=steps,
                    generator=generator,
                    guidance_scale=guidance_scale,
                    controlnet_conditioning_scale=strength
                ).images
                
                # Display results (st.image lays out its own containers; the
                # raw-HTML wrapper divs never enclosed the images, so they
                # were dropped)
                for i, img in enumerate(images):
                    st.image(img, caption=f"Image {i+1}", use_column_width=True)
                
                # Show performance info
                end_time = time.time()
                st.success(f"Generated {num_images} images in {end_time - start_time:.2f} seconds")

# Footer
st.markdown("""
    <div class='footer'>
        <p>Powered by Stable Diffusion and ControlNet | Deployed on Hugging Face Spaces</p>
    </div>
""", unsafe_allow_html=True)