Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files- app.py +65 -0
- main.py +157 -0
- requirements.txt +5 -0
app.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import tensorflow as tf
|
| 3 |
+
import numpy as np
|
| 4 |
+
import cv2
|
| 5 |
+
import os
|
| 6 |
+
from main import tensor_to_image,StyleContentModel, run_style_transfer
|
| 7 |
+
|
| 8 |
+
# VGG19 layer names used for the content and style representations.
# The feature extractor is constructed once, when this module is imported.
# NOTE(review): nothing else visible in this file reads `extractor` --
# confirm whether it is still needed.
content_layers = ['block5_conv2']
style_layers = [f'block{block}_conv1' for block in range(1, 6)]
extractor = StyleContentModel(style_layers, content_layers)
|
| 12 |
+
|
| 13 |
+
def style_transfer_wrapper(content_img_np, style_img_np):
    """Run style transfer on two in-memory images for the Gradio interface.

    ``run_style_transfer`` takes file paths, so both arrays are written to
    image files first. The files live in a per-call temporary directory, so
    concurrent requests cannot overwrite each other's inputs, and they are
    removed even when the transfer raises.

    Args:
        content_img_np: Content image as an RGB numpy array, or None.
        style_img_np: Style image as an RGB numpy array, or None.

    Returns:
        The stylized image produced by ``tensor_to_image``, or None when
        either input image is missing.
    """
    # Imported locally so this function does not rely on a module-level import.
    import tempfile

    if content_img_np is None or style_img_np is None:
        return None  # Nothing to do until both images are provided

    with tempfile.TemporaryDirectory() as tmp_dir:
        content_path = os.path.join(tmp_dir, "temp_content.jpg")
        style_path = os.path.join(tmp_dir, "temp_style.jpg")

        # Gradio provides RGB, but cv2 saves in BGR order
        cv2.imwrite(content_path, cv2.cvtColor(content_img_np, cv2.COLOR_RGB2BGR))
        cv2.imwrite(style_path, cv2.cvtColor(style_img_np, cv2.COLOR_RGB2BGR))

        # Run the main process (using fewer iterations for a faster demo)
        final_tensor = run_style_transfer(content_path, style_path, iterations=500)

    # The result is held in memory, so the files can already be gone here.
    return tensor_to_image(final_tensor)
|
| 40 |
+
|
| 41 |
+
## 4. GRADIO UI DEFINITION ##
|
| 42 |
+
|
| 43 |
+
# Build the page: title, description, the two inputs side by side,
# the trigger button, the result, and an attribution footer.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(value="# 🎨 Neural Style Transfer")
    gr.Markdown(
        value=(
            "Combine the content of one image with the artistic style of another. "
            "This demo uses a VGG19 model. "
            "Processing can take a minute, especially on CPU."
        )
    )

    with gr.Row():
        content_img = gr.Image(
            value="https://gradio-builds.s3.amazonaws.com/demo-files/acropolis.jpg",
            label="Content Image",
            type="numpy",
        )
        style_img = gr.Image(
            value="https://gradio-builds.s3.amazonaws.com/demo-files/starry_night.jpg",
            label="Style Image",
            type="numpy",
        )

    run_button = gr.Button(value="Generate Image", variant="primary")

    output_img = gr.Image(label="Result")

    # Clicking the button sends both input images through the wrapper
    # and shows its return value in the result component.
    run_button.click(style_transfer_wrapper, [content_img, style_img], output_img)

    gr.Markdown(value="---")
    gr.Markdown(
        value="Based on the paper '[A Neural Algorithm of Artistic Style](https://arxiv.org/abs/1508.06576)' by Gatys et al."
    )

# Start serving the interface
demo.launch(debug=True)
|
main.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import tensorflow as tf
|
| 2 |
+
import numpy as np
|
| 3 |
+
import cv2
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
|
| 6 |
+
## 1. UTILITY FUNCTIONS ##
|
| 7 |
+
|
| 8 |
+
def load_and_process_img(path_to_img, img_size=512):
    """Read an image file and turn it into a VGG19-preprocessed batch tensor.

    The image is converted to RGB, resized to ``img_size`` x ``img_size``
    (aspect ratio is not preserved), cast to float32, given a leading batch
    axis and passed through Keras' ``vgg19.preprocess_input``.

    Args:
        path_to_img: Path of the image file to read.
        img_size: Width and height, in pixels, of the resized image.

    Returns:
        A tensor holding the single preprocessed image.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from the path.
    """
    bgr_pixels = cv2.imread(path_to_img)
    # cv2.imread reports failure by returning None instead of raising.
    if bgr_pixels is None:
        raise FileNotFoundError(f"Could not read image from path: {path_to_img}")

    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb_pixels, (img_size, img_size))
    batch = resized.astype(np.float32)[np.newaxis, ...]
    preprocessed = tf.keras.applications.vgg19.preprocess_input(batch)
    return tf.convert_to_tensor(preprocessed)
|
| 19 |
+
|
| 20 |
+
def tensor_to_image(tensor):
    """Convert a float image scaled to [0, 1] into a uint8 pixel array.

    Values are multiplied by 255, saturated to [0, 255] and truncated to
    ``uint8``. Saturating first keeps slightly out-of-range values from
    wrapping around during the integer cast.

    Args:
        tensor: Array-like image data (for example a numpy array or an
            eager tensor). It may carry a leading batch axis of size 1.

    Returns:
        A ``numpy.ndarray`` of dtype ``uint8``. When the input has more
        than three dimensions, the leading axis is removed.

    Raises:
        ValueError: If the input has more than three dimensions and its
            leading axis does not have size 1.
    """
    pixels = np.asarray(tensor, dtype=np.float32) * 255.0
    # Clip before casting: floats outside [0, 255] do not convert to uint8 safely.
    pixels = np.clip(pixels, 0.0, 255.0).astype(np.uint8)
    if pixels.ndim > 3:
        if pixels.shape[0] != 1:
            raise ValueError(
                f"Expected a batch of exactly one image, got {pixels.shape[0]}"
            )
        pixels = pixels[0]
    return pixels
|
| 28 |
+
|
| 29 |
+
## 2. STYLE TRANSFER MODEL ##
|
| 30 |
+
|
| 31 |
+
def gram_matrix(input_tensor):
    """Return the Gram matrix of a 4-D feature tensor, averaged over locations.

    For each batch entry, the last axis is correlated with itself by summing
    products over the two middle axes. The result is divided by the product
    of those two axis sizes.

    Args:
        input_tensor: Feature tensor indexed as (batch, i, j, channel).

    Returns:
        A tensor indexed as (batch, channel, channel).
    """
    dims = tf.shape(input_tensor)
    location_count = tf.cast(dims[1] * dims[2], tf.float32)
    channel_products = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
    return channel_products / location_count
|
| 37 |
+
|
| 38 |
+
class StyleContentModel(tf.keras.models.Model):
    """Extract style (Gram matrix) and content features from VGG19 layers."""

    def __init__(self, style_layers, content_layers):
        """Build a frozen VGG19 that outputs the requested layers.

        Args:
            style_layers: Names of the VGG19 layers used for style.
            content_layers: Names of the VGG19 layers used for content.
        """
        super().__init__()
        # ImageNet-pretrained VGG19 without its classifier head
        base = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
        base.trainable = False

        # Style outputs come first, content outputs after them; `call`
        # relies on this order to split the results again.
        tapped_outputs = [
            base.get_layer(layer_name).output
            for layer_name in (*style_layers, *content_layers)
        ]

        self.vgg = tf.keras.Model(base.input, tapped_outputs)
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style_layers = len(style_layers)
        self.vgg.trainable = False

    def call(self, inputs):
        """Return ``{'content': {...}, 'style': {...}}`` keyed by layer name.

        The inputs are multiplied by 255 and passed straight to the VGG
        network. No other preprocessing happens here, so callers must supply
        values that are valid network input after that scaling. Style
        features are returned as Gram matrices.
        """
        features = self.vgg(inputs * 255.0)

        split_at = self.num_style_layers
        style_grams = [gram_matrix(feature) for feature in features[:split_at]]
        content_features = features[split_at:]

        return {
            'content': dict(zip(self.content_layers, content_features)),
            'style': dict(zip(self.style_layers, style_grams)),
        }
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
## 3. MAIN ORCHESTRATION FUNCTION ##
|
| 81 |
+
|
| 82 |
+
def run_style_transfer(content_path, style_path, iterations=1000, content_weight=1e4, style_weight=1e-2, extractor=None):
    """Perform neural style transfer by optimizing an image with Adam.

    ``load_and_process_img`` returns VGG-preprocessed pixels (BGR order, a
    per-channel mean subtracted), while ``StyleContentModel`` multiplies its
    input by 255 before calling VGG. To keep both consistent, the
    optimization runs on ``preprocessed / 255``. The generated image is
    clipped to the range that corresponds to valid 0-255 pixels, and the
    preprocessing is undone before returning.

    Args:
        content_path: Path of the content image file.
        style_path: Path of the style image file.
        iterations: Number of optimization steps.
        content_weight: Multiplier of the content loss term.
        style_weight: Multiplier of the style loss term.
        extractor: Optional pre-built ``StyleContentModel`` to reuse. When
            None, a new one is built with the default layers.

    Returns:
        A float tensor holding the stylized image in RGB order with values
        in [0, 1], including the leading batch axis. ``tensor_to_image``
        accepts it directly.
    """
    if extractor is None:
        # Define the layers to use for style and content
        content_layers = ['block5_conv2']
        style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']
        extractor = StyleContentModel(style_layers, content_layers)

    # Channel means (BGR order) that Keras' VGG19 preprocess_input subtracts.
    vgg_mean_bgr = tf.constant([103.939, 116.779, 123.68], dtype=tf.float32)
    # Bounds of valid pixels (0..255) expressed in preprocessed/255 space.
    lower_bound = -vgg_mean_bgr / 255.0
    upper_bound = (255.0 - vgg_mean_bgr) / 255.0

    # Divide by 255 because the extractor multiplies by 255 again internally.
    content_image = load_and_process_img(content_path) / 255.0
    style_image = load_and_process_img(style_path) / 255.0

    # Calculate the target style and content features
    style_targets = extractor(style_image)['style']
    content_targets = extractor(content_image)['content']

    # Start from the content image and let the optimizer restyle it
    generated_image = tf.Variable(content_image)
    optimizer = tf.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)

    @tf.function()
    def train_step(image):
        with tf.GradientTape() as tape:
            outputs = extractor(image)

            content_loss = tf.add_n([
                tf.reduce_mean((content_targets[name] - outputs['content'][name]) ** 2)
                for name in content_targets
            ])
            style_loss = tf.add_n([
                tf.reduce_mean((style_targets[name] - outputs['style'][name]) ** 2)
                for name in style_targets
            ])
            total_loss = content_weight * content_loss + style_weight * style_loss

        grad = tape.gradient(total_loss, image)
        optimizer.apply_gradients([(grad, image)])
        # Keep every channel inside the range of real pixel values.
        image.assign(tf.clip_by_value(image, lower_bound, upper_bound))

    # Run the optimization loop
    for i in range(iterations):
        train_step(generated_image)
        if (i + 1) % 100 == 0:
            print(f"Iteration {i+1}/{iterations}")

    # Undo the VGG preprocessing: restore the mean, then BGR -> RGB.
    pixels_bgr = generated_image * 255.0 + vgg_mean_bgr
    pixels_rgb = tf.reverse(pixels_bgr, axis=[-1])
    final_tensor = tf.clip_by_value(pixels_rgb / 255.0, 0.0, 1.0)

    return final_tensor
|
| 135 |
+
|
| 136 |
+
## 4. SCRIPT EXECUTION ##
|
| 137 |
+
|
| 138 |
+
if __name__ == '__main__':
    # Placeholder locations; point these at real image files before running
    CONTENT_PATH = 'path/to/content_image.jpg'
    STYLE_PATH = 'path/to/style_image.jpg'

    # Stylize, then convert the resulting tensor into a displayable array
    final_image = tensor_to_image(
        run_style_transfer(CONTENT_PATH, STYLE_PATH, iterations=1000)
    )

    # Show the result without axes
    plt.figure(figsize=(8, 8))
    plt.imshow(final_image)
    plt.axis('off')
    plt.title("Stylized Image")
    plt.show()

    # OpenCV writes BGR, so swap the channel order before saving
    cv2.imwrite('stylized_image.png', cv2.cvtColor(final_image, cv2.COLOR_RGB2BGR))
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
tensorflow
|
| 2 |
+
opencv-python-headless
|
| 3 |
+
numpy
|
| 4 |
+
matplotlib
|
| 5 |
+
gradio
|