Spaces:
Running
Running
File size: 6,287 Bytes
f234477 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import tensorflow as tf
import numpy as np
import cv2
import matplotlib.pyplot as plt
## 1. UTILITY FUNCTIONS ##
def load_and_process_img(path_to_img, img_size=512):
    """Read an image file and turn it into a VGG19-preprocessed batch of one.

    Args:
        path_to_img: Path handed to ``cv2.imread``.
        img_size: Side length in pixels; the image is resized to
            ``img_size x img_size`` regardless of its original aspect ratio.

    Returns:
        A tensor of shape ``(1, img_size, img_size, 3)`` holding the output of
        ``tf.keras.applications.vgg19.preprocess_input``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None`` for the path.
    """
    bgr_pixels = cv2.imread(path_to_img)
    if bgr_pixels is None:
        raise FileNotFoundError(f"Could not read image from path: {path_to_img}")

    # OpenCV decodes to BGR; switch to RGB before handing over to Keras.
    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    square = cv2.resize(rgb_pixels, (img_size, img_size))

    # float32 with a leading batch axis of length one.
    batch = square.astype(np.float32)[np.newaxis, ...]
    preprocessed = tf.keras.applications.vgg19.preprocess_input(batch)
    return tf.convert_to_tensor(preprocessed)
def tensor_to_image(tensor):
    """Converts a tensor back into a displayable image.

    Args:
        tensor: Array-like of pixel intensities on a 0-1 scale (it is
            multiplied by 255 here), optionally with a leading batch axis of
            length one.

    Returns:
        A ``numpy.uint8`` array with values in 0-255. When the input has more
        than three dimensions the leading batch axis is removed.

    Raises:
        ValueError: If the input has more than three dimensions and its
            leading axis is not of length one.
    """
    pixels = np.asarray(tensor * 255)
    # Clip before the cast: floats outside 0-255 would otherwise wrap around
    # (or be undefined) when converted to uint8. In-range values are
    # truncated exactly as before.
    pixels = np.clip(pixels, 0, 255).astype(np.uint8)
    if pixels.ndim > 3:
        # Explicit check instead of `assert`, which is stripped under -O.
        if pixels.shape[0] != 1:
            raise ValueError(
                f"Expected a batch of exactly one image, got shape {pixels.shape}"
            )
        pixels = pixels[0]
    return pixels
## 2. STYLE TRANSFER MODEL ##
def gram_matrix(input_tensor):
    """Compute the per-sample Gram matrix of a 4-D feature tensor.

    For every batch entry the channel vectors at each (i, j) position are
    multiplied pairwise and summed over both position axes, then divided by
    the number of positions (size of axis 1 times size of axis 2).

    Args:
        input_tensor: Rank-4 tensor indexed as (batch, i, j, channel).
            Presumably float32, since the result is divided by a float32
            count -- confirm against callers.

    Returns:
        Tensor of shape (batch, channels, channels).
    """
    dims = tf.shape(input_tensor)
    position_count = tf.cast(dims[1] * dims[2], tf.float32)
    channel_products = tf.linalg.einsum(
        'bijc,bijd->bcd', input_tensor, input_tensor
    )
    return channel_products / position_count
class StyleContentModel(tf.keras.models.Model):
    """Keras model that exposes VGG19 activations as style and content features.

    Style features are returned as Gram matrices of the chosen layers;
    content features are the raw activations of the chosen layers.
    """

    def __init__(self, style_layers, content_layers):
        """Build a frozen VGG19 feature extractor.

        Args:
            style_layers: Names of the VGG19 layers used for style.
            content_layers: Names of the VGG19 layers used for content.
        """
        super().__init__()

        # ImageNet-pretrained VGG19 without its classifier head, frozen.
        backbone = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
        backbone.trainable = False

        # Style outputs come first, content outputs second; call() splits
        # the result list at num_style_layers and relies on this order.
        tapped_outputs = [
            backbone.get_layer(layer_name).output
            for layer_name in [*style_layers, *content_layers]
        ]

        self.vgg = tf.keras.Model(backbone.input, tapped_outputs)
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style_layers = len(style_layers)
        self.vgg.trainable = False

    def call(self, inputs):
        """Return ``{'content': {...}, 'style': {...}}`` keyed by layer name.

        The input is multiplied by 255 and passed straight to the wrapped
        VGG19 graph.
        NOTE(review): ``vgg19.preprocess_input`` is not applied here, so any
        channel reordering / mean-centring must already be present in
        ``inputs`` (at 1/255 of the scale VGG19 should receive).
        """
        activations = self.vgg(inputs * 255.0)

        boundary = self.num_style_layers
        style_grams = [gram_matrix(activation) for activation in activations[:boundary]]
        content_activations = activations[boundary:]

        return {
            'content': dict(zip(self.content_layers, content_activations)),
            'style': dict(zip(self.style_layers, style_grams)),
        }
## 3. MAIN ORCHESTRATION FUNCTION ##
def run_style_transfer(content_path, style_path, iterations=1000, content_weight=1e4, style_weight=1e-2):
    """
    Performs the neural style transfer.

    The generated image starts as a copy of the content image and is updated
    with Adam so that its content-layer activations approach those of the
    content image and its style-layer Gram matrices approach those of the
    style image.

    Args:
        content_path: Path of the content image (passed to load_and_process_img).
        style_path: Path of the style image (passed to load_and_process_img).
        iterations: Number of optimisation steps.
        content_weight: Multiplier applied to the content loss.
        style_weight: Multiplier applied to the style loss.

    Returns:
        A tensor representing the final stylized image: a batch of one, RGB
        channel order, values in [0, 1] (the scale tensor_to_image multiplies
        by 255).

    Raises:
        FileNotFoundError: Propagated from load_and_process_img when an image
            cannot be read.
    """
    # Define the layers to use for style and content
    content_layers = ['block5_conv2']
    style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']

    # Build the feature extractor model
    extractor = StyleContentModel(style_layers, content_layers)

    # Per-channel ImageNet means that vgg19.preprocess_input subtracts after
    # converting RGB to BGR (Keras "caffe" preprocessing mode).
    vgg_mean_bgr = tf.constant([103.939, 116.779, 123.68], dtype=tf.float32)

    # load_and_process_img returns VGG-preprocessed pixels on a 0-255 scale,
    # while the extractor multiplies its input by 255. Dividing here makes the
    # two cancel, so VGG19 receives correctly scaled values. Previously the
    # network was fed inputs 255x too large.
    content_image = load_and_process_img(content_path) / 255.0
    style_image = load_and_process_img(style_path) / 255.0

    # Calculate the target style and content features
    style_targets = extractor(style_image)['style']
    content_targets = extractor(content_image)['content']

    # Valid range of the optimised variable: the values that pixel
    # intensities 0 and 255 map to in this "preprocessed / 255" space.
    # Clipping to [0, 1] (as before) would wipe out the mean-centred content
    # initialisation after the very first step.
    clip_low = -vgg_mean_bgr / 255.0
    clip_high = (255.0 - vgg_mean_bgr) / 255.0

    # Initialize the image to be generated and the optimizer
    generated_image = tf.Variable(content_image)
    optimizer = tf.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)

    @tf.function()
    def train_step(image):
        with tf.GradientTape() as tape:
            outputs = extractor(image)
            # Calculate total loss
            content_loss = tf.add_n([
                tf.reduce_mean((content_targets[name] - outputs['content'][name]) ** 2)
                for name in content_targets
            ])
            style_loss = tf.add_n([
                tf.reduce_mean((style_targets[name] - outputs['style'][name]) ** 2)
                for name in style_targets
            ])
            total_loss = content_weight * content_loss + style_weight * style_loss
        # Apply gradients to update the generated image
        grad = tape.gradient(total_loss, image)
        optimizer.apply_gradients([(grad, image)])
        # Keep every channel inside the range of representable pixels.
        image.assign(tf.clip_by_value(image, clip_value_min=clip_low, clip_value_max=clip_high))

    # Run the optimization loop
    for i in range(iterations):
        train_step(generated_image)
        if (i + 1) % 100 == 0:
            print(f"Iteration {i+1}/{iterations}")

    # Post-process to remove VGG preprocessing and return a displayable tensor:
    # restore the 0-255 scale, add the channel means back, then BGR -> RGB.
    final_tensor = generated_image * 255.0 + vgg_mean_bgr
    final_tensor = tf.reverse(final_tensor, axis=[-1])  # BGR to RGB
    # Back to the [0, 1] scale that tensor_to_image expects.
    final_tensor = tf.clip_by_value(final_tensor / 255.0, 0.0, 1.0)
    return final_tensor
## 4. SCRIPT EXECUTION ##
if __name__ == '__main__':
    # Define paths to your images
    CONTENT_PATH = 'path/to/content_image.jpg'
    STYLE_PATH = 'path/to/style_image.jpg'

    # Run the style transfer process
    final_tensor = run_style_transfer(CONTENT_PATH, STYLE_PATH, iterations=1000)

    # Convert the final tensor to an image and display it
    final_image = tensor_to_image(final_tensor)
    plt.figure(figsize=(8, 8))
    plt.imshow(final_image)
    plt.axis('off')
    plt.title("Stylized Image")
    plt.show()

    # To save the image: OpenCV writes BGR, so convert from RGB first.
    final_image_bgr = cv2.cvtColor(final_image, cv2.COLOR_RGB2BGR)
    # cv2.imwrite reports failure through its boolean result rather than an
    # exception; surface it instead of silently producing no file.
    if not cv2.imwrite('stylized_image.png', final_image_bgr):
        raise IOError("Could not write image to path: stylized_image.png")