import tensorflow as tf
import numpy as np
import cv2
import matplotlib.pyplot as plt
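
# Assumed dependencies: tensorflow (2.x), numpy, opencv-python, matplotlib.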

## 1. UTILITY FUNCTIONS ##

def load_and_process_img(path_to_img, img_size=512):
    """Loads an image from a path and scales it to [0, 1] for the model."""
    img = cv2.imread(path_to_img)
    if img is None:
        raise FileNotFoundError(f"Could not read image from path: {path_to_img}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (img_size, img_size))
    # Keep the image as RGB floats in [0, 1]. The VGG-specific preprocessing
    # (rescaling to 0-255, RGB->BGR, mean subtraction) is applied inside
    # StyleContentModel.call, so the image variable we optimize stays in a
    # simple, clippable [0, 1] range.
    img = img.astype(np.float32) / 255.0
    img = np.expand_dims(img, axis=0)
    return tf.convert_to_tensor(img)
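
# Example (hypothetical path): load_and_process_img('photos/dog.jpg') returns
# a float32 tensor of shape (1, 512, 512, 3) with values in [0, 1].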

def tensor_to_image(tensor):
    """Converts a [0, 1] float tensor back into a displayable uint8 image."""
    tensor = tensor * 255
    tensor = np.array(tensor, dtype=np.uint8)
    if np.ndim(tensor) > 3:
        assert tensor.shape[0] == 1
        tensor = tensor[0]
    return tensor

## 2. STYLE TRANSFER MODEL ##

def gram_matrix(input_tensor):
    """Calculates the Gram matrix of a given tensor."""
    result = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
    input_shape = tf.shape(input_tensor)
    num_locations = tf.cast(input_shape[1] * input_shape[2], tf.float32)
    return result / num_locations
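
# For features F of shape (B, H, W, C), the einsum above computes
# G[b, c, d] = sum_{i,j} F[b, i, j, c] * F[b, i, j, d], i.e. the channel
# correlation matrix, normalized by the number of spatial locations H * W.
# It is equivalent to flattening F to (H*W, C) and computing F^T F / (H*W).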

class StyleContentModel(tf.keras.models.Model):
    """A custom model to extract style and content features."""

    def __init__(self, style_layers, content_layers):
        super().__init__()
        # Load the VGG19 model, pretrained on ImageNet, without its classifier head
        vgg = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
        vgg.trainable = False
        # Get the outputs from the specified layers
        style_outputs = [vgg.get_layer(name).output for name in style_layers]
        content_outputs = [vgg.get_layer(name).output for name in content_layers]
        model_outputs = style_outputs + content_outputs
        # Build a new model mapping an input image to the selected activations
        self.vgg = tf.keras.Model(vgg.input, model_outputs)
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style_layers = len(style_layers)
        self.vgg.trainable = False
    def call(self, inputs):
        """Processes a [0, 1] image and returns a dict of style and content features."""
        # VGG19 expects 0-255 BGR inputs with the ImageNet means subtracted,
        # so rescale from [0, 1] and apply the standard VGG19 preprocessing
        inputs = inputs * 255.0
        preprocessed_input = tf.keras.applications.vgg19.preprocess_input(inputs)
        # Get the outputs from our pre-built VGG model
        outputs = self.vgg(preprocessed_input)
        # Separate the style and content outputs
        style_outputs = outputs[:self.num_style_layers]
        content_outputs = outputs[self.num_style_layers:]
        # Calculate the Gram matrix for each style layer output
        style_outputs = [gram_matrix(style_output) for style_output in style_outputs]
        # Create dictionaries mapping layer names to their features
        content_dict = {name: value for name, value in zip(self.content_layers, content_outputs)}
        style_dict = {name: value for name, value in zip(self.style_layers, style_outputs)}
        return {'content': content_dict, 'style': style_dict}
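
# For the default 512x512 inputs, each 'style' entry is a (1, C, C) Gram
# matrix (e.g. (1, 64, 64) for block1_conv1, which has 64 channels), while
# each 'content' entry is the raw feature map of its layer.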

## 3. MAIN ORCHESTRATION FUNCTION ##

def run_style_transfer(content_path, style_path, iterations=1000, content_weight=1e4, style_weight=1e-2):
    """
    Performs the neural style transfer.

    Returns:
        A tensor representing the final stylized image.
    """
    # Define the layers to use for style and content
    content_layers = ['block5_conv2']
    style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']
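    # Why these layers: deeper layers encode large-scale structure, so
    # block5_conv2 works well as the content target, while the first conv of
    # each block captures texture statistics at several scales, which is what
    # the Gram-matrix style loss matches (following Gatys et al.).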
    # Build the feature extractor model
    extractor = StyleContentModel(style_layers, content_layers)
    # Load content and style images
    content_image = load_and_process_img(content_path)
    style_image = load_and_process_img(style_path)
    # Calculate the target style and content features
    style_targets = extractor(style_image)['style']
    content_targets = extractor(content_image)['content']
    # Initialize the generated image from the content image, and the optimizer
    generated_image = tf.Variable(content_image)
    optimizer = tf.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)
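    # These hyperparameters (learning_rate=0.02, beta_1=0.99, epsilon=1e-1)
    # match the TensorFlow neural style transfer tutorial; the large epsilon
    # damps Adam's adaptive per-pixel step sizes.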

    # Compiling the update step with tf.function gives a large speedup
    @tf.function
    def train_step(image):
        with tf.GradientTape() as tape:
            outputs = extractor(image)
            # Total loss is a weighted sum of the content and style terms
            content_loss = tf.add_n([tf.reduce_mean((content_targets[name] - outputs['content'][name])**2)
                                     for name in content_targets.keys()])
            style_loss = tf.add_n([tf.reduce_mean((style_targets[name] - outputs['style'][name])**2)
                                   for name in style_targets.keys()])
            total_loss = content_weight * content_loss + style_weight * style_loss
        # Apply gradients to update the generated image
        grad = tape.gradient(total_loss, image)
        optimizer.apply_gradients([(grad, image)])
        # Keep the image within the valid [0, 1] pixel range
        image.assign(tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0))

    # Run the optimization loop
    for i in range(iterations):
        train_step(generated_image)
        if (i + 1) % 100 == 0:
            print(f"Iteration {i+1}/{iterations}")

    # The optimized variable is already an RGB image in [0, 1] (no VGG
    # preprocessing to undo); clip defensively and return it.
    return tf.clip_by_value(generated_image, 0.0, 1.0)

## 4. SCRIPT EXECUTION ##

if __name__ == '__main__':
    # Define paths to your images
    CONTENT_PATH = 'path/to/content_image.jpg'
    STYLE_PATH = 'path/to/style_image.jpg'

    # Run the style transfer process
    final_tensor = run_style_transfer(CONTENT_PATH, STYLE_PATH, iterations=1000)

    # Convert the final tensor to an image and display it
    final_image = tensor_to_image(final_tensor)
    plt.figure(figsize=(8, 8))
    plt.imshow(final_image)
    plt.axis('off')
    plt.title("Stylized Image")
    plt.show()

    # To save with OpenCV, convert the RGB image back to BGR first
    final_image_bgr = cv2.cvtColor(final_image, cv2.COLOR_RGB2BGR)
    cv2.imwrite('stylized_image.png', final_image_bgr)