import tensorflow as tf
import numpy as np
import cv2
import matplotlib.pyplot as plt
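
# Assumed dependencies: tensorflow (2.x), numpy, opencv-python, matplotlib.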

## 1. UTILITY FUNCTIONS ##

def load_and_process_img(path_to_img, img_size=512):
    """Loads an image from a path and scales it to [0, 1] for the model."""
    img = cv2.imread(path_to_img)
    if img is None:
        raise FileNotFoundError(f"Could not read image from path: {path_to_img}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (img_size, img_size))
    # Keep the image as RGB floats in [0, 1]. The VGG-specific preprocessing
    # (rescaling to 0-255, RGB->BGR, mean subtraction) is applied inside
    # StyleContentModel.call, so the image variable we optimize stays in a
    # simple, clippable [0, 1] range.
    img = img.astype(np.float32) / 255.0
    img = np.expand_dims(img, axis=0)
    return tf.convert_to_tensor(img)
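
# Example (hypothetical path): load_and_process_img('photos/dog.jpg') returns
# a float32 tensor of shape (1, 512, 512, 3) with values in [0, 1].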

def tensor_to_image(tensor):
    """Converts a [0, 1] float tensor back into a displayable uint8 image."""
    tensor = tensor * 255
    tensor = np.array(tensor, dtype=np.uint8)
    if np.ndim(tensor) > 3:
        assert tensor.shape[0] == 1
        tensor = tensor[0]
    return tensor

## 2. STYLE TRANSFER MODEL ##

def gram_matrix(input_tensor):
    """Calculates the Gram matrix of a given tensor."""
    result = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
    input_shape = tf.shape(input_tensor)
    num_locations = tf.cast(input_shape[1] * input_shape[2], tf.float32)
    return result / num_locations
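
# For features F of shape (B, H, W, C), the einsum above computes
# G[b, c, d] = sum_{i,j} F[b, i, j, c] * F[b, i, j, d], i.e. the channel
# correlation matrix, normalized by the number of spatial locations H * W.
# It is equivalent to flattening F to (H*W, C) and computing F^T F / (H*W).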

class StyleContentModel(tf.keras.models.Model):
    """A custom model to extract style and content features."""

    def __init__(self, style_layers, content_layers):
        super().__init__()
        # Load the VGG19 model, pretrained on ImageNet, without its classifier head
        vgg = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
        vgg.trainable = False
        # Get the outputs from the specified layers
        style_outputs = [vgg.get_layer(name).output for name in style_layers]
        content_outputs = [vgg.get_layer(name).output for name in content_layers]
        model_outputs = style_outputs + content_outputs
        # Build a new model mapping an input image to the selected activations
        self.vgg = tf.keras.Model(vgg.input, model_outputs)
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style_layers = len(style_layers)
        self.vgg.trainable = False
    def call(self, inputs):
        """Processes a [0, 1] image and returns a dict of style and content features."""
        # VGG19 expects 0-255 BGR inputs with the ImageNet means subtracted,
        # so rescale from [0, 1] and apply the standard VGG19 preprocessing
        inputs = inputs * 255.0
        preprocessed_input = tf.keras.applications.vgg19.preprocess_input(inputs)
        # Get the outputs from our pre-built VGG model
        outputs = self.vgg(preprocessed_input)
        # Separate the style and content outputs
        style_outputs = outputs[:self.num_style_layers]
        content_outputs = outputs[self.num_style_layers:]
        # Calculate the Gram matrix for each style layer output
        style_outputs = [gram_matrix(style_output) for style_output in style_outputs]
        # Create dictionaries mapping layer names to their features
        content_dict = {name: value for name, value in zip(self.content_layers, content_outputs)}
        style_dict = {name: value for name, value in zip(self.style_layers, style_outputs)}
        return {'content': content_dict, 'style': style_dict}
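
# For the default 512x512 inputs, each 'style' entry is a (1, C, C) Gram
# matrix (e.g. (1, 64, 64) for block1_conv1, which has 64 channels), while
# each 'content' entry is the raw feature map of its layer.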

## 3. MAIN ORCHESTRATION FUNCTION ##

def run_style_transfer(content_path, style_path, iterations=1000, content_weight=1e4, style_weight=1e-2):
    """
    Performs the neural style transfer.

    Returns:
        A tensor representing the final stylized image.
    """
    # Define the layers to use for style and content
    content_layers = ['block5_conv2']
    style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']
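    # Why these layers: deeper layers encode large-scale structure, so
    # block5_conv2 works well as the content target, while the first conv of
    # each block captures texture statistics at several scales, which is what
    # the Gram-matrix style loss matches (following Gatys et al.).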
    # Build the feature extractor model
    extractor = StyleContentModel(style_layers, content_layers)
    # Load content and style images
    content_image = load_and_process_img(content_path)
    style_image = load_and_process_img(style_path)
    # Calculate the target style and content features
    style_targets = extractor(style_image)['style']
    content_targets = extractor(content_image)['content']
    # Initialize the generated image from the content image, and the optimizer
    generated_image = tf.Variable(content_image)
    optimizer = tf.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)
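    # These hyperparameters (learning_rate=0.02, beta_1=0.99, epsilon=1e-1)
    # match the TensorFlow neural style transfer tutorial; the large epsilon
    # damps Adam's adaptive per-pixel step sizes.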

    # Compiling the update step with tf.function gives a large speedup
    @tf.function
    def train_step(image):
        with tf.GradientTape() as tape:
            outputs = extractor(image)
            # Total loss is a weighted sum of the content and style terms
            content_loss = tf.add_n([tf.reduce_mean((content_targets[name] - outputs['content'][name])**2)
                                     for name in content_targets.keys()])
            style_loss = tf.add_n([tf.reduce_mean((style_targets[name] - outputs['style'][name])**2)
                                   for name in style_targets.keys()])
            total_loss = content_weight * content_loss + style_weight * style_loss
        # Apply gradients to update the generated image
        grad = tape.gradient(total_loss, image)
        optimizer.apply_gradients([(grad, image)])
        # Keep the image within the valid [0, 1] pixel range
        image.assign(tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0))

    # Run the optimization loop
    for i in range(iterations):
        train_step(generated_image)
        if (i + 1) % 100 == 0:
            print(f"Iteration {i+1}/{iterations}")

    # The optimized variable is already an RGB image in [0, 1] (no VGG
    # preprocessing to undo); clip defensively and return it.
    return tf.clip_by_value(generated_image, 0.0, 1.0)

## 4. SCRIPT EXECUTION ##

if __name__ == '__main__':
    # Define paths to your images
    CONTENT_PATH = 'path/to/content_image.jpg'
    STYLE_PATH = 'path/to/style_image.jpg'

    # Run the style transfer process
    final_tensor = run_style_transfer(CONTENT_PATH, STYLE_PATH, iterations=1000)

    # Convert the final tensor to an image and display it
    final_image = tensor_to_image(final_tensor)
    plt.figure(figsize=(8, 8))
    plt.imshow(final_image)
    plt.axis('off')
    plt.title("Stylized Image")
    plt.show()

    # To save with OpenCV, convert the RGB image back to BGR first
    final_image_bgr = cv2.cvtColor(final_image, cv2.COLOR_RGB2BGR)
    cv2.imwrite('stylized_image.png', final_image_bgr)