Prinaka committed on
Commit
f234477
·
verified ·
1 Parent(s): 1f84467

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +65 -0
  2. main.py +157 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tensorflow as tf
3
+ import numpy as np
4
+ import cv2
5
+ import os
6
+ from main import tensor_to_image,StyleContentModel, run_style_transfer
7
+
8
# VGG19 layers used for feature extraction. The model is built once at import
# time so the pretrained network is loaded when the app starts.
content_layers = ['block5_conv2']
style_layers = [
    'block1_conv1',
    'block2_conv1',
    'block3_conv1',
    'block4_conv1',
    'block5_conv1',
]
extractor = StyleContentModel(style_layers, content_layers)
12
+
13
def style_transfer_wrapper(content_img_np, style_img_np):
    """Run neural style transfer on two images supplied by the Gradio UI.

    ``run_style_transfer`` reads its inputs from disk, so both arrays are
    written to image files first and the resulting paths are handed over.

    Args:
        content_img_np: RGB image array from the "Content Image" component,
            or ``None`` when no image was provided.
        style_img_np: RGB image array from the "Style Image" component, or
            ``None`` when no image was provided.

    Returns:
        The stylized image produced by ``tensor_to_image``, or ``None`` if
        either input is missing.
    """
    # Local import keeps this function self-contained.
    import tempfile

    if content_img_np is None or style_img_np is None:
        return None

    # Each call gets its own private directory. Fixed file names in the
    # working directory would let concurrent requests overwrite each other's
    # inputs, and the context manager removes the files even when the style
    # transfer raises.
    with tempfile.TemporaryDirectory(prefix="style_transfer_") as workdir:
        # PNG is lossless, so the inputs are not degraded by JPEG compression.
        content_path = os.path.join(workdir, "content.png")
        style_path = os.path.join(workdir, "style.png")

        # Gradio provides RGB, but cv2.imwrite expects BGR channel order.
        cv2.imwrite(content_path, cv2.cvtColor(content_img_np, cv2.COLOR_RGB2BGR))
        cv2.imwrite(style_path, cv2.cvtColor(style_img_np, cv2.COLOR_RGB2BGR))

        # Fewer iterations than the library default for a faster demo.
        final_tensor = run_style_transfer(content_path, style_path, iterations=500)

    # The result lives in memory, so the temporary files are no longer needed.
    return tensor_to_image(final_tensor)
40
+
41
+ ## 4. GRADIO UI DEFINITION ##
42
+
43
# Page layout: title and description, the two input images side by side,
# then the trigger button, the result image and a reference to the paper.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎨 Neural Style Transfer")
    gr.Markdown(
        "Combine the content of one image with the artistic style of another. This demo uses a VGG19 model. Processing can take a minute, especially on CPU."
    )

    with gr.Row():
        content_img = gr.Image(
            label="Content Image",
            type="numpy",
            value="https://gradio-builds.s3.amazonaws.com/demo-files/acropolis.jpg",
        )
        style_img = gr.Image(
            label="Style Image",
            type="numpy",
            value="https://gradio-builds.s3.amazonaws.com/demo-files/starry_night.jpg",
        )

    run_button = gr.Button("Generate Image", variant="primary")

    output_img = gr.Image(label="Result")

    # Clicking the button feeds both input images to the wrapper and shows
    # whatever it returns in the result component.
    run_button.click(
        fn=style_transfer_wrapper,
        inputs=[content_img, style_img],
        outputs=output_img,
    )

    gr.Markdown("---")
    gr.Markdown("Based on the paper '[A Neural Algorithm of Artistic Style](https://arxiv.org/abs/1508.06576)' by Gatys et al.")

# Start serving the interface.
demo.launch(debug=True)
main.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ import numpy as np
3
+ import cv2
4
+ import matplotlib.pyplot as plt
5
+
6
+ ## 1. UTILITY FUNCTIONS ##
7
+
8
def load_and_process_img(path_to_img, img_size=512):
    """Read an image file and return it as a batched, VGG19-preprocessed tensor.

    The image is decoded with OpenCV, converted from BGR to RGB, resized to
    ``img_size`` x ``img_size`` (the aspect ratio is not preserved), cast to
    float32, given a leading batch axis and finally passed through
    ``tf.keras.applications.vgg19.preprocess_input``.

    Args:
        path_to_img: Path of the image file to read.
        img_size: Edge length, in pixels, of the square output image.

    Returns:
        A tensor holding the preprocessed image with a leading batch axis.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from the path.
    """
    bgr_pixels = cv2.imread(path_to_img)
    if bgr_pixels is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image from path: {path_to_img}")

    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb_pixels, (img_size, img_size))
    batch = resized.astype(np.float32)[np.newaxis, ...]
    preprocessed = tf.keras.applications.vgg19.preprocess_input(batch)
    return tf.convert_to_tensor(preprocessed)
19
+
20
def tensor_to_image(tensor):
    """Convert a tensor with values in [0, 1] into a displayable uint8 image.

    Values are scaled by 255 and saturated to the [0, 255] range before the
    cast, so slightly out-of-range inputs become pure black or white instead
    of wrapping around.

    Args:
        tensor: Array-like of floats (NumPy array, eager tensor or nested
            lists). If it has more than three dimensions, the leading axis
            must have size 1 and is removed.

    Returns:
        A ``numpy.ndarray`` of dtype ``uint8``.

    Raises:
        ValueError: If the input has more than three dimensions and its
            leading axis is not of size 1.
    """
    pixels = np.asarray(tensor, dtype=np.float32) * 255.0
    # Casting out-of-range floats straight to uint8 wraps around (256 -> 0,
    # -1 -> 255) and shows up as speckle noise, so saturate first.
    pixels = np.clip(pixels, 0.0, 255.0).astype(np.uint8)
    if pixels.ndim > 3:
        # An explicit exception is used because assert statements are
        # stripped when Python runs with -O.
        if pixels.shape[0] != 1:
            raise ValueError(
                f"Expected a batch of exactly one image, got {pixels.shape[0]}"
            )
        pixels = pixels[0]
    return pixels
28
+
29
+ ## 2. STYLE TRANSFER MODEL ##
30
+
31
def gram_matrix(input_tensor):
    """Return the Gram matrix of a rank-4 feature tensor.

    For every batch entry, the products of all pairs of last-axis channels
    are summed over axes 1 and 2, then divided by the number of positions
    (the product of the sizes of axes 1 and 2).

    Args:
        input_tensor: Rank-4 tensor indexed as ``(b, i, j, c)``.

    Returns:
        A tensor indexed as ``(b, c, d)`` holding the averaged channel
        products.
    """
    dims = tf.shape(input_tensor)
    position_count = tf.cast(dims[1] * dims[2], tf.float32)
    channel_products = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
    return channel_products / position_count
37
+
38
class StyleContentModel(tf.keras.models.Model):
    """Keras model returning style (Gram matrix) and content features from VGG19."""

    def __init__(self, style_layers, content_layers):
        """Build a frozen VGG19 feature extractor for the given layers.

        Args:
            style_layers: Names of the VGG19 layers used for style features.
            content_layers: Names of the VGG19 layers used for content features.
        """
        super().__init__()

        # Pretrained ImageNet backbone without the classification head.
        backbone = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
        backbone.trainable = False

        # One output per requested layer: style layers first, then content
        # layers. ``call`` relies on this ordering to split them again.
        selected_outputs = [
            backbone.get_layer(layer_name).output
            for layer_name in style_layers + content_layers
        ]

        self.vgg = tf.keras.Model(backbone.input, selected_outputs)
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style_layers = len(style_layers)
        self.vgg.trainable = False

    def call(self, inputs):
        """Extract features from ``inputs`` and return them grouped by kind.

        The input is multiplied by 255 and fed to VGG19 directly; no further
        preprocessing is applied here.

        Args:
            inputs: Batched image tensor.

        Returns:
            A dict with two entries: ``'content'`` maps each content layer
            name to its activation, and ``'style'`` maps each style layer
            name to the Gram matrix of its activation.
        """
        features = self.vgg(inputs * 255.0)

        # Undo the concatenation performed in __init__.
        split_at = self.num_style_layers
        style_features = features[:split_at]
        content_features = features[split_at:]

        style_grams = [gram_matrix(activation) for activation in style_features]

        return {
            'content': dict(zip(self.content_layers, content_features)),
            'style': dict(zip(self.style_layers, style_grams)),
        }
78
+
79
+
80
+ ## 3. MAIN ORCHESTRATION FUNCTION ##
81
+
82
def run_style_transfer(content_path, style_path, iterations=1000, content_weight=1e4, style_weight=1e-2, extractor=None):
    """Perform neural style transfer and return the stylized image.

    Args:
        content_path: Path of the image providing the content.
        style_path: Path of the image providing the style.
        iterations: Number of optimisation steps.
        content_weight: Multiplier applied to the content loss.
        style_weight: Multiplier applied to the style loss.
        extractor: Optional pre-built ``StyleContentModel``. When omitted, a
            new one is created, which loads the VGG19 weights again; pass a
            shared instance to avoid that cost on repeated calls.

    Returns:
        A batched RGB tensor with values in [0, 1], suitable for
        ``tensor_to_image``.
    """
    if extractor is None:
        content_layers = ['block5_conv2']
        style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']
        extractor = StyleContentModel(style_layers, content_layers)

    # load_and_process_img returns VGG-preprocessed pixels (BGR order,
    # ImageNet mean subtracted, 0-255 scale), while the extractor multiplies
    # its input by 255 before calling VGG19. Dividing by 255 here makes the
    # network see exactly the preprocessed pixels and keeps the optimised
    # variable on a unit scale that suits the learning rate below.
    content_image = load_and_process_img(content_path) / 255.0
    style_image = load_and_process_img(style_path) / 255.0

    # Targets the generated image is pulled towards.
    style_targets = extractor(style_image)['style']
    content_targets = extractor(content_image)['content']

    # Per-channel means subtracted by the VGG19 preprocessing (BGR order).
    # They bound the values a real pixel can take in the scaled space:
    # 0 maps to -mean / 255 and 255 maps to (255 - mean) / 255.
    vgg_bgr_means = tf.constant([103.939, 116.779, 123.68], dtype=tf.float32)
    lower_bound = -vgg_bgr_means / 255.0
    upper_bound = (255.0 - vgg_bgr_means) / 255.0

    # Start from the content image and optimise its pixels directly.
    generated_image = tf.Variable(content_image)
    optimizer = tf.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)

    @tf.function()
    def train_step(image):
        with tf.GradientTape() as tape:
            outputs = extractor(image)

            content_loss = tf.add_n([
                tf.reduce_mean((content_targets[name] - outputs['content'][name]) ** 2)
                for name in content_targets
            ])
            style_loss = tf.add_n([
                tf.reduce_mean((style_targets[name] - outputs['style'][name]) ** 2)
                for name in style_targets
            ])
            total_loss = content_weight * content_loss + style_weight * style_loss

        grad = tape.gradient(total_loss, image)
        optimizer.apply_gradients([(grad, image)])
        # Keep every pixel inside the range of a valid image. Clipping to
        # [0, 1] here would wipe out the mean-centred initialisation.
        image.assign(tf.clip_by_value(image, lower_bound, upper_bound))

    for i in range(iterations):
        train_step(generated_image)
        if (i + 1) % 100 == 0:
            print(f"Iteration {i+1}/{iterations}")

    # Undo the preprocessing: back to the 0-255 scale, add the channel means,
    # flip BGR to RGB, then normalise to [0, 1] for tensor_to_image.
    final_tensor = generated_image * 255.0 + vgg_bgr_means
    final_tensor = tf.reverse(final_tensor, axis=[-1])
    final_tensor = tf.clip_by_value(final_tensor / 255.0, 0.0, 1.0)

    return final_tensor
135
+
136
+ ## 4. SCRIPT EXECUTION ##
137
+
138
if __name__ == '__main__':
    # Placeholder locations; point these at real image files before running.
    CONTENT_PATH = 'path/to/content_image.jpg'
    STYLE_PATH = 'path/to/style_image.jpg'

    # Run the optimisation and turn the resulting tensor into a uint8 image.
    stylized_tensor = run_style_transfer(CONTENT_PATH, STYLE_PATH, iterations=1000)
    stylized_image = tensor_to_image(stylized_tensor)

    # Show the result on screen.
    plt.figure(figsize=(8, 8))
    plt.imshow(stylized_image)
    plt.axis('off')
    plt.title("Stylized Image")
    plt.show()

    # Write the result to disk; OpenCV expects BGR channel order.
    cv2.imwrite('stylized_image.png', cv2.cvtColor(stylized_image, cv2.COLOR_RGB2BGR))
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ tensorflow
2
+ opencv-python-headless
3
+ numpy
4
+ matplotlib
5
+ gradio