File size: 6,287 Bytes
f234477
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import tensorflow as tf
import numpy as np
import cv2
import matplotlib.pyplot as plt

## 1. UTILITY FUNCTIONS ##

def load_and_process_img(path_to_img, img_size=512):
    """Read an image file and return it as a VGG19-preprocessed batch tensor.

    Args:
        path_to_img: Path of the image file, read with ``cv2.imread``.
        img_size: Edge length of the square the image is resized to. Width
            and height are both forced to this value, so the aspect ratio
            is not preserved.

    Returns:
        A tensor of shape ``(1, img_size, img_size, channels)`` holding the
        float32 pixels after ``tf.keras.applications.vgg19.preprocess_input``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read the file.
    """
    bgr_pixels = cv2.imread(path_to_img)
    if bgr_pixels is None:
        raise FileNotFoundError(f"Could not read image from path: {path_to_img}")

    # OpenCV decodes to BGR; preprocess_input is fed RGB here.
    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    square = cv2.resize(rgb_pixels, (img_size, img_size))

    # Promote to float32 and prepend the batch axis in one expression.
    batch = square.astype(np.float32)[np.newaxis, ...]
    return tf.convert_to_tensor(
        tf.keras.applications.vgg19.preprocess_input(batch)
    )

def tensor_to_image(tensor):
    """Convert a float image with values in [0, 1] into a uint8 pixel array.

    The input is multiplied by 255, clamped to [0, 255] and truncated to
    ``uint8``. If the result has more than three dimensions, the leading
    (batch) axis must have length 1 and is dropped.

    Args:
        tensor: Array-like image data (NumPy array or anything
            ``np.asarray`` accepts after ``* 255``).

    Returns:
        A ``numpy.ndarray`` of dtype ``uint8``.

    Raises:
        ValueError: If the input has more than three dimensions and its
            leading axis is not of length 1.
    """
    scaled = np.asarray(tensor * 255)

    # Clamp before the cast: casting out-of-range floats to uint8 wraps
    # around (e.g. 382.5 -> 126) instead of saturating.
    pixels = np.clip(scaled, 0, 255).astype(np.uint8)

    if pixels.ndim > 3:
        # Explicit raise instead of `assert`, which is stripped under -O.
        if pixels.shape[0] != 1:
            raise ValueError(
                f"Expected a batch of exactly one image, got {pixels.shape[0]}"
            )
        pixels = pixels[0]
    return pixels

## 2. STYLE TRANSFER MODEL ##

def gram_matrix(input_tensor):
    """Return the Gram matrix of a rank-4 feature tensor, averaged over positions.

    For every batch element, the products of channel pairs are summed over
    axes 1 and 2 and the sum is divided by the number of positions
    (size of axis 1 times size of axis 2).

    Args:
        input_tensor: Tensor indexed as ``(b, i, j, c)``.

    Returns:
        Tensor indexed as ``(b, c, c)``.
    """
    dims = tf.shape(input_tensor)
    position_count = tf.cast(dims[1] * dims[2], tf.float32)
    channel_products = tf.linalg.einsum(
        'bijc,bijd->bcd', input_tensor, input_tensor
    )
    return channel_products / position_count

class StyleContentModel(tf.keras.models.Model):
    """Feature extractor returning style Gram matrices and content activations.

    Wraps an ImageNet-weighted VGG19 (without its top) and exposes the
    outputs of the requested layers.
    """

    def __init__(self, style_layers, content_layers):
        """Build the multi-output VGG19 sub-model.

        Args:
            style_layers: Names of the VGG19 layers used as style features.
            content_layers: Names of the VGG19 layers used as content features.
        """
        super().__init__()

        backbone = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
        backbone.trainable = False

        # Style taps come first, content taps second; call() splits the
        # outputs by position using num_style_layers.
        tapped_outputs = [
            backbone.get_layer(layer_name).output
            for layer_name in (*style_layers, *content_layers)
        ]

        self.vgg = tf.keras.Model(backbone.input, tapped_outputs)
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style_layers = len(style_layers)
        self.vgg.trainable = False

    def call(self, inputs):
        """Extract features from ``inputs``.

        The inputs are multiplied by 255 and passed straight to the VGG
        sub-model; no other preprocessing is applied in this method.

        Returns:
            ``{'content': {layer_name: activation}, 'style': {layer_name: gram}}``
            where each style entry is the ``gram_matrix`` of that layer's
            activation.
        """
        features = self.vgg(inputs * 255.0)

        boundary = self.num_style_layers
        style_grams = [gram_matrix(activation) for activation in features[:boundary]]
        content_activations = features[boundary:]

        return {
            'content': dict(zip(self.content_layers, content_activations)),
            'style': dict(zip(self.style_layers, style_grams)),
        }

    
## 3. MAIN ORCHESTRATION FUNCTION ##

def run_style_transfer(content_path, style_path, iterations=1000, content_weight=1e4, style_weight=1e-2):
    """Perform neural style transfer and return the stylized image.

    The generated image starts as a copy of the content image and is
    optimised with Adam so that its VGG19 content activations match the
    content image and its style Gram matrices match the style image.

    Args:
        content_path: Path of the content image.
        style_path: Path of the style image.
        iterations: Number of optimisation steps. Progress is printed every
            100 steps.
        content_weight: Multiplier applied to the content loss.
        style_weight: Multiplier applied to the style loss.

    Returns:
        A float tensor of shape ``(1, H, W, 3)`` in RGB channel order with
        values in ``[0, 1]``, ready for ``tensor_to_image``.

    Raises:
        FileNotFoundError: Propagated from ``load_and_process_img`` when an
            image cannot be read.
    """
    # Define the layers to use for style and content
    content_layers = ['block5_conv2']
    style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']

    # Build the feature extractor model
    extractor = StyleContentModel(style_layers, content_layers)

    # load_and_process_img returns VGG-preprocessed pixels (BGR, ImageNet
    # mean subtracted, roughly [-124, 151]) while StyleContentModel.call
    # multiplies its input by 255. Dividing by 255 here makes the two
    # cancel, so VGG receives exactly the preprocessed image instead of a
    # 255x over-scaled one.
    content_image = load_and_process_img(content_path) / 255.0
    style_image = load_and_process_img(style_path) / 255.0

    # Per-channel (BGR) ImageNet means subtracted by the VGG preprocessing.
    vgg_bgr_mean = tf.constant([103.939, 116.779, 123.68], dtype=tf.float32)
    # Range that valid 0..255 pixels occupy in the working (scaled,
    # mean-centred) domain. Clipping to [0, 1] here would wipe out the
    # content initialisation after the very first step.
    lower_bound = -vgg_bgr_mean / 255.0
    upper_bound = (255.0 - vgg_bgr_mean) / 255.0

    # Calculate the target style and content features
    style_targets = extractor(style_image)['style']
    content_targets = extractor(content_image)['content']

    # Initialize the image to be generated and the optimizer
    generated_image = tf.Variable(content_image)
    optimizer = tf.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)

    @tf.function()
    def train_step(image):
        with tf.GradientTape() as tape:
            outputs = extractor(image)

            # Mean squared error per layer, summed over layers.
            content_loss = tf.add_n([
                tf.reduce_mean((content_targets[name] - outputs['content'][name]) ** 2)
                for name in content_targets
            ])
            style_loss = tf.add_n([
                tf.reduce_mean((style_targets[name] - outputs['style'][name]) ** 2)
                for name in style_targets
            ])
            total_loss = content_weight * content_loss + style_weight * style_loss

        # Apply gradients, then keep every pixel inside the valid range.
        grad = tape.gradient(total_loss, image)
        optimizer.apply_gradients([(grad, image)])
        image.assign(tf.clip_by_value(image, lower_bound, upper_bound))

    # Run the optimization loop
    for i in range(iterations):
        train_step(generated_image)
        if (i + 1) % 100 == 0:
            print(f"Iteration {i+1}/{iterations}")

    # Undo the VGG preprocessing: rescale to 0..255, add the channel means
    # back, and flip BGR to RGB.
    bgr_pixels = generated_image * 255.0 + vgg_bgr_mean
    rgb_pixels = tf.reverse(bgr_pixels, axis=[-1])
    rgb_pixels = tf.clip_by_value(rgb_pixels, 0.0, 255.0)

    # tensor_to_image multiplies by 255, so hand back values in [0, 1].
    return rgb_pixels / 255.0

## 4. SCRIPT EXECUTION ##

if __name__ == '__main__':
    # Define paths to your images
    CONTENT_PATH = 'path/to/content_image.jpg'
    STYLE_PATH = 'path/to/style_image.jpg'
    OUTPUT_PATH = 'stylized_image.png'

    # Run the style transfer process
    final_tensor = run_style_transfer(CONTENT_PATH, STYLE_PATH, iterations=1000)

    # Convert the final tensor to a uint8 RGB image
    final_image = tensor_to_image(final_tensor)

    # Save before displaying so the result of a long run is already on disk
    # if the viewer window is never closed cleanly. OpenCV expects BGR.
    final_image_bgr = cv2.cvtColor(final_image, cv2.COLOR_RGB2BGR)
    # cv2.imwrite signals failure through its return value, so check it
    # instead of silently losing the output.
    if not cv2.imwrite(OUTPUT_PATH, final_image_bgr):
        print(f"Warning: could not write image to path: {OUTPUT_PATH}")

    # Display the stylized image
    plt.figure(figsize=(8, 8))
    plt.imshow(final_image)
    plt.axis('off')
    plt.title("Stylized Image")
    plt.show()