basic_color_autoencoder
명징직조지훈
2022. 10. 23. 12:51
If an autoencoder can be trained on a sufficient number of grayscale photos as input, with the corresponding color photos as output, it should discover the hidden structure needed to colorize images appropriately. This is the reverse of denoising: rather than removing noise, the conversion to grayscale itself plays the role of the noise added to the color image.
from keras.layers import Dense, Input
from keras.layers import Conv2D, Flatten
from keras.layers import Reshape, Conv2DTranspose
from keras.models import Model
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from keras.datasets import cifar10
from keras import backend as K
from keras.utils import plot_model
import numpy as np
import matplotlib.pyplot as plt
import os
Converting a color image to grayscale uses the standard luminance weights:
grayscale = 0.299*red + 0.587*green + 0.114*blue
def rgb2gray(rgb):
    # apply the luminance weights to the RGB channels
    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])
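As a quick sanity check (a sketch added here, not part of the original post; the dummy array is hypothetical), rgb2gray maps an (H, W, 3) array to an (H, W) array:
# hypothetical check: the channel axis is reduced away
dummy = np.ones((32, 32, 3))  # fake all-white RGB image
gray = rgb2gray(dummy)
print(gray.shape)   # (32, 32)
print(gray[0, 0])   # 0.299 + 0.587 + 0.114 = 1.0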
(x_train, _), (x_test, _) = cifar10.load_data()
img_rows = x_train.shape[1]
img_cols = x_train.shape[2]
channels = x_train.shape[3]
imgs_dir = 'saved_images'
save_dir = os.path.join(os.getcwd(), imgs_dir)
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
imgs = x_test[:100]
imgs = imgs.reshape((10, 10, img_rows, img_cols, channels))
imgs = np.vstack([np.hstack(i) for i in imgs])
plt.figure()
plt.axis('off')
plt.title('Test color images (Ground Truth)')
plt.imshow(imgs, interpolation='none')
plt.savefig('%s/test_color.png' % imgs_dir)
plt.show()
x_train_gray = rgb2gray(x_train)
x_test_gray = rgb2gray(x_test)
imgs = x_test_gray[:100]
imgs = imgs.reshape((10, 10, img_rows, img_cols))
imgs = np.vstack([np.hstack(i) for i in imgs])
plt.figure()
plt.axis('off')
plt.title('Test gray images (Input)')
plt.imshow(imgs, interpolation='none', cmap='gray')
plt.savefig('%s/test_gray.png' % imgs_dir)
plt.show()
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
x_train_gray = x_train_gray.astype('float32') / 255
x_test_gray = x_test_gray.astype('float32') / 255
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, channels)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, channels)
x_train_gray = x_train_gray.reshape(x_train_gray.shape[0], img_rows, img_cols, 1)
x_test_gray = x_test_gray.reshape(x_test_gray.shape[0], img_rows, img_cols, 1)
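At this point the tensors should have the shapes below (assuming the standard CIFAR-10 split of 50,000 training and 10,000 test images); the print statements are a check added here, not in the original post:
print(x_train.shape)       # (50000, 32, 32, 3)  color targets
print(x_train_gray.shape)  # (50000, 32, 32, 1)  grayscale inputs
print(x_test.shape)        # (10000, 32, 32, 3)
print(x_test_gray.shape)   # (10000, 32, 32, 1)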
input_shape = (img_rows, img_cols, 1)
batch_size = 32
kernel_size = 3
latent_dim = 256
layer_filters = [64, 128, 256]
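Since each of the three convolutions below uses strides=2 with padding='same', the feature maps shrink 32 → 16 → 8 → 4, so the flattened encoder output has 4 * 4 * 256 = 4096 elements feeding the 256-dimensional latent Dense layer. This matches the encoder summary further down.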
inputs = Input(shape=input_shape, name='encoder_input')
x = inputs
for filters in layer_filters:
    x = Conv2D(filters=filters, kernel_size=kernel_size, strides=2, activation='relu', padding='same')(x)
# remember the feature-map shape so the decoder can mirror it
shape = K.int_shape(x)
x = Flatten()(x)
latent = Dense(latent_dim, name='latent_vector')(x)
encoder = Model(inputs, latent, name='encoder')
encoder.summary()
latent_inputs = Input(shape=(latent_dim,), name='decoder_input')
x = Dense(shape[1]*shape[2]*shape[3])(latent_inputs)
x = Reshape((shape[1], shape[2], shape[3]))(x)
for filters in layer_filters[::-1]:
    x = Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=2, activation='relu', padding='same')(x)
output = Conv2DTranspose(filters=channels, kernel_size=kernel_size, activation='sigmoid', padding='same', name='decoder_output')(x)
decoder = Model(latent_inputs, output, name='decoder')
decoder.summary()
autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder')
autoencoder.summary()
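The plot_model import is not otherwise used; if pydot and graphviz are installed, the architecture diagram can optionally be rendered to disk (the filename here is an example, not from the original post):
plot_model(autoencoder, to_file='autoencoder.png', show_shapes=True)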
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'colorized_ae_model.{epoch:03d}.h5'
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, verbose=1, min_lr=0.5e-6)
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_loss', verbose=1, save_best_only=True)
autoencoder.compile(loss='mse', optimizer='adam')
callbacks = [lr_reducer, checkpoint]
autoencoder.fit(x_train_gray, x_train, validation_data=(x_test_gray, x_test), epochs=30, batch_size=batch_size, callbacks=callbacks)
x_decoded = autoencoder.predict(x_test_gray)
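To inspect the result, the predictions can be tiled into the same 10x10 grid as the ground-truth images above; this block follows the pattern of the earlier plotting code and is a sketch added here (the output filename is an example):
imgs = x_decoded[:100]
imgs = imgs.reshape((10, 10, img_rows, img_cols, channels))
imgs = np.vstack([np.hstack(i) for i in imgs])
plt.figure()
plt.axis('off')
plt.title('Colorized test images (Predicted)')
plt.imshow(imgs, interpolation='none')
plt.savefig('%s/colorized.png' % imgs_dir)
plt.show()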
Model: "encoder"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
encoder_input (InputLayer) [(None, 32, 32, 1)] 0
conv2d (Conv2D) (None, 16, 16, 64) 640
conv2d_1 (Conv2D) (None, 8, 8, 128) 73856
conv2d_2 (Conv2D) (None, 4, 4, 256) 295168
flatten (Flatten) (None, 4096) 0
latent_vector (Dense) (None, 256) 1048832
=================================================================
Total params: 1,418,496
Trainable params: 1,418,496
Non-trainable params: 0
_________________________________________________________________
Model: "decoder"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
decoder_input (InputLayer) [(None, 256)] 0
dense (Dense) (None, 4096) 1052672
reshape (Reshape) (None, 4, 4, 256) 0
conv2d_transpose (Conv2DTranspose) (None, 8, 8, 256) 590080
conv2d_transpose_1 (Conv2DTranspose) (None, 16, 16, 128) 295040
conv2d_transpose_2 (Conv2DTranspose) (None, 32, 32, 64) 73792
decoder_output (Conv2DTranspose) (None, 32, 32, 3) 1731
=================================================================
Total params: 2,013,315
Trainable params: 2,013,315
Non-trainable params: 0
_________________________________________________________________
Model: "autoencoder"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
encoder_input (InputLayer) [(None, 32, 32, 1)] 0
encoder (Functional) (None, 256) 1418496
decoder (Functional) (None, 32, 32, 3) 2013315
=================================================================
Total params: 3,431,811
Trainable params: 3,431,811
Non-trainable params: 0
_________________________________________________________________
The autoencoder's capacity was increased by adding convolution and transposed-convolution blocks. Finally, the number of output filters is increased to 3, matching the RGB channels of the colorized output.