tensorflow1.1/variational autoencoder人脸识别

来源:互联网 发布:22级研究所升级数据 编辑:程序博客网 时间:2024/06/06 18:48

olivettifaces是纽约大学的一个比较小的人脸库,由40个人的400张图片构成,即每个人的人脸图片为10张。每张图片的灰度级为8位,每个像素的灰度大小位于0-255之间,每张图片大小为64×64。图片大小是1190*942,一共有20*20张人脸,故每张人脸大小是(1190/20)*(942/20),即57*47=2679。本文所用的训练数据就是这张图片,400个样本,40个类别。

# coding:utf-8
"""Variational autoencoder trained on the Olivetti faces dataset.

Builds a 2-layer MLP encoder producing a 10-d Gaussian latent code
(mu, log-variance), samples z via the reparameterization trick, and
decodes back to a 57x47 face image.  Reconstructions of a few test
faces are drawn live with matplotlib while training runs.

tensorflow 1.1
matplotlib 2.02
python3
"""
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pickle

# Load the pre-pickled dataset of flattened 57*47 grayscale faces.
# NOTE(review): pickle.loads arbitrary objects — only load files you
# generated yourself.
with open('facedataset.pickle', 'rb') as f:
    (train_data, train_labels), (test_data, test_labels) = pickle.load(f)

# Shuffle each split with a single shared index permutation so data and
# labels stay aligned.  (BUG FIX: the original re-seeded np.random and
# accidentally assigned the permuted *test* arrays to train_data /
# train_labels, so the model trained on the test set.)
train_idx = np.random.RandomState(100).permutation(train_data.shape[0])
train_data = train_data[train_idx]
train_labels = train_labels[train_idx]
test_idx = np.random.RandomState(200).permutation(test_data.shape[0])
test_data = test_data[test_idx]
test_labels = test_labels[test_idx]

# Hyper-parameters.
input_dim = 57 * 47          # flattened image size
hidden_encoder_dim1 = 512
hidden_decoder_dim1 = 512
hidden_encoder_dim2 = 128
hidden_decoder_dim2 = 128
latent_dim = 10              # dimensionality of the latent code z
epochs = 3000                # number of passes over the training set
batch_size = 5
N_pictures = 3               # test faces shown in the live figure


def weight_variable(shape):
    """Weight tensor drawn from a narrow truncated normal (stddev 0.001)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.001))


def bias_variable(shape):
    """Bias tensor drawn from a unit-stddev truncated normal."""
    return tf.Variable(tf.truncated_normal(shape))


# Input placeholder: a batch of flattened face images.
x = tf.placeholder('float32', [None, input_dim])

# Accumulate L2 penalties on the weight matrices as the graph is built.
l2_losses = tf.constant(0.0)

# --- Encoder: input_dim -> 512 -> 128 ---
w_encoder1 = weight_variable([input_dim, hidden_encoder_dim1])
b_encoder1 = bias_variable([hidden_encoder_dim1])
encoder1 = tf.nn.relu(tf.matmul(x, w_encoder1) + b_encoder1)
l2_losses += tf.nn.l2_loss(w_encoder1)

w_encoder2 = weight_variable([hidden_encoder_dim1, hidden_encoder_dim2])
b_encoder2 = bias_variable([hidden_encoder_dim2])
encoder2 = tf.nn.relu(tf.matmul(encoder1, w_encoder2) + b_encoder2)
l2_losses += tf.nn.l2_loss(w_encoder2)

# Latent mean head.
mu_w_encoder2 = weight_variable([hidden_encoder_dim2, latent_dim])
mu_b_encoder2 = bias_variable([latent_dim])
mu_encoder2 = tf.matmul(encoder2, mu_w_encoder2) + mu_b_encoder2
l2_losses += tf.nn.l2_loss(mu_w_encoder2)

# Latent log-variance head (treated as log(sigma^2) below).
var_w_encoder2 = weight_variable([hidden_encoder_dim2, latent_dim])
var_b_encoder = bias_variable([latent_dim])
var_encoder2 = tf.matmul(encoder2, var_w_encoder2) + var_b_encoder
l2_losses += tf.nn.l2_loss(var_w_encoder2)

# Reparameterization trick: z = mu + sigma * eps, eps ~ N(0, I).
# sqrt(exp(logvar)) == exp(logvar / 2) == sigma.
epsilon = tf.random_normal(tf.shape(var_encoder2))
z_logvar_encoder2 = tf.sqrt(tf.exp(var_encoder2))
z = mu_encoder2 + tf.multiply(z_logvar_encoder2, epsilon)

# --- Decoder: latent_dim -> 128 -> 512 -> input_dim ---
w_decoder1 = weight_variable([latent_dim, hidden_decoder_dim2])
# BUG FIX: this bias was originally named b_decoder2 and then rebound
# for the next layer; renamed for clarity.
b_decoder1 = bias_variable([hidden_decoder_dim2])
decoder1 = tf.nn.relu(tf.matmul(z, w_decoder1) + b_decoder1)
l2_losses += tf.nn.l2_loss(w_decoder1)

w_decoder2 = weight_variable([hidden_decoder_dim2, hidden_decoder_dim1])
b_decoder2 = bias_variable([hidden_decoder_dim1])
decoder2 = tf.nn.relu(tf.matmul(decoder1, w_decoder2) + b_decoder2)
l2_losses += tf.nn.l2_loss(w_decoder2)

w_decoder3 = weight_variable([hidden_decoder_dim1, input_dim])
b_decoder3 = bias_variable([input_dim])
# Sigmoid output: pixel intensities assumed normalized to [0, 1].
decoder3 = tf.nn.sigmoid(tf.matmul(decoder2, w_decoder3) + b_decoder3)
l2_losses += tf.nn.l2_loss(w_decoder3)

# --- Loss: Bernoulli reconstruction NLL + KL(q(z|x) || N(0, I)) ---
# 1e-10 guards the logs against sigmoid saturation.
log_px_given_z = -tf.reduce_sum(
    x * tf.log(decoder3 + 1e-10) + (1 - x) * tf.log(1 - decoder3 + 1e-10), 1)
KLD = -0.5 * tf.reduce_sum(
    1 + var_encoder2 - tf.pow(mu_encoder2, 2) - tf.exp(var_encoder2), 1)
cost = tf.reduce_mean(log_px_given_z + KLD)
# NOTE(review): regularized_cost is built but the optimizer minimizes the
# plain ELBO cost (original behavior kept); switch to regularized_cost
# (ideally with a small weight on l2_losses) to actually regularize.
regularized_cost = cost + l2_losses
train = tf.train.AdamOptimizer(0.0001).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Top row: the original test faces; bottom row: live reconstructions.
    fig, a = plt.subplots(2, N_pictures, figsize=(6, 4))
    plt.ion()
    view_figures = test_data[:N_pictures]
    for i in range(N_pictures):
        a[0][i].imshow(np.reshape(view_figures[i], (57, 47)))
        a[0][i].set_xticks(())
        a[0][i].set_yticks(())

    # BUG FIX: the loop previously hard-coded range(10000) while the
    # `epochs` hyper-parameter went unused.
    for step in range(epochs):
        i = 0
        while i < train_data.shape[0]:
            batch_x = train_data[i:i + batch_size]
            i += batch_size
            _, encodered, decodered, c = sess.run(
                [train, z, decoder3, cost], feed_dict={x: batch_x})
        if step % 100 == 0:
            print('= = = = = = > > > > > >', 'train loss: %.4f' % c)
            decoder_figures = sess.run(decoder3, feed_dict={x: view_figures})
            for i in range(N_pictures):
                a[1][i].clear()
                a[1][i].imshow(np.reshape(decoder_figures[i], (57, 47)))
                a[1][i].set_axis_off()
            plt.draw()
            plt.pause(1)
    plt.ioff()

结果

识别效果明显好于autoencoder,采用小批量、低learning_rate能学习得更好
这里写图片描述

基本人脸已经能分辨出来了
这里写图片描述

阅读全文
0 0