tensorflow 构建CNN识别自己的数据（类似于MNIST）

来源：互联网发布：隔音窗知乎编辑：程序博客网时间：2024/05/16 14:46

转载请注明出处

在学习了通过构建CNN来识别MNIST之后，我们都大概理解了tensorflow的构建过程，那么怎么才能用CNN来识别自己的数据呢？
想想都有意思，那就请认真的看下面的文章

说明：
1. 图片数据是通过代码自己生成的不用下载，下面先讲的就是怎么生成图片，生成的时候需要用到字体文件和cv2库我在下文中都给出了地址和安装方式。
2. 生成了图片之后，训练的时候image是从文件夹读的，label是从文本读的，如果想改，完全可以按照规则改成自己的数据，不过记得在写卷积之后的全连接层的时候，一定要按照图片的大小修改输入的维度。
3. 有什么问题欢迎留言。

解释基本都在代码中，基本都有注释，欢迎留言

**完整代码和image样本：如果把data下面的训练图片和测试图片都下载了的话，可以不用再生成图片，可直接训练，再研究如何调用图片即可
https://coding.net/u/centaur/p/Share/git/tree/master/custom_image_tutorials**

生成数据

先看一下生成的数据如下图

自己生成的数据.png
这里的数据类似于MNIST的形式，只含有十个数字。

MNIST是手写的数字，这里为了模仿这个效果对图片做了一定的扭曲和旋转
下面做一个对比
mnist数据.png
自己构建的数据.png

因为文件中多个地方都用到了相对于代码存储位置，这里大家先看一下我的目录结构

目录结构.png

data下面的image分别是测试和训练的样本，路径分别是"data/image/test/"和"data/image/train/"
下面是两个文本分别是训练和测试的label，文本中9#8#1#8#5#7#7#1#4#0#3#2#7#8#9#8#5#3#1#8#4#1#1#5#2#7#9#5#2#7#0#1#7......每两个标签之间通过#连接，图片是按照0,1,2…的顺序命名的，这样正好把image和label对应了起来，在训练的时候容易取。
DroidSansMono.ttf是生成图片中字的字体，可做更改，代码中也要记着更改。
get_text.py是生成两个label文本的代码，其中可指定生成训练和测试样本的数量
read_text_generate_image.py是通过读label文本然后生成对应图片的代码。在生成图片的时候一定要记着把字体放在data/目录下面（下载地址）
- train.py是训练的代码

生成测试样本和训练样本的label。

生成label的代码比较简单就不做赘述

# get_text.py: generate the '#'-separated label files for the train and test sets.
import os
import random

number = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
path_dir = "data/"

# Create the output directory if it is missing. exist_ok avoids the
# check-then-create race of `if not os.path.exists(...)`.
os.makedirs(path_dir, exist_ok=True)


def random_number_text(char_set=number, code_size=1):
    """Return a list of ``code_size`` characters picked at random from ``char_set``.

    The default produces a single digit; ``code_size`` is kept so that
    multi-character labels can be generated later.
    """
    return [random.choice(char_set) for _ in range(code_size)]


def write_labels(size, name):
    """Generate ``size`` random labels and write them to ``path_dir + name``.

    Labels are joined with '#' (no trailing separator). The joined text is
    also printed to stdout.
    """
    # Each label is the concatenation of the characters of one random code.
    code_list = [''.join(random_number_text()) for _ in range(size)]
    code_text = '#'.join(code_list)
    print(code_text)
    # The context manager closes the file even if the write fails.
    with open(path_dir + name, 'w', encoding="utf-8") as f:
        f.write(code_text)


def main():
    """Write the training and test label files with the sizes set below."""
    # Sample counts; adjust as needed.
    train_size = 5000
    test_size = 1000
    train_label_name = "code_train_text.txt"
    test_label_name = "code_test_text.txt"
    write_labels(train_size, train_label_name)
    write_labels(test_size, test_label_name)


if __name__ == '__main__':
    main()

根据样本生成image

在生成图片的时候一定要记着把字体放在data/目录下面（下载地址）

有人安装opencv 请用pip install opencv-python这个指令

如果你的电脑是Windows还可能报下面这个错误:

import cv2
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ImportError: DLL load failed: 找不到指定的模块。

不要惊慌，这是因为opencv需要依赖MSVCP140.dll 这个C++类库，而python 3.5 以上的版本不包括这个类库，你的系统正好是windows 家庭版啥的，也没有这个类库，因此去下载

不要惊慌解决方案
生成图片需要用到cv2 这个库记着安装pip install opencv-python
read_text_generate_image.py

# read_text_generate_image.py: render one distorted digit image per label.
import os
import random

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Output folders, created up front. exist_ok avoids the check-then-create race.
image_path_train = "data/image/train/"
os.makedirs(image_path_train, exist_ok=True)
image_path_test = "data/image/test/"
os.makedirs(image_path_test, exist_ok=True)

# Font used to draw the characters.
DEFAULT_FONTS = "data/DroidSansMono.ttf"
# Size of the generated images, in pixels.
WIDHT = 28
HEIGHT = 28


def get_content_from_file(label_name):
    """Read ``data/<label_name>`` and return its '#'-separated labels as a list."""
    # The context manager closes the handle; the original left it open.
    with open("data/" + label_name, "r", encoding="utf-8") as content:
        code_text = content.read()
    return code_text.split("#")


def convert2gray(img):
    """Collapse a multi-channel image array to one channel.

    Arrays with more than two dimensions are averaged over the last axis
    with NumPy; arrays that already have two or fewer dimensions are
    returned unchanged.
    """
    if len(img.shape) > 2:
        # A plain channel mean is fast. The weighted conversion would be:
        #   r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
        #   gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
        return np.mean(img, -1)
    return img


def generate_image(i, c, dir_path):
    """Render character ``c`` and save it as ``<dir_path><i>.jpg``.

    Prints the target path and the character before writing the file
    with ``cv2.imwrite``.
    """
    path = dir_path + str(i) + ".jpg"
    print(path)
    color = (0, 0, 0)  # text colour
    background = (255, 255, 255)  # background colour
    print(str(i) + "要存的字符是" + c)
    image = create_image_one_char(c, color, background)
    image = convert2gray(np.array(image))  # collapse to a single channel
    cv2.imwrite(path, image)


def create_image_one_char(c, color, background):
    """Return a WIDHT x HEIGHT RGB image of ``c``, randomly rotated and warped."""
    font = ImageFont.truetype(DEFAULT_FONTS, 30)
    im = Image.new('RGBA', (WIDHT, HEIGHT), background)
    drawAvatar = ImageDraw.Draw(im)
    # Size is captured before crop/rotate, so w and h are the canvas size.
    w, h = im.size
    drawAvatar.text((4, -3), c, fill=color, font=font)  # draw the character
    del drawAvatar  # release the drawing object

    # Rotate the whole image by a random angle in [-30, 30] degrees.
    im = im.crop(im.getbbox())
    im = im.rotate(random.uniform(-30, 30), Image.BILINEAR, expand=1)

    # Warp: pick random offsets used to build the four corners of a
    # distorted source quadrilateral.
    dx = w * random.uniform(0.1, 0.4)
    dy = h * random.uniform(0.2, 0.5)
    x1 = int(random.uniform(-dx, dx))
    y1 = int(random.uniform(-dy, dy))
    x2 = int(random.uniform(-dx, dx))
    y2 = int(random.uniform(-dy, dy))
    w2 = w + abs(x1) + abs(x2)
    h2 = h + abs(y1) + abs(y2)
    # 8-tuple (x0, y0, x1, y1, x2, y2, x3, y3): the upper-left, lower-left,
    # lower-right and upper-right corners of the source quadrilateral.
    data = (
        x1, y1,
        -x1, h2 - y2,
        w2 + x2, h2 + y2,
        w2 - x2, -y1,
    )
    im = im.resize((w2, h2))
    im = im.transform((WIDHT, HEIGHT), Image.QUAD, data)

    # Paste the warped glyph onto a clean canvas, using it as its own mask.
    image = Image.new('RGB', (WIDHT, HEIGHT), background)
    image.paste(im, (0, 0), im)
    return image


# Label files produced by get_text.py.
train_label_name = "code_train_text.txt"
test_label_name = "code_test_text.txt"


def write_image(label_name, dir_path):
    """Generate one image in ``dir_path`` for every label in ``label_name``.

    Images are named by the label's position (0.jpg, 1.jpg, ...), which is
    how the image files stay aligned with the label file.
    """
    for index, code in enumerate(get_content_from_file(label_name)):
        generate_image(index, code, dir_path)


def main():
    """Generate the training images, then the test images."""
    write_image(train_label_name, image_path_train)
    write_image(test_label_name, image_path_test)


if __name__ == '__main__':
    main()

CNN训练

转载请注明出处http://www.jianshu.com/p/75a8eb564de7

网络模型

网络模型是一个两层的卷积和池化加上两个全连接层

def code_cnn():
    """Build the CNN on ``x_image`` and return the softmax output tensor.

    Two 5x5 convolution + 2x2 max-pool stages (32, then 64 channels) are
    followed by dropout, a 1024-unit fully connected layer with dropout,
    and a 10-way softmax output layer.
    """
    def conv_relu_pool(inputs, in_channels, out_channels):
        # One stage: 5x5 convolution, bias add, ReLU, then 2x2 max pooling.
        # NOTE(review): the conv bias is created with weigth_variable rather
        # than bias_varibale (which the fully connected layers use); kept as is.
        kernel = weigth_variable([5, 5, in_channels, out_channels])
        offset = weigth_variable([out_channels])
        activated = tf.nn.relu(conv2d(inputs, kernel) + offset)
        return max_pool_2x2(activated)

    flat_size = 7 * 7 * 64

    # Convolution stages: 28*28*32 -> pooled 14*14*32, then 14*14*64 -> pooled 7*7*64.
    pooled = conv_relu_pool(x_image, 1, 32)
    pooled = conv_relu_pool(pooled, 32, 64)
    pooled = tf.nn.dropout(pooled, keep_prob)

    # First fully connected layer.
    fc1_weights = weigth_variable([flat_size, 1024])
    fc1_bias = bias_varibale([1024])
    # [n_samples, 7, 7, 64] -> [n_samples, 7*7*64]
    flattened = tf.reshape(pooled, [-1, flat_size])
    hidden = tf.nn.relu(tf.matmul(flattened, fc1_weights) + fc1_bias)
    hidden = tf.nn.dropout(hidden, keep_prob)  # dropout against overfitting

    # Output layer: one score per digit class, passed through softmax.
    out_weights = weigth_variable([1024, 10])
    out_bias = bias_varibale([10])
    return tf.nn.softmax(tf.matmul(hidden, out_weights) + out_bias)

取数据训练

既然把数据存成了图片那么就需要把图片读出来转成tensor

1. 把image和label从文件中读出来，读成相应的list方便操作。

def getStrContent(path):
    """Return the full text content of the UTF-8 file at ``path``."""
    # The context manager closes the handle; the original left it open.
    with open(path, 'r', encoding="utf-8") as f:
        return f.read()


def get_image_path_labels(IMAGE_PATH=IMAGE_PATH, LABEL_PATH=LABEL_PATH, IMAGE_MUMBER=IMAGE_MUMBER):
    """Return the image paths and labels of a dataset, for use in training.

    Args:
        IMAGE_PATH: folder prefix holding images named ``0.jpg``, ``1.jpg``, ...
        LABEL_PATH: text file holding the '#'-separated labels.
        IMAGE_MUMBER: number of image paths to generate.

    Returns:
        ``(image_paths, labels)``: ``image_paths`` has ``IMAGE_MUMBER``
        entries; ``labels`` holds every label in the file, which may be
        more than ``IMAGE_MUMBER``.
    """
    image_paths = [IMAGE_PATH + str(each) + ".jpg" for each in range(IMAGE_MUMBER)]
    labels = getStrContent(LABEL_PATH).split("#")
    return image_paths, labels


def main():
    """Load the training image paths and labels, then train the CNN."""
    image_paths, labels = get_image_path_labels()
    train_code_cnn(image_paths, labels)

2. 数据被放在了对应的list中那么就可对应的取数据，训练数据

我们训练样本生成了5000个测试样本生成了1000个
下面代码中的batch设置为了100 每次把100张图片存成一个tensor.
所以我们需要对5000个数据进行迭代读取。
生成一个batch

def get_next_batch(batch_size, each, images, labels):
    """Assemble the ``each``-th consecutive training batch.

    Args:
        batch_size: number of samples per batch.
        each: batch index; with 5000 images and a batch of 100 the caller
            passes 0..49 in turn.
        images: list of image file paths.
        labels: list of label strings, aligned with ``images``.

    Returns:
        ``(batch_x, batch_y)``: the flattened images scaled by 1/255, and
        the label vectors produced by ``text2vec``.
    """
    batch_x = np.zeros([batch_size, IMAGE_HEIGHT * IMAGE_WIDTH])  # image tensor
    batch_y = np.zeros([batch_size, 10])  # label tensor

    first = each * batch_size  # index of the first sample in this batch
    for row in range(batch_size):
        sample = first + row
        text = labels[sample]
        pixels = np.array(Image.open(images[sample]))  # open the image by path
        pixels = convert2gray(pixels)  # collapse to a single channel
        # Alternative scaling with mean 0: (pixels.flatten() - 128) / 128
        batch_x[row, :] = pixels.flatten() / 255
        batch_y[row, :] = text2vec(text)
    return batch_x, batch_y

训练数据
在训练的时候会进行测试，分别用测试数据集和训练数据集合进行测试
一般来说训练数据要比测试数据好很多，不过我训练了一会发现两个差不多

      # Excerpt from the training function: one pass over the training set per
      # epoch, followed by an accuracy check on a random test batch and a
      # random training batch.
      for epoch in range(EPOCH):
          # One epoch: walk through the training set in consecutive batches.
          for each in range(int(IMAGE_MUMBER / BATCH_SIZE)):
              batch_x, batch_y = get_next_batch(BATCH_SIZE, each, image_paths, labels)
              # Training step; keep_prob 0.5 enables dropout while training.
              _, loss_ = sess.run([train_step, cross_entropy]
                                  , feed_dict={xs: batch_x, ys: batch_y, keep_prob: 0.5})
              print("epoch: %d  iter: %d/%d   loss: %f"
                     % (epoch + 1, BATCH_SIZE * each, IMAGE_MUMBER, loss_))
          # Accuracy on the test samples.
          # As in training, the image paths and labels are first read into
          # lists, then turned into tensors and passed to the accuracy function.
          # Only the first 200 test images are sampled from.
          test_iamge_path = "data/image/test/"
          test_labels_path = "data/code_test_text.txt"
          test_image_paths, test_labels = \
              get_image_path_labels(test_iamge_path, test_labels_path, 200)
          batch_x_test, batch_y_test = \
              get_random_batch(BATCH_SIZE, test_image_paths, test_labels,200)
          accuracy_test = compute_accuracy(batch_x_test, batch_y_test, sess)
          print("测试样本测试 epoch: %d  acc: %f" % (epoch + 1, accuracy_test))
          # Accuracy on a random batch of training samples.
          batch_x_test, batch_y_test = get_random_batch(BATCH_SIZE, image_paths, labels)
          accuracy = compute_accuracy(batch_x_test, batch_y_test, sess)
          print("训练样本测试 epoch: %d  acc: %f" % (epoch + 1, accuracy))

因为图片比较小，训练起来很快，在第28个epoch的时候训练数据的准确率能达到100%，测试数据的准确率能达到99%

epoch: 28  iter: 4300/5000   loss: 0.070238
epoch: 28  iter: 4400/5000   loss: 0.039228
epoch: 28  iter: 4500/5000   loss: 0.039181
epoch: 28  iter: 4600/5000   loss: 0.048799
epoch: 28  iter: 4700/5000   loss: 0.165373
epoch: 28  iter: 4800/5000   loss: 0.040288
epoch: 28  iter: 4900/5000   loss: 0.061203
测试样本 epoch: 28  acc: 0.990000
训练样本 epoch: 28  acc: 1.000000

转载请注明出处http://www.jianshu.com/p/75a8eb564de7
到这里算是介绍完了，我把完整的代码贴在下面

# train.py: train a small CNN on the generated digit images.
import random

import numpy as np
import tensorflow as tf
from PIL import Image

IMAGE_MUMBER = 5000
EPOCH = 200
BATCH_SIZE = 100
IMAGE_PATH = "data/image/train/"
LABEL_PATH = "data/code_train_text.txt"


def weigth_variable(shape):
    """Return a variable of ``shape`` initialised from a truncated normal (stddev 0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_varibale(shape):
    """Return a variable of ``shape`` initialised to the constant 0.1."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W):
    """2-D convolution of ``x`` with kernel ``W``, stride 1, 'SAME' padding."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    """2x2 max pooling with stride 2 and 'SAME' padding."""
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


IMAGE_HEIGHT = 28
IMAGE_WIDTH = 28
CHAR_SET_LEN = 10

# Graph inputs: flattened images, one-hot labels, dropout keep probability.
xs = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT * IMAGE_WIDTH])
ys = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)  # used by dropout against overfitting
x_image = tf.reshape(xs, [-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])


def code_cnn():
    """Build the CNN on ``x_image`` and return the softmax output tensor."""
    # First convolution layer.
    # NOTE(review): conv biases are created with weigth_variable rather than
    # bias_varibale (which the fully connected layers use); kept as is.
    W_conv1 = weigth_variable([5, 5, 1, 32])
    b_conv1 = weigth_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)  # 28*28*32
    h_pool1 = max_pool_2x2(h_conv1)  # 14*14*32
    # Second convolution layer.
    W_conv2 = weigth_variable([5, 5, 32, 64])
    b_conv2 = weigth_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)  # 14*14*64
    h_pool2 = max_pool_2x2(h_conv2)  # 7*7*64
    h_pool2 = tf.nn.dropout(h_pool2, keep_prob)
    # First fully connected layer.
    W_fc1 = weigth_variable([7 * 7 * 64, 1024])
    b_fc1 = bias_varibale([1024])
    # [n_samples, 7, 7, 64] -> [n_samples, 7*7*64]
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)  # dropout against overfitting
    # Output fully connected layer with softmax.
    W_fc2 = weigth_variable([1024, 10])
    b_fc2 = bias_varibale([10])
    prediction = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    return prediction


def convert2gray(img):
    """Collapse a multi-channel image array to one channel.

    Arrays with more than two dimensions are averaged over the last axis;
    other arrays are returned unchanged.
    """
    if len(img.shape) > 2:
        # A plain channel mean is fast. The weighted conversion would be:
        #   r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
        #   gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
        return np.mean(img, -1)
    return img


def text2vec(text):
    """Encode ``text`` as a vector of length ``CHAR_SET_LEN``.

    The i-th character sets index ``i * CHAR_SET_LEN + char2pos(c)`` to 1,
    so with the current vector length only a single digit fits; any other
    input indexes past the end and raises IndexError. An empty string
    yields an all-zero vector.
    """
    vector = np.zeros(1 * CHAR_SET_LEN)

    def char2pos(c):
        # Position of c in the alphabet 0-9, A-Z, a-z, '_'.
        if c == '_':
            return 62
        k = ord(c) - 48
        if k > 9:
            k = ord(c) - 55
            if k > 35:
                k = ord(c) - 61
                if k > 61:
                    raise ValueError('No Map')
        return k

    for i, c in enumerate(text):
        idx = i * CHAR_SET_LEN + char2pos(c)
        vector[idx] = 1
    return vector


def vec2text(vec):
    """Decode the non-zero positions of ``vec`` back into a string."""
    text = []
    for c in vec.nonzero()[0]:
        char_idx = c % CHAR_SET_LEN
        if char_idx < 10:
            char_code = char_idx + ord('0')
        elif char_idx < 36:
            char_code = char_idx - 10 + ord('A')
        elif char_idx < 62:
            char_code = char_idx - 36 + ord('a')
        elif char_idx == 62:
            char_code = ord('_')
        else:
            raise ValueError('error')
        text.append(chr(char_code))
    return "".join(text)


def get_next_batch(batch_size, each, images, labels):
    """Assemble the ``each``-th consecutive batch of ``batch_size`` samples.

    Returns ``(batch_x, batch_y)``: flattened images scaled by 1/255, and
    label vectors produced by ``text2vec``.
    """
    batch_x = np.zeros([batch_size, IMAGE_HEIGHT * IMAGE_WIDTH])
    batch_y = np.zeros([batch_size, 10])

    def get_text_and_image(i, each):
        image_num = each * batch_size + i
        label = labels[image_num]
        image_path = images[image_num]
        captcha_image = Image.open(image_path)
        captcha_image = np.array(captcha_image)
        return label, captcha_image

    for i in range(batch_size):
        text, image = get_text_and_image(i, each)
        image = convert2gray(image)
        # Alternative scaling with mean 0: (image.flatten() - 128) / 128
        batch_x[i, :] = image.flatten() / 255
        batch_y[i, :] = text2vec(text)
    return batch_x, batch_y


def get_random_batch(batch_size, images, labels, IMAGE_MUMBER=IMAGE_MUMBER):
    """Assemble a batch of ``batch_size`` samples drawn at random.

    Sample indices are drawn with replacement from ``0 .. IMAGE_MUMBER - 1``.
    Returns ``(batch_x, batch_y)`` in the same format as ``get_next_batch``.
    """
    batch_x = np.zeros([batch_size, IMAGE_HEIGHT * IMAGE_WIDTH])
    batch_y = np.zeros([batch_size, 1 * CHAR_SET_LEN])

    def get_captcha_text_and_image(i):
        label = labels[i]
        captcha_image = np.array(Image.open(images[i]))
        return label, captcha_image

    for i in range(batch_size):
        text, image = get_captcha_text_and_image(random.randint(0, IMAGE_MUMBER - 1))
        image = convert2gray(image)
        # Alternative scaling with mean 0: (image.flatten() - 128) / 128
        batch_x[i, :] = image.flatten() / 255
        batch_y[i, :] = text2vec(text)
    return batch_x, batch_y


def compute_accuracy(v_xs, v_ys, sess):
    """Return the fraction of samples in ``v_xs`` predicted as labelled in ``v_ys``.

    Args:
        v_xs: batch of flattened images.
        v_ys: matching one-hot label vectors.
        sess: session in which the model variables are initialised.
    """
    y_pre = sess.run(prediction, feed_dict={xs: v_xs, keep_prob: 1})
    # Compare in NumPy. Creating tf.equal/tf.argmax/tf.reduce_mean ops here
    # would add new nodes to the graph on every call, so the graph would
    # keep growing with each epoch.
    hits = np.argmax(y_pre, 1) == np.argmax(v_ys, 1)
    return np.mean(hits.astype(np.float32))


# Build the network once at import time; training and evaluation share it.
prediction = code_cnn()


def train_code_cnn(image_paths, labels):
    """Train the CNN for ``EPOCH`` epochs, printing loss and accuracies.

    After every epoch the accuracy is printed for one random batch drawn
    from the first 200 test images and one random batch of training images.
    """
    # Cross-entropy loss. The prediction is clipped away from 0 so that
    # log(0) cannot turn the loss into NaN.
    safe_prediction = tf.clip_by_value(prediction, 1e-10, 1.0)
    cross_entropy = tf.reduce_mean(
        -tf.reduce_sum(ys * tf.log(safe_prediction), reduction_indices=[1]))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    # The test-set listing does not change between epochs; build it once.
    test_image_path = "data/image/test/"
    test_labels_path = "data/code_test_text.txt"
    test_image_paths, test_labels = \
        get_image_path_labels(test_image_path, test_labels_path, 200)

    # The context manager closes the session when training finishes or fails.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(EPOCH):
            # One epoch: walk through the training set in consecutive batches.
            for each in range(int(IMAGE_MUMBER / BATCH_SIZE)):
                batch_x, batch_y = get_next_batch(BATCH_SIZE, each, image_paths, labels)
                _, loss_ = sess.run(
                    [train_step, cross_entropy],
                    feed_dict={xs: batch_x, ys: batch_y, keep_prob: 0.5})
                print("epoch: %d  iter: %d/%d   loss: %f"
                      % (epoch + 1, BATCH_SIZE * each, IMAGE_MUMBER, loss_))

            # Accuracy on a random batch of test samples.
            batch_x_test, batch_y_test = \
                get_random_batch(BATCH_SIZE, test_image_paths, test_labels, 200)
            accuracy_test = compute_accuracy(batch_x_test, batch_y_test, sess)
            print("测试样本测试 epoch: %d  acc: %f" % (epoch + 1, accuracy_test))

            # Accuracy on a random batch of training samples.
            batch_x_test, batch_y_test = get_random_batch(BATCH_SIZE, image_paths, labels)
            accuracy = compute_accuracy(batch_x_test, batch_y_test, sess)
            print("训练样本测试 epoch: %d  acc: %f" % (epoch + 1, accuracy))


def getStrContent(path):
    """Return the full text content of the UTF-8 file at ``path``."""
    # The context manager closes the handle; the original left it open.
    with open(path, 'r', encoding="utf-8") as f:
        return f.read()


def get_image_path_labels(IMAGE_PATH=IMAGE_PATH, LABEL_PATH=LABEL_PATH, IMAGE_MUMBER=IMAGE_MUMBER):
    """Return ``(image_paths, labels)`` for a dataset.

    ``image_paths`` holds ``IMAGE_MUMBER`` paths of the form
    ``<IMAGE_PATH><index>.jpg``; ``labels`` holds every '#'-separated label
    found in ``LABEL_PATH``.
    """
    image_paths = [IMAGE_PATH + str(each) + ".jpg" for each in range(IMAGE_MUMBER)]
    labels = getStrContent(LABEL_PATH).split("#")
    return image_paths, labels


def main():
    """Load the training image paths and labels, then train the CNN."""
    image_paths, labels = get_image_path_labels()
    train_code_cnn(image_paths, labels)


if __name__ == '__main__':
    main()

阅读全文

1 0