Opencv + C++转换mnist数据集文件为图像

来源：互联网发布：支付宝找回淘宝密码编辑：程序博客网时间：2024/05/21 09:42

mnist数据集是深度学习入门的一个很经典的手写体数字的数据集，我们跑的第一个深度学习网络往往就是mnist数据集，但是它的数据格式比较特殊，具体的格式及介绍见官网http://yann.lecun.com/exdb/mnist/

今天就写个程序，把mnist数据集中的数据转换为图像，这样我们就可以利用这些数据把图像转换为像caffe、Tensorflow等要求的格式作为练习，以后对自己的学习任务的样本也可以这样做。

废话不多说了，直接上代码，代码主要参考的是caffe中的convert_mnist_data.cpp文件中的转换方法。

#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <limits>
#include <string>
#include <vector>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

/// Reverses the byte order of a 32-bit value.
///
/// The MNIST headers store their integers big-endian; on a little-endian host
/// this turns the raw bytes read from the file into the intended value.
std::uint32_t swap_endian(std::uint32_t val)
{
    // First swap the bytes inside each 16-bit half, then swap the two halves.
    val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF);
    return (val << 16) | (val >> 16);
}

namespace {

/// Reads one 32-bit header field from `in` and byte-swaps it into `out`.
/// Returns false (leaving `out` untouched) when fewer than 4 bytes are available.
/// NOTE: like the rest of this program, this assumes a little-endian host.
bool read_be_u32(std::ifstream& in, std::uint32_t& out)
{
    std::uint32_t raw = 0;
    if (!in.read(reinterpret_cast<char*>(&raw), sizeof(raw)))
    {
        return false;
    }
    out = swap_endian(raw);
    return true;
}

} // namespace

/// Converts an MNIST image/label file pair into individual JPEG files.
///
/// Every image is binarized (any non-zero pixel becomes 255) and written to
/// "./mnist_train/<label>_<index>.jpg". The output directory must already
/// exist. Errors are reported on stdout and abort the conversion.
///
/// @param mnist_img_path   path to the idx3-ubyte image file (magic 2051)
/// @param mnist_label_path path to the idx1-ubyte label file (magic 2049)
void readAndSave(const std::string& mnist_img_path, const std::string& mnist_label_path)
{
    // Open both files in binary mode.
    std::ifstream mnist_image(mnist_img_path, std::ios::in | std::ios::binary);
    std::ifstream mnist_label(mnist_label_path, std::ios::in | std::ios::binary);
    if (!mnist_image.is_open())
    {
        std::cout << "open mnist image file error!" << std::endl;
        return;
    }
    if (!mnist_label.is_open())
    {
        std::cout << "open mnist label file error!" << std::endl;
        return;
    }

    // Magic numbers identify the file type; a short read is treated the same
    // as a wrong magic number.
    std::uint32_t magic = 0;
    if (!read_be_u32(mnist_image, magic) || magic != 2051)
    {
        std::cout << "this is not the mnist image file" << std::endl;
        return;
    }
    if (!read_be_u32(mnist_label, magic) || magic != 2049)
    {
        std::cout << "this is not the mnist label file" << std::endl;
        return;
    }

    // Number of images and number of labels.
    std::uint32_t num_items = 0;
    std::uint32_t num_label = 0;
    if (!read_be_u32(mnist_image, num_items) || !read_be_u32(mnist_label, num_label))
    {
        std::cout << "read mnist header error!" << std::endl;
        return;
    }
    // The two files must describe the same number of samples; continuing with
    // a mismatched pair would attach wrong labels or read past the end.
    if (num_items != num_label)
    {
        std::cout << "the image file and label file are not a pair" << std::endl;
        return;
    }

    // Image dimensions.
    std::uint32_t rows = 0;
    std::uint32_t cols = 0;
    if (!read_be_u32(mnist_image, rows) || !read_be_u32(mnist_image, cols))
    {
        std::cout << "read mnist header error!" << std::endl;
        return;
    }
    // cv::Mat takes int dimensions, so reject values it cannot represent.
    const std::uint32_t max_dim = static_cast<std::uint32_t>(std::numeric_limits<int>::max());
    if (rows == 0 || cols == 0 || rows > max_dim || cols > max_dim)
    {
        std::cout << "invalid mnist image size" << std::endl;
        return;
    }

    // One pixel buffer and one Mat are reused for every image, instead of
    // allocating (and leaking) a new buffer per iteration.
    const std::size_t pixel_count = static_cast<std::size_t>(rows) * cols;
    std::vector<char> pixels(pixel_count);
    cv::Mat image(static_cast<int>(rows), static_cast<int>(cols), CV_8UC1);

    for (std::uint32_t i = 0; i != num_items; ++i)
    {
        char label = 0;
        if (!mnist_image.read(pixels.data(), static_cast<std::streamsize>(pixel_count)) ||
            !mnist_label.read(&label, 1))
        {
            std::cout << "unexpected end of mnist data at item " << i << std::endl;
            return;
        }

        // Binarize: background (0) stays 0, every other intensity becomes 255.
        for (std::uint32_t m = 0; m != rows; ++m)
        {
            unsigned char* dst = image.ptr<unsigned char>(static_cast<int>(m));
            const char* src = pixels.data() + static_cast<std::size_t>(m) * cols;
            for (std::uint32_t n = 0; n != cols; ++n)
            {
                dst[n] = (src[n] == 0) ? 0 : 255;
            }
        }

        // Go through unsigned char so a label byte is never sign-extended.
        const unsigned int digit = static_cast<unsigned char>(label);
        const std::string saveFile =
            "./mnist_train/" + std::to_string(digit) + "_" + std::to_string(i) + ".jpg";
        if (!cv::imwrite(saveFile, image))
        {
            // Most likely the output directory does not exist; stop instead of
            // failing silently for every remaining image.
            std::cout << "write image file error: " << saveFile << std::endl;
            return;
        }
    }
}

int main()
{
    // To convert the test set instead:
    // readAndSave("./t10k-images.idx3-ubyte", "./t10k-labels.idx1-ubyte");
    readAndSave("train-images.idx3-ubyte", "train-labels.idx1-ubyte");
    return 0;
}

前面提到的大端模式是指数据的高字节保存在内存的低地址中，而数据的低字节保存在内存的高地址中，这样的存储模式有点儿类似于把数据当作字符串顺序处理：地址由小向大增加，而数据从高位往低位放；

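小端模式是我们平时常用的模式（x86等常见平台均采用小端），它与大端模式相反：数据的低字节保存在内存的低地址中，而数据的高字节保存在内存的高地址中。比如32位整数1（0x00000001），按小端模式在内存中从低地址到高地址依次存放为01 00 00 00，而按大端模式则依次存放为00 00 00 01。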

结果如下图所示：

0 0