【CNTK】CNTK学习笔记之制作自己的数据集（以MNIST手写数字数据集为例）

来源：互联网发布：下载游戏的软件编辑：程序博客网时间：2024/06/13 21:33

CNTK所使用的数据集格式是这种的：

这里写图片描述

所以我们自己的数据集要做的和这个一样。
MNIST手写数字图片库下载：http://download.csdn.net/detail/bless2015/9610008
用OpenCV+vs2013实现的：

#include <cv.h>
#include <highgui.h>
#include <io.h>
#include <string.h>

#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

using namespace std;
using namespace cv;

// Folder that holds the images of ONE class. The label written by main() is
// hard-coded, so this path and the one-hot label string must be changed
// together for every class.
const char* filePath = "D:\\mnist\\trainimage\\0";

// Recursively collects the full path of every regular file below `path`.
//
// path  - directory to scan (no trailing backslash).
// files - output list; one entry is appended per file found. Sub-directories
//         are descended into, the "." and ".." entries are skipped.
//
// If the directory cannot be enumerated, `files` is left unchanged.
void getFiles(string path, vector<string>& files)
{
    struct _finddata_t fileinfo;
    const string pattern = path + "\\*";

    // _findfirst returns an intptr_t search handle. Storing it in a `long`
    // (32 bits on Windows) truncates the handle in 64-bit builds.
    const intptr_t hFile = _findfirst(pattern.c_str(), &fileinfo);
    if (hFile == -1)
        return;

    do
    {
        const string entryPath = path + "\\" + fileinfo.name;
        if (fileinfo.attrib & _A_SUBDIR)
        {
            // Directory: recurse, but never into "." or "..".
            if (strcmp(fileinfo.name, ".") != 0 && strcmp(fileinfo.name, "..") != 0)
                getFiles(entryPath, files);
        }
        else
        {
            // Regular file: add it to the result list.
            files.push_back(entryPath);
        }
    } while (_findnext(hFile, &fileinfo) == 0);

    _findclose(hFile);
}

// Converts every image found under `filePath` into one line of the output
// text file, in the form
//     |labels <one-hot label> |features <pixel values separated by spaces>
// Pixels are written row by row as integer grayscale values.
int main()
{
    vector<string> files;
    getFiles(filePath, files);

    // Opened in append mode so that several runs (one per class folder) can
    // add to the same data set file.
    ofstream myfile("D:/mnist/trainimage/trainimage.txt", ios::app);
    if (!myfile)
    {
        cerr << "Cannot open output file D:/mnist/trainimage/trainimage.txt" << endl;
        system("pause");
        return 1;
    }

    string s;
    for (size_t i = 0; i < files.size(); i++)
    {
        // The flag 0 forces the image to be loaded as a single-channel
        // grayscale image.
        IplImage* image = cvLoadImage(files[i].c_str(), 0);
        if (image == 0)
        {
            // Not an image or unreadable: skip it instead of dereferencing NULL.
            cerr << "Failed to load image, skipped: " << files[i] << endl;
            continue;
        }

        s.clear();
        for (int m = 0; m < image->height; m++)
        {
            for (int j = 0; j < image->width; j++)
            {
                // Read the grayscale value of pixel (row m, column j).
                const CvScalar dPixelVal = cvGet2D(image, m, j);
                s.append(to_string(static_cast<int>(dPixelVal.val[0])));
                s.append(" ");
            }
        }

        // Write the sample in the text format expected by CNTK.
        myfile << "|labels " << "1 0 0 0 0 0 0 0 0 0 " << "|features " << s << endl;
        cout << "处理完毕" << files[i].c_str() << endl;

        cvReleaseImage(&image);
    }

    system("pause");
    cvWaitKey(0);
    return 0;
}

其中myfile是要写入的txt文件。

filePath是图片所在文件夹。这段代码写的还不太智能，没有根据文件夹来自动修改类别，所以类别是自己手动改的。其实这个问题也好解决，可以先批量修改图片的文件名，把其所属类别加进去，到时候从文件名判断要输出的labels类别。打算之后在labels这里优化一下。

myfile << "|labels " << "1 0 0 0 0 0 0 0 0 0 " << "|features " << s << endl;

写完以后效果和第一张图一样，就不再展示了。CNTK里自带mnist手写数字的python版下载文件:

/cntk/Examples/Image/MNIST/AdditionalFiles/mnist_convert.py

下载下来是这两个文件：
这里写图片描述

分别用这两个文件训练和测试，以及用自己生成的train.txt和test.txt训练和测试，结果如下：
这里写图片描述
errors一样，都为0.91%，说明自己制作的数据集格式正确。

这里写图片描述

0 0