caffe convert_mnist_data.cpp 代码注释
来源:互联网 发布:淘宝怎么搜军刺 编辑:程序博客网 时间:2024/05/24 13:27
convert_mnist_data.cpp 代码注释
将mnist数据集转换为lmdb(默认)或leveldb格式,便于caffe载入数据。
TIPS:Caffe为什么采用LMDB、LEVELDB,而不是直接读取原始数据?
答:一方面,数据类型多种多样(有二进制文件、文本文件、编码后的图像文件如JPEG或PNG、网络爬取的数据等),不可能用一套代码实现所有类型的输入数据读取,转换为统一格式可以简化数据读取层的实现;另一方面,使用LMDB、LEVELDB可以提高磁盘IO利用率。
// This script converts the MNIST dataset to a lmdb (default) or// leveldb (--backend=leveldb) format used by caffe to load data.// Usage:// convert_mnist_data [FLAGS] input_image_file input_label_file// output_db_file// The MNIST dataset could be downloaded at// http://yann.lecun.com/exdb/mnist/#include <gflags/gflags.h>#include <glog/logging.h>#include <google/protobuf/text_format.h>#if defined(USE_LEVELDB) && defined(USE_LMDB)#include <leveldb/db.h>#include <leveldb/write_batch.h>#include <lmdb.h>#endif#include <stdint.h>#include <sys/stat.h>#include <fstream> // NOLINT(readability/streams)#include <string>#include "boost/scoped_ptr.hpp"#include "caffe/proto/caffe.pb.h"#include "caffe/util/db.hpp"#include "caffe/util/format.hpp"#if defined(USE_LEVELDB) && defined(USE_LMDB)using namespace caffe; // NOLINT(build/namespaces)using boost::scoped_ptr;using std::string;//GFLAGS工具定义命令后选项backend,默认值为lmdb,即--backend=lmdbDEFINE_string(backend, "lmdb", "The backend for storing the result");//大小端转换。Mnist原始数据文件中32位整形值为大端存储,C/C++变量为小端存储,因此需要加入转换机制uint32_t swap_endian(uint32_t val) { val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF); return (val << 16) | (val >> 16);}void convert_dataset(const char* image_filename, const char* label_filename, const char* db_path, const string& db_backend) { // Open files std::ifstream image_file(image_filename, std::ios::in | std::ios::binary); std::ifstream label_file(label_filename, std::ios::in | std::ios::binary); CHECK(image_file) << "Unable to open file " << image_filename; CHECK(label_file) << "Unable to open file " << label_filename; // Read the magic and the meta data uint32_t magic; //魔数 2051-数据,2049-标记 uint32_t num_items; uint32_t num_labels; uint32_t rows; uint32_t cols; //获取魔数,样本图像宽高标记,进行魔数验证 image_file.read(reinterpret_cast<char*>(&magic), 4); magic = swap_endian(magic); CHECK_EQ(magic, 2051) << "Incorrect image file magic."; label_file.read(reinterpret_cast<char*>(&magic), 4); magic = swap_endian(magic); CHECK_EQ(magic, 
2049) << "Incorrect label file magic."; image_file.read(reinterpret_cast<char*>(&num_items), 4); num_items = swap_endian(num_items); label_file.read(reinterpret_cast<char*>(&num_labels), 4); num_labels = swap_endian(num_labels); CHECK_EQ(num_items, num_labels); image_file.read(reinterpret_cast<char*>(&rows), 4); rows = swap_endian(rows); image_file.read(reinterpret_cast<char*>(&cols), 4); cols = swap_endian(cols); scoped_ptr<db::DB> db(db::GetDB(db_backend)); db->Open(db_path, db::NEW); scoped_ptr<db::Transaction> txn(db->NewTransaction()); // Storing to db char label; char* pixels = new char[rows * cols]; int count = 0; string value; Datum datum; datum.set_channels(1); datum.set_height(rows); datum.set_width(cols); LOG(INFO) << "A total of " << num_items << " items."; LOG(INFO) << "Rows: " << rows << " Cols: " << cols; for (int item_id = 0; item_id < num_items; ++item_id) { //读取样本数据、标记 image_file.read(pixels, rows * cols); label_file.read(&label, 1); datum.set_data(pixels, rows*cols); datum.set_label(label); string key_str = caffe::format_int(item_id, 8); //样本序列化 datum.SerializeToString(&value); txn->Put(key_str, value); //批量提交更改 1000个样本提交一次 if (++count % 1000 == 0) { txn->Commit(); } } // write the last batch 提交剩余的样本 if (count % 1000 != 0) { txn->Commit(); } LOG(INFO) << "Processed " << count << " files."; delete[] pixels; db->Close();}int main(int argc, char** argv) {#ifndef GFLAGS_GFLAGS_H_ namespace gflags = google;#endif FLAGS_alsologtostderr = 1; gflags::SetUsageMessage("This script converts the MNIST dataset to\n" "the lmdb/leveldb format used by Caffe to load data.\n" "Usage:\n" " convert_mnist_data [FLAGS] input_image_file input_label_file " "output_db_file\n" "The MNIST dataset could be downloaded at\n" " http://yann.lecun.com/exdb/mnist/\n" "You should gunzip them after downloading," "or directly use data/mnist/get_mnist.sh\n"); gflags::ParseCommandLineFlags(&argc, &argv, true); const string& db_backend = FLAGS_backend; if (argc != 4) { 
gflags::ShowUsageWithFlagsRestrict(argv[0], "examples/mnist/convert_mnist_data"); } else { google::InitGoogleLogging(argv[0]); convert_dataset(argv[1], argv[2], argv[3], db_backend); } return 0;}#elseint main(int argc, char** argv) { LOG(FATAL) << "This example requires LevelDB and LMDB; " << "compile with USE_LEVELDB and USE_LMDB.";}#endif // USE_LEVELDB and USE_LMDB
阅读全文
0 0
- caffe convert_mnist_data.cpp 代码注释
- [caffe代码] convert_imageset.cpp
- cpp代码注释
- caffe 中classification.cpp的源码注释
- caffe运行mnist出现convert_mnist_data.bin: not found问题解决
- Caffe Notes: Caffe.cpp
- Caffe代码解读(三):annotated_data_layer.hpp和.cpp
- Ubuntu 16.04使用Eclipse运行Caffe-SSD的cpp代码
- caffe运行mnist出现convert_mnist_data.bin: not found问题的解决
- Caffe源码解析caffe.cpp
- 【撸码caffe 三】 caffe.cpp
- caffe深度学习网络softmax层代码注释
- caffe深度学习网络relu层代码注释
- center loss代码注释(caffe新添加层)
- caffe bechmark.cpp 分析
- Caffe Innerproduct.cpp学习
- caffe base_conv_layers.cpp 学习
- caffe Poolinglayer.cpp学习
- TensorFlow数据读取模块调用过程(cifar10)
- 轻院1362: 绝对值排序
- Socket编程实现文件的传输
- php 默认装好的模块
- 数据挖掘十大算法----EM算法(最大期望算法)
- caffe convert_mnist_data.cpp 代码注释
- 游戏编程入门(1):游戏专业概论
- adb shell无法连接到Genymotion上的虚拟设备的问题
- string 与 stringbuffer的性能比较
- hdoj 1089
- Python学习01-中文编码
- 自适应控制 PID
- html<head>
- nginx对ip限流的操作