Implementing Multi-Label Input in Caffe
I recently needed to train a regression network with Caffe whose input labels are multi-dimensional float data. After going through a lot of material, I put together a fairly concise procedure and record it here.
This post mainly follows the blog post below, with some small modifications and improvements:
http://blog.csdn.net/qq295456059/article/details/53142574
First, the training list file (txt) used for regression has the following format:
cat_1.jpg 0.03 0.45 0.55 0.66
cat_2.jpg 0.44 0.31 0.05 0.34
dog_1.jpg 0.67 0.25 0.79 0.56
dog_2.jpg 0.89 0.46 0.91 0.38
We need to modify the following files:
1. tools/convert_imageset.cpp
I recommend copying convert_imageset.cpp, naming the copy convert_imageset_regression.cpp, and keeping it in the tools folder.
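For example, from the Caffe root directory:

cp tools/convert_imageset.cpp tools/convert_imageset_regression.cpp

Caffe's build picks up the .cpp files under tools/, so no build-file changes should be needed beyond rebuilding.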
First, add a few headers:
#include <boost/tokenizer.hpp>  //### To use tokenizer
#include <iostream>             //###
using namespace std;            //###
Then replace the int label with a vector of floats, as follows:
//std::vector<std::pair<std::string, int> > lines;  //###
std::vector<std::pair<std::string, std::vector<float> > > lines;
std::string line;
//size_t pos;
//int label;  //###
std::vector<float> labels;
In the while loop, push the float labels into lines:
while (std::getline(infile, line)) {
  // pos = line.find_last_of(' ');
  // label = atoi(line.substr(pos + 1).c_str());
  // lines.push_back(std::make_pair(line.substr(0, pos), label));

  //###
  std::vector<std::string> tokens;
  boost::char_separator<char> sep(" ");
  boost::tokenizer<boost::char_separator<char> > tok(line, sep);
  tokens.clear();
  std::copy(tok.begin(), tok.end(), std::back_inserter(tokens));
  for (int i = 1; i < tokens.size(); ++i) {
    labels.push_back(atof(tokens.at(i).c_str()));
  }
  lines.push_back(std::make_pair(tokens.at(0), labels));
  //### To clear the vector labels
  labels.clear();
}
The complete modified file is as follows:
// This program converts a set of images to a lmdb/leveldb by storing them
// as Datum proto buffers.
// Usage:
//   convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME
//
// where ROOTFOLDER is the root folder that holds all the images, and LISTFILE
// should be a list of files as well as their labels, in the format as
//   subfolder1/file1.JPEG 7
//   ....

#include <algorithm>
#include <fstream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "boost/scoped_ptr.hpp"
#include "gflags/gflags.h"
#include "glog/logging.h"

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/rng.hpp"

#include <boost/tokenizer.hpp>  //### To use tokenizer
#include <iostream>

using namespace caffe;  // NOLINT(build/namespaces)
using std::pair;
using boost::scoped_ptr;

DEFINE_bool(gray, false,
    "When this option is on, treat images as grayscale ones");
DEFINE_bool(shuffle, false,
    "Randomly shuffle the order of images and their labels");
DEFINE_string(backend, "lmdb",
    "The backend {lmdb, leveldb} for storing the result");
DEFINE_int32(resize_width, 0, "Width images are resized to");
DEFINE_int32(resize_height, 0, "Height images are resized to");
DEFINE_bool(check_size, false,
    "When this option is on, check that all the datum have the same size");
DEFINE_bool(encoded, false,
    "When this option is on, the encoded image will be save in datum");
DEFINE_string(encode_type, "",
    "Optional: What type should we encode the image as ('png','jpg',...).");

int main(int argc, char** argv) {
#ifdef USE_OPENCV
  ::google::InitGoogleLogging(argv[0]);
  // Print output to stderr (while still logging)
  FLAGS_alsologtostderr = 1;

#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  gflags::SetUsageMessage("Convert a set of images to the leveldb/lmdb\n"
        "format used as input for Caffe.\n"
        "Usage:\n"
        "    convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME\n"
        "The ImageNet dataset for the training demo is at\n"
        "    http://www.image-net.org/download-images\n");
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  if (argc < 4) {
    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/convert_imageset");
    return 1;
  }

  const bool is_color = !FLAGS_gray;
  const bool check_size = FLAGS_check_size;
  const bool encoded = FLAGS_encoded;
  const string encode_type = FLAGS_encode_type;

  std::ifstream infile(argv[2]);
  //std::vector<std::pair<std::string, int> > lines;  //###
  std::vector<std::pair<std::string, std::vector<float> > > lines;
  std::string line;
  //size_t pos;
  //int label;  //###
  std::vector<float> labels;

  while (std::getline(infile, line)) {
    // pos = line.find_last_of(' ');
    // label = atoi(line.substr(pos + 1).c_str());
    // lines.push_back(std::make_pair(line.substr(0, pos), label));

    //###
    std::vector<std::string> tokens;
    boost::char_separator<char> sep(" ");
    boost::tokenizer<boost::char_separator<char> > tok(line, sep);
    tokens.clear();
    std::copy(tok.begin(), tok.end(), std::back_inserter(tokens));
    for (int i = 1; i < tokens.size(); ++i) {
      labels.push_back(atof(tokens.at(i).c_str()));
    }
    lines.push_back(std::make_pair(tokens.at(0), labels));
    //### To clear the vector labels
    labels.clear();
  }

  if (FLAGS_shuffle) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    shuffle(lines.begin(), lines.end());
  }
  LOG(INFO) << "A total of " << lines.size() << " images.";

  if (encode_type.size() && !encoded)
    LOG(INFO) << "encode_type specified, assuming encoded=true.";

  int resize_height = std::max<int>(0, FLAGS_resize_height);
  int resize_width = std::max<int>(0, FLAGS_resize_width);

  // Create new DB
  scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
  db->Open(argv[3], db::NEW);
  scoped_ptr<db::Transaction> txn(db->NewTransaction());

  // Storing to db
  std::string root_folder(argv[1]);
  Datum datum;
  int count = 0;
  int data_size = 0;
  bool data_size_initialized = false;

  for (int line_id = 0; line_id < lines.size(); ++line_id) {
    bool status;
    std::string enc = encode_type;
    if (encoded && !enc.size()) {
      // Guess the encoding type from the file name
      string fn = lines[line_id].first;
      size_t p = fn.rfind('.');
      if ( p == fn.npos )
        LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'";
      enc = fn.substr(p);
      std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower);
    }
    status = ReadImageToDatum(root_folder + lines[line_id].first,
        lines[line_id].second, resize_height, resize_width, is_color,
        enc, &datum);
    if (status == false) continue;
    if (check_size) {
      if (!data_size_initialized) {
        data_size = datum.channels() * datum.height() * datum.width();
        data_size_initialized = true;
      } else {
        const std::string& data = datum.data();
        CHECK_EQ(data.size(), data_size) << "Incorrect data field size "
            << data.size();
      }
    }
    // sequential
    string key_str = caffe::format_int(line_id, 8) + "_" + lines[line_id].first;

    // Put in db
    string out;
    CHECK(datum.SerializeToString(&out));
    txn->Put(key_str, out);

    if (++count % 1000 == 0) {
      // Commit db
      txn->Commit();
      txn.reset(db->NewTransaction());
      LOG(INFO) << "Processed " << count << " files.";
    }
  }
  // write the last batch
  if (count % 1000 != 0) {
    txn->Commit();
    LOG(INFO) << "Processed " << count << " files.";
  }
#else
  LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV.";
#endif  // USE_OPENCV
  return 0;
}
2. include/caffe/util/io.hpp
Modify io.hpp by adding a declaration for the new overload:
bool ReadImageToDatum(const string& filename, const vector<float> labels,
    const int height, const int width, const bool is_color,
    const std::string & encoding, Datum* datum);
3. src/caffe/util/io.cpp
In io.cpp, add the implementation of the function declared in io.hpp:
bool ReadImageToDatum(const string& filename, const vector<float> labels,
    const int height, const int width, const bool is_color,
    const std::string & encoding, Datum* datum) {
  cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color);
  if (cv_img.data) {
    // if (encoding.size()) {
    //   if ( (cv_img.channels() == 3) == is_color && !height && !width &&
    //       matchExt(filename, encoding) )
    //     return ReadFileToDatum(filename, label, datum);
    //   std::vector<uchar> buf;
    //   cv::imencode("."+encoding, cv_img, buf);
    //   datum->set_data(std::string(reinterpret_cast<char*>(&buf[0]),
    //                   buf.size()));
    //   datum->set_label(label);
    //   datum->set_encoded(true);
    //   return true;
    // }
    CVMatToDatum(cv_img, datum);
    //datum->set_label(label);  //###
    for (int i = 0; i < labels.size(); ++i) {
      datum->add_float_data(labels.at(i));
    }
    return true;
  } else {
    return false;
  }
}
4. src/caffe/proto/caffe.proto
In caffe.proto, inside message Datum, change
optional uint32 label = 5;
to
optional float label = 5;
and change
optional float float_data = 6;
to
repeated float float_data = 6;
The modified message Datum is as follows:
message Datum {
  optional int32 channels = 1;
  optional int32 height = 2;
  optional int32 width = 3;
  // the actual image data, in bytes
  optional bytes data = 4;
  optional float label = 5;
  // Optionally, the datum could also hold float data.
  repeated float float_data = 6;
  // If true data contains an encoded image that need to be decoded
  optional bool encoded = 7 [default = false];
}
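Because float_data is now repeated, the C++ classes regenerated from caffe.proto expose the standard protobuf repeated-field accessors add_float_data(), float_data_size(), and float_data(i); this is how the conversion tool writes the labels and how the data layer reads them back. A minimal sketch (the label values here are made up, purely to illustrate the accessors):

#include "caffe/proto/caffe.pb.h"

void FillAndReadLabels() {
  caffe::Datum datum;
  datum.add_float_data(0.03f);  // convert_imageset_regression calls this once per label value
  datum.add_float_data(0.45f);
  for (int i = 0; i < datum.float_data_size(); ++i) {
    float label_i = datum.float_data(i);  // the data layer reads the i-th label back this way
    (void)label_i;
  }
}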
In DataParameter, add a line at the end:
optional uint32 label_num = 11 [default = 4];
The modified DataParameter is as follows:
message DataParameter {
  enum DB {
    LEVELDB = 0;
    LMDB = 1;
  }
  // Specify the data source.
  optional string source = 1;
  // Specify the batch size.
  optional uint32 batch_size = 4;
  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The skip
  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  // DEPRECATED. Each solver accesses a different subset of the database.
  optional uint32 rand_skip = 7 [default = 0];
  optional DB backend = 8 [default = LEVELDB];
  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
  // simple scaling and subtracting the data mean, if provided. Note that the
  // mean subtraction is always carried out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
  // crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
  // data.
  optional bool mirror = 6 [default = false];
  // Force the encoded image to have 3 color channels
  optional bool force_encoded_color = 9 [default = false];
  // Prefetch queue (Increase if data feeding bandwidth varies, within the
  // limit of device memory for GPU training)
  optional uint32 prefetch = 10 [default = 4];
  optional uint32 label_num = 11 [default = 4];
}
5. src/caffe/layers/data_layer.cpp
The first change is in the DataLayerSetUp function:
// label
//###
// if (this->output_labels_) {
//   vector<int> label_shape(1, batch_size);
//   top[1]->Reshape(label_shape);
//   for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
//     this->prefetch_[i].label_.Reshape(label_shape);
//   }
// }

//###
int labelNum = this->layer_param_.data_param().label_num();  //#####

if (this->output_labels_) {
  vector<int> label_shape;
  label_shape.push_back(batch_size);
  label_shape.push_back(labelNum);
  label_shape.push_back(1);
  label_shape.push_back(1);
  top[1]->Reshape(label_shape);
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->label_.Reshape(label_shape);
  }
}
The second change is in the load_batch function:
// Copy label.
// ###
// if (this->output_labels_) {
//   Dtype* top_label = batch->label_.mutable_cpu_data();
//   top_label[item_id] = datum.label();
// }

// ###
int labelNum = this->layer_param_.data_param().label_num();  //####
if (this->output_labels_) {
  Dtype* top_label = batch->label_.mutable_cpu_data();
  for (int i = 0; i < labelNum; i++) {
    top_label[item_id*labelNum + i] = datum.float_data(i);  // read float labels
  }
}
The modified data_layer.cpp is as follows:
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#endif  // USE_OPENCV
#include <stdint.h>

#include <vector>

#include "caffe/data_transformer.hpp"
#include "caffe/layers/data_layer.hpp"
#include "caffe/util/benchmark.hpp"

namespace caffe {

template <typename Dtype>
DataLayer<Dtype>::DataLayer(const LayerParameter& param)
  : BasePrefetchingDataLayer<Dtype>(param),
    offset_() {
  db_.reset(db::GetDB(param.data_param().backend()));
  db_->Open(param.data_param().source(), db::READ);
  cursor_.reset(db_->NewCursor());
}

template <typename Dtype>
DataLayer<Dtype>::~DataLayer() {
  this->StopInternalThread();
}

template <typename Dtype>
void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int batch_size = this->layer_param_.data_param().batch_size();
  // Read a data point, and use it to initialize the top blob.
  Datum datum;
  datum.ParseFromString(cursor_->value());

  // Use data_transformer to infer the expected blob shape from datum.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(top_shape);
  // Reshape top[0] and prefetch_data according to the batch_size.
  top_shape[0] = batch_size;
  top[0]->Reshape(top_shape);
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->data_.Reshape(top_shape);
  }
  LOG_IF(INFO, Caffe::root_solver())
      << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label
  //###
  // if (this->output_labels_) {
  //   vector<int> label_shape(1, batch_size);
  //   top[1]->Reshape(label_shape);
  //   for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
  //     this->prefetch_[i].label_.Reshape(label_shape);
  //   }
  // }

  //###
  int labelNum = this->layer_param_.data_param().label_num();  //#####
  if (this->output_labels_) {
    vector<int> label_shape;
    label_shape.push_back(batch_size);
    label_shape.push_back(labelNum);
    label_shape.push_back(1);
    label_shape.push_back(1);
    top[1]->Reshape(label_shape);
    for (int i = 0; i < this->prefetch_.size(); ++i) {
      this->prefetch_[i]->label_.Reshape(label_shape);
    }
  }
}

template <typename Dtype>
bool DataLayer<Dtype>::Skip() {
  int size = Caffe::solver_count();
  int rank = Caffe::solver_rank();
  bool keep = (offset_ % size) == rank ||
              // In test mode, only rank 0 runs, so avoid skipping
              this->layer_param_.phase() == TEST;
  return !keep;
}

template<typename Dtype>
void DataLayer<Dtype>::Next() {
  cursor_->Next();
  if (!cursor_->valid()) {
    LOG_IF(INFO, Caffe::root_solver())
        << "Restarting data prefetching from start.";
    cursor_->SeekToFirst();
  }
  offset_++;
}

// This function is called on prefetch thread
template<typename Dtype>
void DataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  const int batch_size = this->layer_param_.data_param().batch_size();

  Datum datum;
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    timer.Start();
    while (Skip()) {
      Next();
    }
    datum.ParseFromString(cursor_->value());
    read_time += timer.MicroSeconds();

    if (item_id == 0) {
      // Reshape according to the first datum of each batch
      // on single input batches allows for inputs of varying dimension.
      // Use data_transformer to infer the expected blob shape from datum.
      vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
      this->transformed_data_.Reshape(top_shape);
      // Reshape batch according to the batch_size.
      top_shape[0] = batch_size;
      batch->data_.Reshape(top_shape);
    }

    // Apply data transformations (mirror, scale, crop...)
    timer.Start();
    int offset = batch->data_.offset(item_id);
    Dtype* top_data = batch->data_.mutable_cpu_data();
    this->transformed_data_.set_cpu_data(top_data + offset);
    this->data_transformer_->Transform(datum, &(this->transformed_data_));

    // Copy label.
    // ###
    // if (this->output_labels_) {
    //   Dtype* top_label = batch->label_.mutable_cpu_data();
    //   top_label[item_id] = datum.label();
    // }

    // ###
    int labelNum = this->layer_param_.data_param().label_num();  //####
    if (this->output_labels_) {
      Dtype* top_label = batch->label_.mutable_cpu_data();
      for (int i = 0; i < labelNum; i++) {
        top_label[item_id*labelNum + i] = datum.float_data(i);  // read float labels
      }
    }
    trans_time += timer.MicroSeconds();
    Next();
  }
  timer.Stop();
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

INSTANTIATE_CLASS(DataLayer);
REGISTER_LAYER_CLASS(Data);

}  // namespace caffe
That completes all of the source changes. After rebuilding Caffe, an executable named convert_imageset_regression appears under build/tools; this is the tool we use to generate the LMDB. We can then use this build of Caffe for multi-label training. A few points to note:
1. The number of labels must be specified in the network definition by adding the parameter label_num to the data layer: write label_num: 4 for four labels, label_num: 8 for eight, and so on.
2. Regression tasks generally do not use an Accuracy layer.
3. If the data is cropped, resized, or otherwise transformed during regression training, the labels must be mapped through the corresponding transformation as well.
We can now use convert_imageset_regression to generate the LMDB files for training and testing:
convert_imageset_regression IMGROOT train.txt train_lmdb
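The command-line flags inherited from convert_imageset still apply; for example, to shuffle the list and resize every image to 256x256 (paths are placeholders, and note the trailing slash on the image root, since the tool simply concatenates it with each file name from the list):

./build/tools/convert_imageset_regression --shuffle --resize_height=256 --resize_width=256 IMGROOT/ train.txt train_lmdb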
Once the LMDB files are generated, together with our network definition we can train with Caffe:
./caffe train --solver=regression_solver.prototxt
Finally, here is the network definition of the regression network I trained with VGG16:
name: "VGG16" layer { name: "data" type: "Data" top: "data" top: "label" include { phase: TRAIN } transform_param { mirror: true crop_size: 224 mean_value: 103.939 mean_value: 116.779 mean_value: 123.68 } data_param { source: "data/ilsvrc12_shrt_256/ilsvrc12_train_leveldb" batch_size: 64 label_num: 4 backend: LEVELDB } } layer { name: "data" type: "Data" top: "data" top: "label" include { phase: TEST } transform_param { mirror: false crop_size: 224 mean_value: 103.939 mean_value: 116.779 mean_value: 123.68 } data_param { source: "data/ilsvrc12_shrt_256/ilsvrc12_val_leveldb" batch_size: 50 label_num: 4 backend: LEVELDB } } layer { bottom: "data" top: "conv1_1" name: "conv1_1" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 64 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv1_1" top: "conv1_1" name: "relu1_1" type: "ReLU" } layer { bottom: "conv1_1" top: "conv1_2" name: "conv1_2" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 64 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv1_2" top: "conv1_2" name: "relu1_2" type: "ReLU" } layer { bottom: "conv1_2" top: "pool1" name: "pool1" type: "Pooling" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { bottom: "pool1" top: "conv2_1" name: "conv2_1" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 128 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv2_1" top: "conv2_1" name: "relu2_1" type: "ReLU" } layer { bottom: "conv2_1" top: "conv2_2" name: "conv2_2" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 128 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv2_2" top: "conv2_2" name: "relu2_2" type: "ReLU" } layer { bottom: "conv2_2" top: "pool2" name: "pool2" type: "Pooling" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { bottom: "pool2" top: "conv3_1" name: "conv3_1" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv3_1" top: "conv3_1" name: "relu3_1" type: "ReLU" } layer { bottom: "conv3_1" top: "conv3_2" name: "conv3_2" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv3_2" top: "conv3_2" name: "relu3_2" type: "ReLU" } layer { bottom: "conv3_2" top: "conv3_3" name: "conv3_3" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv3_3" top: "conv3_3" name: "relu3_3" type: "ReLU" } layer { bottom: "conv3_3" top: "pool3" name: "pool3" type: "Pooling" pooling_param { pool: MAX kernel_size: 2 
stride: 2 } } layer { bottom: "pool3" top: "conv4_1" name: "conv4_1" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv4_1" top: "conv4_1" name: "relu4_1" type: "ReLU" } layer { bottom: "conv4_1" top: "conv4_2" name: "conv4_2" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv4_2" top: "conv4_2" name: "relu4_2" type: "ReLU" } layer { bottom: "conv4_2" top: "conv4_3" name: "conv4_3" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv4_3" top: "conv4_3" name: "relu4_3" type: "ReLU" } layer { bottom: "conv4_3" top: "pool4" name: "pool4" type: "Pooling" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { bottom: "pool4" top: "conv5_1" name: "conv5_1" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv5_1" top: "conv5_1" name: "relu5_1" type: "ReLU" } layer { bottom: "conv5_1" top: "conv5_2" name: "conv5_2" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv5_2" top: "conv5_2" name: "relu5_2" type: "ReLU" } layer { bottom: "conv5_2" top: "conv5_3" name: "conv5_3" type: "Convolution" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "conv5_3" top: "conv5_3" name: "relu5_3" type: "ReLU" } layer { bottom: "conv5_3" top: "pool5" name: "pool5" type: "Pooling" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { bottom: "pool5" top: "fc6" name: "fc6" type: "InnerProduct" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.1 } } } layer { bottom: "fc6" top: "fc6" name: "relu6" type: "ReLU" } layer { bottom: "fc6" top: "fc6" name: "drop6" type: "Dropout" dropout_param { dropout_ratio: 0.5 } } layer { bottom: "fc6" top: "fc7" name: "fc7" type: "InnerProduct" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.1 } } } layer { bottom: "fc7" top: "fc7" name: "relu7" type: "ReLU" } layer { bottom: "fc7" top: "fc7" name: "drop7" type: "Dropout" dropout_param { dropout_ratio: 0.5 } } layer { bottom: "fc7" top: "fc8" name: "fc8" type: "InnerProduct" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4 weight_filler { type: "gaussian" std: 0.005 } 
bias_filler { type: "constant" value: 0.1 } } } layer { name: "EuclideanLoss" type: "EuclideanLoss" bottom: "fc8" bottom: "label" top: "loss"}
After training the network, we need to evaluate its performance. Here is the inference interface I wrote.
CRegression.h
#include <caffe/caffe.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>

using namespace caffe;
using namespace cv;
using std::string;

class Regression {
 public:
  Regression();
  ~Regression();
  int Initial(const string &model_file, const string &trained_file);
  Rect FillBuf(Mat Img);
  int FillBuf(Mat Img, int tag);

 private:
  shared_ptr<Net<float> > net_;
};
CRegression.cpp
#include "CRegression.h"Regression::Regression(){}Regression::~Regression(){}int Regression::Initial(const string &model_file, const string &trained_file){Caffe::set_mode(Caffe::GPU);/* Load the network. */net_.reset(new Net<float>(model_file, TEST));net_->CopyTrainedLayersFrom(trained_file);return 0;}Rect Regression::FillBuf(Mat Img){Mat sample = Img.clone();sample.convertTo(sample, CV_32FC3);Blob<float>* inputBlob = net_->input_blobs()[0];int width = inputBlob->width();int height = inputBlob->height();Size inputSize = Size(width, height);CHECK(sample.size() == inputSize)<< "sample size is not equal to inputSize";Mat mean(sample.size(), CV_32FC3, Scalar(103.939, 116.779, 123.68));subtract(sample, mean, sample);float* data = inputBlob->mutable_cpu_data(); //将图片的像素值,复制进网络的输入Blob for (int k = 0; k < 3; ++k){for (int i = 0; i < height; ++i){for (int j = 0; j < width; ++j){int index = (k*height + i)*width + j; //获取偏移量 data[index] = sample.at<Vec3f>(i, j)[k];}}}vector<Blob<float>* > inputs(1, inputBlob);const vector<Blob<float>* >& outputBlobs = net_->Forward(inputs); //进行前向传播,并返回最后一层的blob Blob<float>* outputBlob = outputBlobs[0]; //输出blob const float* value = outputBlob->cpu_data();std::cout << value[0] << " " << value[1] << " " << value[2] << " " << value[3] << std::endl;return Rect();}