caffe adaboost

来源：互联网发布：圆方软件视频教程编辑：程序博客网时间：2024/05/01 23:52

caffe voting

voting 的 ensemble_accuracy_layer.cpp 代码如下。因为其中包含根据概率求准确率的部分，所以弱分类器模型可以不用配置 Accuracy 层，

Softmax 后直接用 ensemble layer。

Softmax 层和 accuracy 层的配置文件如下：

layer {
  name: "3_prob"
  type: "Softmax"
  bottom: "3_ip2"
  top: "3_prob"
}
layer {
  name: "1_accuracy"
  type: "Accuracy"
  bottom: "1_prob"
  bottom: "label"
  top: "1_accuracy"
  include {
    phase: TEST
  }
}

caffe.proto文件中层定义，因为ensemble 层只需要 name,type,bottom,top 四个参数即可。所以不需要在 caffe.proto 文件中设置。

message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
repeated string bottom = 3; // the name of each bottom blob
repeated string top = 4; // the name of each top blob

ensemble 层配置函数如下：

layer {
  name: "ensemble_accuracy"
  type: "EnsembleAccuracy"
  bottom: "prob1"
  bottom: "prob2"
  bottom: "prob3"
  bottom: "label"
  top: "ensemble_accuracy"
  include { phase: TEST }
}

#include <algorithm>#include <functional>#include <utility>#include <vector>#include "caffe/layer.hpp"#include "caffe/util/io.hpp"#include "caffe/util/math_functions.hpp"#include "caffe/vision_layers.hpp"namespace caffe {template <typename Dtype>void EnsembleAccuracyLayer<Dtype>::Reshape(  const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {  vector<int> top_shape(0);    top[0]->Reshape(top_shape);}template <typename Dtype>void EnsembleAccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,    const vector<Blob<Dtype>*>& top) {    Dtype accuracy = 0;  int n_pred = bottom.size()-1;  const Dtype* bottom_label = bottom[n_pred]->cpu_data();  int num = bottom[0]->num();   // 样本个数  int dim = bottom[0]->count() / bottom[0]->num();  // 每个样本维数Dtype max_prob = 0;    // 得到最大<span style="font-family: Arial, Helvetica, sans-serif;">概率</span>int arg_max = 0; Dtype prob_pred = 0;  //For each data point  for (int i = 0; i < num; ++i) { // 对每个样本结果进行遍历int label = static_cast<int>(bottom_label[i]);//Find if any classifier is correctfor (int j = 0; j< n_pred; ++j) {const Dtype* bottom_data = bottom[j]->cpu_data();max_prob = 0;for(int k = 0; k < dim; k++){prob_pred = std::max(bottom_data[i * dim + k], Dtype(kLOG_THRESHOLD));if(prob_pred > max_prob){// 如果 这一类概率大于max_prob ，这保存该类概率max_prob = prob_pred; 　／／　保存最大概率arg_max = k;　　　// 保存所属标签。} }if(arg_max == label){   // 如果 语出的类和标签相同，则识别对，accuracy++;break;  }    }}      top[0]->mutable_cpu_data()[0] = accuracy/(double)num;}INSTANTIATE_CLASS(EnsembleAccuracyLayer);REGISTER_LAYER_CLASS(EnsembleAccuracy);}  // namespace caffe

caffe adaboost实现方式

https://github.com/terrychenism/NeuralNetTests/blob/master/caffe_utils/cnn_adaboost.py 参考代码，

今天在caffe上实现adaboost算法，

先训练弱分类器，用弱分类器的模型即可，如果把caffe训练好的模型当弱分类器，只需要调用caffe，使用该模型即可，不需要重新训练该弱分类器。

下面的代码调用 caffe 训练好的模型，并把它作为 adaboost 的弱分类器。这里主要使用了 sklearn 库。

#!/usr/bin/env python# -*- coding: utf-8 -*-# author: Tairui Chenimport numpy as npimport osimport sysimport argparseimport globimport timefrom sklearn.base import BaseEstimator, ClassifierMixinfrom sklearn.ensemble import AdaBoostClassifier, BaggingClassifierimport caffeg_rnd = np.random.randint(100000)def create_weighted_db(X, y, weights, name='boost'):    X = X.reshape(-1, 3, 32, 32)    train_fn = os.path.join(DIR, name + '.h5')    dd.io.save(train_fn, dict(data=X,                              label=y.astype(np.float32),                              sample_weight=weights), compress=False)    with open(os.path.join(DIR, name + '.txt'), 'w') as f:        print(train_fn, file=f)class CNN(BaseEstimator, ClassifierMixin):    def __init__(self):        pass    def get_params(self, deep=False):        return {}    def fit(self, X, y, sample_weight=None):        global g_seed        global g_loop        if sample_weight is None:            sample_weight = np.ones(X.shape[0], np.float32)            print('Calling fit with sample_weight None')        else:            sample_weight *= X.shape[0]            print('Calling fit with sample_weight sum', sample_weight.sum())        #sample_weight = np.ones(X.shape[0], np.float32)        #II = sample_weight > 0        #X = X[II]        #y = y[II]        #sample_weight = sample_weight[II]        #sample_weight = np.ones(X.shape[0])        w = sample_weight        #sample_weight[:10] = 0.0        #w[:1000] = 0.0        #w = sample_weight        #w0 = w / w.sum()        #print('Weight entropy:', -np.sum(w0 * np.log2(w0)))        print('Weight max:', w.max())        print('Weight min:', w.min())        #import sys; sys.exit(0)        self.classes_ = np.unique(y)        self.n_classes_ = len(self.classes_)        # Set up weighted database        create_weighted_db(X, y, sample_weight)        #steps = [(0.001, 2000, 2000)]        steps = [(0.001, 0.004, 60000), (0.0001, 0.004, 5000), (0.00001, 0.004, 5000)]        #steps = [(0.00001, 
10000, 10000), (0.000001, 5000, 15000), (0.0000001, 5000, 20000)]        #steps = [(0.001, 10000, 10000)]        #steps = [(0.001, 200, 1000)]        name = os.path.join(CONF_DIR, 'adaboost_{}_loop{}'.format(g_rnd, g_loop))        bare_conf_fn = os.path.join(CONF_DIR, 'boost_bare.prototxt')        conf_fn = os.path.join(CONF_DIR, 'solver.prototxt.template')        #bare_conf_fn = 'regaug_bare.prototxt'        #conf_fn = 'regaug_solver.prototxt.template'        net, info = train_model(name, conf_fn, bare_conf_fn, steps,                                seed=g_seed, device_id=DEVICE_ID)        loss_fn = 'info/info_{}_loop{}.h5'.format(g_rnd, g_loop)        dd.io.save(loss_fn, info)        print('Saved to', loss_fn)        g_loop += 1        print('Classifier set up')        self.net_ = net    def predict_proba(self, X):        X = X.reshape(-1, 3, 32, 32)        #X = X.transpose(0, 2, 3, 1)        prob = np.zeros((X.shape[0], self.n_classes_))        M = 2500        for k in range(int(np.ceil(X.shape[0] / M))):            y = self.net_.forward_all(data=X[k*M:(k+1)*M]).values()[0].squeeze(axis=(2,3))            prob[k*M:(k+1)*M] = y        T = 30.0        eps = 0.0001        #prob = prob.clip(eps, 1-eps)        log_prob = np.log(prob)        print('log_prob', log_prob.min(), log_prob.max())        #log_prob = log_prob.clip(min=-4, max=4)        new_prob = np.exp(log_prob / T)        new_prob /= dd.apply_once(np.sum, new_prob, [1])        return new_prob    def predict(self, X):        prob = self.predict_proba(X)        return prob.argmax(-1)train_data = np.load('G:/EDU/_SOURCE_CODE/chainer/examples/cifar10/data/train_data.npy')train_labels = np.load('G:/EDU/_SOURCE_CODE/chainer/examples/cifar10/data/train_labels.npy')model_path = 'cifar10/' # substitute your path here# GoogleNetnet_fn   = model_path + 'VGG_mini_ABN.prototxt'param_fn = model_path + 'cifar10_vgg_iter_120000.caffemodel'caffe.set_mode_cpu()net = caffe.Classifier(net_fn, param_fn,                       mean 
= np.float32([104.0, 116.0, 122.0]), # ImageNet mean, training set dependent                       channel_swap = (2,1,0)) # the reference model has channels in BGR order instead of RGBdef preprocess(net, img):    return np.float32(np.rollaxis(img, 2)[::-1]) - net.transformer.mean['data']for i in range(10):img = train_data[i].transpose((1, 2, 0)) * 255img = img.astype(np.uint8)[:, :, ::-1]end = 'prob'h, w = img.shape[:2]src, dst = net.blobs['data'], net.blobs[end]src.data[0] = preprocess(net, img)net.forward(end=end)features = dst.data[0].copy()  X = train_datay = train_labelsX *= 255.0mean_x = X.mean(0)X -= mean_xte_X= np.load('G:/EDU/_SOURCE_CODE/chainer/examples/cifar10/data/test_data.npy')te_y = np.load('G:/EDU/_SOURCE_CODE/chainer/examples/cifar10/data/test_labels.npy')create_weighted_db(te_X, te_y, np.ones(te_X.shape[0], dtype=np.float32), name='test')  clf = AdaBoostClassifier(base_estimator=CNN(), algorithm='SAMME.R', n_estimators=10,                                 random_state=0)clf.fit(X.reshape(X.shape[0], -1), y)for i, score in enumerate(clf.staged_score(X.reshape(X.shape[0], -1), y)):                print(i+1, 'train score', score)for i, score in enumerate(clf.staged_score(te_X.reshape(te_X.shape[0], -1), te_y)):                print(i+1, 'test score', score)

0 0