OpenCV Source Code Analysis (1): The haartraining Cascade Classifier


The OpenCV manual notes that both opencv_haartraining and opencv_traincascade can be used to train cascade classifiers.

The differences: traincascade supports both Haar and LBP features and is easier to extend with additional feature types; the two tools also write their classifier files in different formats. traincascade works with both formats and can export a trained cascade in the older format.
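Either way, the resulting XML file can be consumed by OpenCV's detection interface, which reads both formats. Below is a minimal detection sketch, assuming the OpenCV C++ API; "cascade.xml" and "test.jpg" are placeholder file names:

// Minimal detection sketch; not part of haartraining itself.
#include <opencv2/objdetect/objdetect.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <cstdio>
#include <vector>

int main()
{
    cv::CascadeClassifier cascade;
    if( !cascade.load( "cascade.xml" ) )   // loads either the old or the new XML format
    {
        printf( "failed to load cascade\n" );
        return 1;
    }

    cv::Mat img = cv::imread( "test.jpg", 0 );   // read as grayscale
    if( img.empty() ) { printf( "failed to load image\n" ); return 1; }

    std::vector<cv::Rect> objects;
    cascade.detectMultiScale( img, objects, 1.1, 3 );   // scale step 1.1, 3 neighbours

    printf( "detected %d objects\n", (int)objects.size() );
    return 0;
}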

Auxiliary programs related to training

createsamples.cpp: prepares the positive-sample data used for training, as well as test data. It outputs a .vec file, which both training programs can consume; the file stores the images in binary form.
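To get a feel for that binary layout, here is a small stand-alone sketch (not part of OpenCV) that prints the header of a .vec file. It assumes the layout written by createsamples: a 12-byte header (sample count, sample size in pixels, two reserved shorts), followed by one separator byte plus sample-size unsigned 16-bit pixel values per sample.

// vec_info.cpp - hypothetical helper: print the header of a .vec file.
#include <cstdio>

int main( int argc, char* argv[] )
{
    if( argc < 2 ) { printf( "Usage: %s <vec_file>\n", argv[0] ); return 0; }

    FILE* f = fopen( argv[1], "rb" );
    if( !f ) { printf( "Cannot open %s\n", argv[1] ); return 1; }

    int count = 0, vecsize = 0;
    short tmp1 = 0, tmp2 = 0;
    fread( &count,   sizeof(count),   1, f );   // number of positive samples
    fread( &vecsize, sizeof(vecsize), 1, f );   // pixels per sample (width * height)
    fread( &tmp1,    sizeof(tmp1),    1, f );   // reserved
    fread( &tmp2,    sizeof(tmp2),    1, f );   // reserved

    printf( "samples: %d, pixels per sample: %d\n", count, vecsize );
    // e.g. for 24x24 samples, vecsize should be 576
    fclose( f );
    return 0;
}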

performance.cpp: evaluates a classifier produced by haartraining. It reads a set of annotated images, runs the classifier on them, and reports performance figures such as the number of detected objects, the number of missed detections, the number of false detections, and other information.
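The bookkeeping behind those numbers can be illustrated with a small sketch. This is not the actual performance.cpp logic, only one plausible way to do the matching: a detection counts as a hit if it overlaps a ground-truth box closely enough, leftover ground-truth boxes are misses, and leftover detections are false alarms. The Box struct and the matching criterion below are assumptions for illustration.

#include <vector>
#include <cmath>
#include <cstdio>

struct Box { int x, y, w, h; };

// Hypothetical criterion: centers close together and comparable sizes.
static bool matches( const Box& gt, const Box& det )
{
    double dx = (gt.x + gt.w * 0.5) - (det.x + det.w * 0.5);
    double dy = (gt.y + gt.h * 0.5) - (det.y + det.h * 0.5);
    double dist = std::sqrt( dx * dx + dy * dy );
    return dist < gt.w * 0.5 && det.w >= gt.w / 2 && det.w <= gt.w * 2;
}

void evaluate( const std::vector<Box>& groundTruth, const std::vector<Box>& detections )
{
    int hits = 0, missed = 0, falseAlarms = 0;
    std::vector<bool> used( detections.size(), false );

    // Greedily match each ground-truth box to at most one detection.
    for( size_t i = 0; i < groundTruth.size(); i++ )
    {
        bool found = false;
        for( size_t j = 0; j < detections.size(); j++ )
        {
            if( !used[j] && matches( groundTruth[i], detections[j] ) )
            {
                used[j] = true;
                found = true;
                break;
            }
        }
        if( found ) hits++; else missed++;
    }
    // Detections left unmatched are false alarms.
    for( size_t j = 0; j < detections.size(); j++ )
        if( !used[j] ) falseAlarms++;

    printf( "hits: %d, missed: %d, false alarms: %d\n", hits, missed, falseAlarms );
}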

First, the steps for training a Haar classifier (a sketch of the corresponding invocations follows the list):

1. Prepare positive and negative samples;

2. Build the positive-sample set with the createsamples program;

3. Train with the haartraining program to obtain the final classifier model as an XML file.
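As a concrete illustration, a typical pair of invocations might look like the following sketch. The file names (positives.txt, pos.vec, negatives.txt, classifier_dir), sample counts and window size are placeholders for your own data; the haartraining options correspond to the usage text in the source listed below.

createsamples -info positives.txt -vec pos.vec -num 2000 -w 24 -h 24
haartraining -data classifier_dir -vec pos.vec -bg negatives.txt -npos 2000 -nneg 2000 -nstages 14 -w 24 -h 24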

A detailed walkthrough is available at this link: http://www.doc88.com/p-686754097427.html

To dig deeper, we will look at each function in detail. Although haartraining is older than traincascade, we start with haartraining.

/*
 * haartraining.cpp
 *
 * Train cascade classifier
 */
#include <cstdio>
#include <cstring>
#include <cstdlib>

using namespace std;

#include "cvhaartraining.h"

int main( int argc, char* argv[] )
{
    int i = 0;
    char* nullname = (char*)"(NULL)";
    char* vecname = NULL;
    char* dirname = NULL;
    char* bgname  = NULL;
    bool bg_vecfile = false;
    int npos    = 2000;
    int nneg    = 2000;
    int nstages = 14;
    int mem     = 200;
    int nsplits = 1;
    float minhitrate     = 0.995F;
    float maxfalsealarm  = 0.5F;
    float weightfraction = 0.95F;
    int mode         = 0;
    int symmetric    = 1;
    int equalweights = 0;
    int width  = 24;
    int height = 24;
    const char* boosttypes[] = { "DAB", "RAB", "LB", "GAB" };
    int boosttype = 3;
    const char* stumperrors[] = { "misclass", "gini", "entropy" };
    int stumperror = 0;
    int maxtreesplits = 0;
    int minpos = 500;

    if( argc == 1 )
    {
        /* Command-line parameter list for training; see the description of the
           cvCreateCascadeClassifier parameters in cvhaartraining.h for details. */
        printf( "Usage: %s\n  -data <dir_name>\n"                 // directory where the trained cascade will be stored (must already exist)
                "  -vec <vec_file_name>\n"                        // .vec file with the positive sample images
                "  -bg <background_file_name>\n"                  // description file of background (negative) images
                "  [-bg-vecfile]\n"
                "  [-npos <number_of_positive_samples = %d>]\n"   // positive samples used to train each stage
                "  [-nneg <number_of_negative_samples = %d>]\n"   // negative samples used to train each stage
                "  [-nstages <number_of_stages = %d>]\n"          // number of cascade stages to train
                "  [-nsplits <number_of_splits = %d>]\n"          // binary splits per weak classifier: 1 = stump, 2+ = tree
                "  [-mem <memory_in_MB = %d>]\n"
                "  [-sym (default)] [-nonsym]\n"                  // assume vertical symmetry of the object
                "  [-minhitrate <min_hit_rate = %f>]\n"           // minimum hit rate required per stage
                "  [-maxfalsealarm <max_false_alarm_rate = %f>]\n"// maximum false alarm rate allowed per stage
                "  [-weighttrimming <weight_trimming = %f>]\n"    // weight trimming fraction
                "  [-eqw]\n"                                      // give all samples equal initial weights
                "  [-mode <BASIC (default) | CORE | ALL>]\n"      // 0 - BASIC (Viola), 1 - CORE (all upright features), 2 - ALL features
                "  [-w <sample_width = %d>]\n"                    // sample width
                "  [-h <sample_height = %d>]\n"                   // sample height
                "  [-bt <DAB | RAB | LB | GAB (default)>]\n"      // boosting type: 0 - Discrete AdaBoost, 1 - Real AdaBoost, 2 - LogitBoost, 3 - Gentle AdaBoost
                "  [-err <misclass (default) | gini | entropy>]\n"// error type used when Discrete AdaBoost is applied
                "  [-maxtreesplits <max_number_of_splits_in_tree_cascade = %d>]\n" // maximum number of splits in the tree cascade
                "  [-minpos <min_number_of_positive_samples_per_cluster = %d>]\n", // minimum positive samples per cluster
                argv[0], npos, nneg, nstages, nsplits, mem,
                minhitrate, maxfalsealarm, weightfraction, width, height,
                maxtreesplits, minpos );

        return 0;
    }

    for( i = 1; i < argc; i++ )
    {
        if( !strcmp( argv[i], "-data" ) )               { dirname = argv[++i]; }
        else if( !strcmp( argv[i], "-vec" ) )           { vecname = argv[++i]; }
        else if( !strcmp( argv[i], "-bg" ) )            { bgname = argv[++i]; }
        else if( !strcmp( argv[i], "-bg-vecfile" ) )    { bg_vecfile = true; }
        else if( !strcmp( argv[i], "-npos" ) )          { npos = atoi( argv[++i] ); }
        else if( !strcmp( argv[i], "-nneg" ) )          { nneg = atoi( argv[++i] ); }
        else if( !strcmp( argv[i], "-nstages" ) )       { nstages = atoi( argv[++i] ); }
        else if( !strcmp( argv[i], "-nsplits" ) )       { nsplits = atoi( argv[++i] ); }
        else if( !strcmp( argv[i], "-mem" ) )           { mem = atoi( argv[++i] ); }
        else if( !strcmp( argv[i], "-sym" ) )           { symmetric = 1; }
        else if( !strcmp( argv[i], "-nonsym" ) )        { symmetric = 0; }
        else if( !strcmp( argv[i], "-minhitrate" ) )    { minhitrate = (float) atof( argv[++i] ); }
        else if( !strcmp( argv[i], "-maxfalsealarm" ) ) { maxfalsealarm = (float) atof( argv[++i] ); }
        else if( !strcmp( argv[i], "-weighttrimming" ) ){ weightfraction = (float) atof( argv[++i] ); }
        else if( !strcmp( argv[i], "-eqw" ) )           { equalweights = 1; }
        else if( !strcmp( argv[i], "-mode" ) )
        {
            char* tmp = argv[++i];
            if( !strcmp( tmp, "CORE" ) )      { mode = 1; }
            else if( !strcmp( tmp, "ALL" ) )  { mode = 2; }
            else                              { mode = 0; }
        }
        else if( !strcmp( argv[i], "-w" ) )             { width = atoi( argv[++i] ); }
        else if( !strcmp( argv[i], "-h" ) )             { height = atoi( argv[++i] ); }
        else if( !strcmp( argv[i], "-bt" ) )
        {
            i++;
            if( !strcmp( argv[i], boosttypes[0] ) )       { boosttype = 0; }
            else if( !strcmp( argv[i], boosttypes[1] ) )  { boosttype = 1; }
            else if( !strcmp( argv[i], boosttypes[2] ) )  { boosttype = 2; }
            else                                          { boosttype = 3; }
        }
        else if( !strcmp( argv[i], "-err" ) )
        {
            i++;
            if( !strcmp( argv[i], stumperrors[0] ) )      { stumperror = 0; }
            else if( !strcmp( argv[i], stumperrors[1] ) ) { stumperror = 1; }
            else                                          { stumperror = 2; }
        }
        else if( !strcmp( argv[i], "-maxtreesplits" ) ) { maxtreesplits = atoi( argv[++i] ); }
        else if( !strcmp( argv[i], "-minpos" ) )        { minpos = atoi( argv[++i] ); }
    }

    printf( "Data dir name: %s\n", ((dirname == NULL) ? nullname : dirname ) );
    printf( "Vec file name: %s\n", ((vecname == NULL) ? nullname : vecname ) );
    printf( "BG  file name: %s, is a vecfile: %s\n", ((bgname == NULL) ? nullname : bgname ), bg_vecfile ? "yes" : "no" );
    printf( "Num pos: %d\n", npos );
    printf( "Num neg: %d\n", nneg );
    printf( "Num stages: %d\n", nstages );
    printf( "Num splits: %d (%s as weak classifier)\n", nsplits,
        (nsplits == 1) ? "stump" : "tree" );
    printf( "Mem: %d MB\n", mem );
    printf( "Symmetric: %s\n", (symmetric) ? "TRUE" : "FALSE" );
    printf( "Min hit rate: %f\n", minhitrate );
    printf( "Max false alarm rate: %f\n", maxfalsealarm );
    printf( "Weight trimming: %f\n", weightfraction );
    printf( "Equal weights: %s\n", (equalweights) ? "TRUE" : "FALSE" );
    printf( "Mode: %s\n", ( (mode == 0) ? "BASIC" : ( (mode == 1) ? "CORE" : "ALL") ) );
    printf( "Width: %d\n", width );
    printf( "Height: %d\n", height );
    //printf( "Max num of precalculated features: %d\n", numprecalculated );
    printf( "Applied boosting algorithm: %s\n", boosttypes[boosttype] );
    printf( "Error (valid only for Discrete and Real AdaBoost): %s\n",
            stumperrors[stumperror] );
    printf( "Max number of splits in tree cascade: %d\n", maxtreesplits );
    printf( "Min number of positive samples per cluster: %d\n", minpos );

    // Build the tree-structured cascade classifier.
    cvCreateTreeCascadeClassifier( dirname, vecname, bgname,
                                   npos, nneg, nstages, mem,
                                   nsplits,
                                   minhitrate, maxfalsealarm, weightfraction,
                                   mode, symmetric,
                                   equalweights, width, height,
                                   boosttype, stumperror,
                                   maxtreesplits, minpos, bg_vecfile );
    return 0;
}
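A quick sanity check on the defaults above: the cascade applies its stages in sequence, so the overall hit rate is roughly the product of the per-stage hit rates and the overall false alarm rate roughly the product of the per-stage false alarm rates. With the defaults of 0.995 and 0.5 over 14 stages, that targets about 0.995^14 ≈ 0.93 detection at 0.5^14 ≈ 6.1e-5 false alarm. A tiny stand-alone check (not part of haartraining):

// Rough cascade-level rates implied by the per-stage defaults.
#include <cstdio>
#include <cmath>

int main()
{
    const int    nstages       = 14;
    const double minhitrate    = 0.995;  // per-stage minimum hit rate
    const double maxfalsealarm = 0.5;    // per-stage maximum false alarm rate

    // Stages run in sequence, so the cascade-level rates are (approximately)
    // the products of the per-stage rates.
    printf( "overall hit rate         >= %.4f\n", pow( minhitrate,    nstages ) );  // ~0.9323
    printf( "overall false alarm rate <= %.2e\n", pow( maxfalsealarm, nstages ) );  // ~6.1e-05
    return 0;
}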



