CamShift算法

来源：互联网发布：买房最好的软件编辑：程序博客网时间：2024/04/29 21:39

Camshift算法是Continuously Adaptive Mean Shift algorithm的简称。它是一个基于MeanShift的改进算法。它首次由Gary R.Bradski等人提出并应用在人脸的跟踪上，取得了不错的效果。由于它是利用颜色的概率信息进行跟踪，使得它的运行效率比较高。Camshift算法的过程由下面步骤组成：

（1）确定初始目标及其区域；

（2）计算出目标的色度（Hue)分量的直方图；

（3）利用直方图计算输入图像的反向投影图（后面做进一步的解释）；

（4）利用MeanShift算法在反向投影图中迭代搜索，直到其收敛或达到最大迭代次数，并保存零阶矩；

（5）从第（4）步中获得搜索窗口的中心位置并计算出新的窗口大小，以此为参数，进入到下一帧的目标跟踪（即跳转到第（3）步；目标直方图只在选定目标时计算一次，之后每一帧只需重新计算反向投影图）；

几点说明：

1. 在对输入图像计算反向投影图之前，先在HSV空间内做了一个阈值处理，用以滤掉一些噪声。

2. 反向投影图则是概率分布图，在反向投影图中某一像素点的值指的是这个点符合目标的概率分布的概率是多少，或者直接说该像素点属于目标图像的概率是多少。计算方法为：根据像素点的像素值查目标的直方图，其对应像素值的概率是多少就作为该点在反向投影图中的值。

3. Camshift算法到底是怎样自适应调整窗口的大小的。扩大：Camshift算法在计算窗口大小前，在MeanShift算出的窗口的四个方向上增大了TOLERANCE，即高和宽都增大了2*TOLERANCE（此值自己调整设置），这才有可能使得窗口能够变大。缩小：在扩大的窗口内重新计算0阶矩、1阶矩和2阶矩，利用矩的值重新计算高和宽。因此Camshift算法相当于在MeanShift的结果上，再做了一个调整，从而使得跟踪的窗口大小能够随目标的大小变化。

优点：算法的效率比较高，如果能利用多个特征做出来的统计直方图，我估计实验效果会更好。

缺点：（1）只利用颜色统计做的跟踪，在背景有相似颜色时，会出现跟踪错误的情况。（2）不能做多目标跟踪。（3）由于它只在初始位置（而不是从每个像素点）开始迭代，所以有可能在初始位置错了后，收敛的位置还是原位置（即跟丢了后，可能会找不回来）。

问题：论文中有关于窗口大小调整，是根据直方图来迭代求解，不知是怎么回事？在代码中没看到实现。在此向大家请教！

下面是Camshift算法Demo的代码：

View Code
// CamShift.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"


#ifdef _CH_
#pragma package <opencv>
#endif

#define CV_NO_BACKWARD_COMPATIBILITY

#ifndef _EiC
#include "cv.h"
#include "highgui.h"
#include <stdio.h>
#include <ctype.h>
#endif

/*
 * Shared state of the demo. All of it is file-global because the mouse
 * callback (on_mouse) and the main loop both read and write it.
 */

/* Working images, allocated in main() on the first captured frame:
 * image       - copy of the current frame, also used for drawing/display
 * hsv         - `image` converted with CV_BGR2HSV
 * hue         - first channel split out of `hsv`
 * mask        - result of cvInRangeS on `hsv` (smin / vmin / vmax thresholds)
 * backproject - output of cvCalcBackProject, AND-ed with `mask`
 * histimg     - 320x200 image shown in the "Histogram" window */
IplImage *image = 0, *hsv = 0, *hue = 0, *mask = 0, *backproject = 0, *histimg = 0;
/* Histogram of the selected region, built from `hue` under `mask`. */
CvHistogram *hist = 0;

int backproject_mode = 0; /* toggled by the 'b' key; non-zero displays the back projection */
int select_object = 0;    /* 1 between left-button-down and left-button-up (rubber-band drag) */
int track_object = 0;     /* 0: not tracking, -1: selection finished, histogram still to be built, 1: tracking */
int show_hist = 1;        /* toggled by the 'h' key */
CvPoint origin;           /* point where the left button went down */
CvRect selection;         /* rectangle being dragged / last selected by the user */
CvRect track_window;      /* search window passed to cvCamShift and updated from its result */
CvBox2D track_box;        /* box filled in by cvCamShift, drawn with cvEllipseBox */
CvConnectedComp track_comp; /* component filled in by cvCamShift; its rect becomes track_window */
int hdims = 16;           /* number of histogram bins passed to cvCreateHist */
float hranges_arr[] = {0,180}; /* value range of the single histogram dimension */
float* hranges = hranges_arr;  /* cvCreateHist takes an array of range pointers */
int vmin = 10, vmax = 256, smin = 30; /* trackbar-controlled thresholds used in cvInRangeS */

/*
 * Mouse callback for the "CamShiftDemo" window.
 *
 * While the left button is held down (select_object != 0) the global
 * `selection` rectangle is recomputed from the press point `origin` and the
 * current cursor position, clipped to the image. A left-button press starts
 * a new selection; a release ends it and, if the rectangle is non-empty,
 * sets track_object to -1 so the main loop builds the histogram.
 *
 * event  - CV_EVENT_* code
 * x, y   - cursor position in window coordinates
 * flags, param - unused
 */
void on_mouse( int event, int x, int y, int flags, void* param )
{
   (void)flags;
   (void)param;

   /* Nothing to do before the first frame has been captured. */
   if( !image )
       return;

   /* A non-zero origin field means y has to be mirrored. */
   if( image->origin )
       y = image->height - y;

   if( select_object )
   {
       /* Build the rectangle as left/top/right/bottom edges first ... */
       int left   = MIN( x, origin.x );
       int top    = MIN( y, origin.y );
       int right  = left + CV_IABS( x - origin.x );
       int bottom = top + CV_IABS( y - origin.y );

       /* ... clip the edges to the image ... */
       left   = MAX( left, 0 );
       top    = MAX( top, 0 );
       right  = MIN( right, image->width );
       bottom = MIN( bottom, image->height );

       /* ... and store it as x/y/width/height. */
       selection.x = left;
       selection.y = top;
       selection.width = right - left;
       selection.height = bottom - top;
   }

   if( event == CV_EVENT_LBUTTONDOWN )
   {
       /* Start of a drag: remember the anchor and reset the rectangle. */
       origin = cvPoint( x, y );
       selection = cvRect( x, y, 0, 0 );
       select_object = 1;
   }
   else if( event == CV_EVENT_LBUTTONUP )
   {
       /* End of a drag: request tracker initialisation for a non-empty box. */
       select_object = 0;
       if( selection.width > 0 && selection.height > 0 )
           track_object = -1;
   }
}

/*
 * Convert a hue value into a fully saturated colour.
 *
 * The hue axis is divided into six sectors of 30 units each. Within a
 * sector one channel is 255, one is 0 and the third ramps linearly
 * (up in even sectors, down in odd ones).
 *
 * hue - hue value; the caller in this file passes values in [0,180).
 *       Values outside that range are wrapped onto the six sectors instead
 *       of indexing past the end of the sector table.
 *
 * Returns cvScalar(rgb[2], rgb[1], rgb[0], 0), i.e. the three channels in
 * reversed order (presumably B,G,R for drawing on a BGR image).
 */
CvScalar hsv2rgb( float hue )
{
   /* Per sector: {channel set to 255, channel set to 0, channel set to p}. */
   static const int sector_data[][3]=
       {{0,2,1}, {1,2,0}, {1,0,2}, {2,0,1}, {2,1,0}, {0,1,2}};
   const int sector_count = (int)(sizeof(sector_data)/sizeof(sector_data[0]));
   int rgb[3], p, sector;

   hue *= 0.033333333333333333333333333333333f;   /* hue / 30 -> sector units */
   sector = cvFloor(hue);
   p = cvRound(255*(hue - sector));               /* position inside the sector, 0..255 */

   /* Wrap the sector into 0..5. Without this, hue >= 180 (sector 6+) or a
      negative hue indexes outside sector_data and then outside rgb[].
      The table has an even number of rows, so wrapping keeps the parity
      used below. */
   sector %= sector_count;
   if( sector < 0 )
       sector += sector_count;

   p ^= sector & 1 ? 255 : 0;                     /* odd sectors ramp downwards */

   rgb[sector_data[sector][0]] = 255;
   rgb[sector_data[sector][1]] = 0;
   rgb[sector_data[sector][2]] = p;

   return cvScalar(rgb[2], rgb[1], rgb[0],0);
}

int main( int argc, char** argv )
{
   CvCapture* capture = 0;
   capture=cvCaptureFromCAM(0);
   if( !capture )
   {
       fprintf(stderr,"Could not initialize capturing...\n");
       return -1;
   }

   printf( "Hot keys: \n"
           "\tESC - quit the program\n"
           "\tc - stop the tracking\n"
           "\tb - switch to/from backprojection view\n"
           "\th - show/hide object histogram\n"
           "To initialize tracking, select the object with mouse\n" );

   cvNamedWindow( "Histogram", 1 );
   cvNamedWindow( "CamShiftDemo", 1 );
   cvSetMouseCallback( "CamShiftDemo", on_mouse, 0 );
   cvCreateTrackbar( "Vmin", "CamShiftDemo", &vmin, 256, 0 );
   cvCreateTrackbar( "Vmax", "CamShiftDemo", &vmax, 256, 0 );
   cvCreateTrackbar( "Smin", "CamShiftDemo", &smin, 256, 0 );

   for(;;)
   {
       IplImage* frame = 0;
       int i, bin_w, c;

       frame = cvQueryFrame( capture );
       if( !frame )
           break;

       if( !image )
       {
            /* allocate all the buffers */
           image = cvCreateImage( cvGetSize(frame), 8, 3 );
           image->origin = frame->origin;
           hsv = cvCreateImage( cvGetSize(frame), 8, 3 );
           hue = cvCreateImage( cvGetSize(frame), 8, 1 );
           mask = cvCreateImage( cvGetSize(frame), 8, 1 );
           backproject = cvCreateImage( cvGetSize(frame), 8, 1 );
           hist = cvCreateHist(1, &hdims, CV_HIST_ARRAY, &hranges, 1 );
           histimg = cvCreateImage( cvSize(320,200), 8, 3 );
           cvZero( histimg );
       }

       cvCopy( frame, image, 0 );
       cvCvtColor( image, hsv, CV_BGR2HSV );

       if( track_object )
       {
           int _vmin = vmin, _vmax = vmax;

           cvInRangeS( hsv, cvScalar(0,smin,MIN(_vmin,_vmax),0),
           cvScalar(180,256,MAX(_vmin,_vmax),0), mask ); //去除噪声，在此数据内的值，确定mask为1
           cvSplit( hsv, hue, 0, 0, 0 ); //获得色调分量，并以此来做反向投影图

           if( track_object < 0 ) //框选结束;
           {
               float max_val = 0.f;
               cvSetImageROI( hue, selection );
               cvSetImageROI( mask, selection );
               cvCalcHist( &hue, hist, 0, mask );//计算选中部分直方图;
               cvGetMinMaxHistValue( hist, 0, &max_val, 0, 0 );
               cvConvertScale( hist->bins, hist->bins, max_val ? 255. / max_val : 0., 0 );
               cvResetImageROI( hue );
               cvResetImageROI( mask );
               track_window = selection;
               track_object = 1;
// 
//                 cvZero( histimg );
//                 bin_w = histimg->width / hdims;
//                 for( i = 0; i < hdims; i++ )
//                 {
//                     int val = cvRound( cvGetReal1D(hist->bins,i)*histimg->height/255 );//获取直方图的中每一项的高;
//                     CvScalar color = hsv2rgb(i*180.f/hdims);//直方图每一项的颜色是根据项数变化的;
//                     cvRectangle( histimg, cvPoint(i*bin_w,histimg->height),  //画直方图;
//                     cvPoint((i+1)*bin_w,histimg->height - val),
//                     color, -1, 8, 0 );
//                 }
           }

           cvCalcBackProject( &hue, backproject, hist );        //计算反向投影图backproject;
           cvAnd( backproject, mask, backproject, 0 );            //去除上下阀值外的点后的投影图;

           cvNamedWindow("backproject");
           cvShowImage("backproject",backproject);

           cvCamShift( backproject, track_window,                //利用camshift搜索0-255的灰度图像;
           cvTermCriteria( CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 10, 1 ),
                           &track_comp, &track_box );
           track_window = track_comp.rect;    //获得新的跟踪窗口;

           if( backproject_mode )
               cvCvtColor( backproject, image, CV_GRAY2BGR );

           if( !image->origin ) //如果为假，需要改变椭圆的角度;
               track_box.angle = -track_box.angle; 
           cvEllipseBox( image, track_box, CV_RGB(255,0,0), 3, CV_AA, 0 );//画跟踪椭圆
       }

       if( select_object && selection.width > 0 && selection.height > 0 )//在框住的时候反向显示
       {
           cvSetImageROI( image, selection );
           cvXorS( image, cvScalarAll(255), image, 0 );
           cvResetImageROI( image );
       }

       cvShowImage( "CamShiftDemo", image );
       cvShowImage( "Histogram", histimg );

       c = cvWaitKey(10);
       if( (char) c == 27 )
           break;
       switch( (char) c )
       {
       case 'b':
           backproject_mode ^= 1;
           break;
       case 'c':
           track_object = 0;
           cvZero( histimg );
           break;
       case 'h':
           show_hist ^= 1;
           if( !show_hist )
               cvDestroyWindow( "Histogram" );
           else
               cvNamedWindow( "Histogram", 1 );
           break;
       default:
           ;
       }
   }

   cvReleaseCapture( &capture );
   cvDestroyWindow("CamShiftDemo");

   return 0;
}

#ifdef _EiC
main(1,"camshiftdemo.c");
#endif

这里主要介绍下MeanShift算法的迭代过程，毕竟Camshift算法是以它为核心的。MeanShift算法是一种寻找局部极值的方法。作为一种直观上的理解，它是一步一步爬向最高点，即爬山算法。至于怎么个爬法：用计算出的重心作为下一步窗口的中心，直到窗口的位置不再变化。在理解MeanShift算法的时候，可以先不加入核函数（是计算距离对统计分布的影响）和权重函数（如人为主观的影响）。  
在Camshift算法中MeanShift是通过1阶矩除以0阶矩来计算重心的。其算法的代码如下：
代码
CV_IMPL intcvMeanShift( const void* imgProb, CvRect windowIn,             CvTermCriteria criteria, CvConnectedComp* comp ){    CvMoments moments;    int    i = 0, eps;    CvMat  stub, *mat = (CvMat*)imgProb;//输入的整个图像    CvMat  cur_win;    CvRect cur_rect = windowIn;//当前矩形窗口初始化为输入窗口    CV_FUNCNAME( "cvMeanShift" );    if( comp )        comp->rect = windowIn;//初始化联通区域    moments.m00 = moments.m10 = moments.m01 = 0; //初始化0、1阶矩    __BEGIN__;    CV_CALL( mat = cvGetMat( mat, &stub ));    if( CV_MAT_CN( mat->type ) > 1 )        CV_ERROR( CV_BadNumChannels, cvUnsupportedFormat );    if( windowIn.height <= 0 || windowIn.width <= 0 )        CV_ERROR( CV_StsBadArg, "Input window has non-positive sizes" );    if( windowIn.x < 0 || windowIn.x + windowIn.width > mat->cols ||      //x，y是指角点坐标而不是中心坐标        windowIn.y < 0 || windowIn.y + windowIn.height > mat->rows )        CV_ERROR( CV_StsBadArg, "Initial window is not inside the image ROI" );    CV_CALL( criteria = cvCheckTermCriteria( criteria, 1., 100 ));//迭代的结束条件，    eps = cvRound( criteria.epsilon * criteria.epsilon );    for( i = 0; i < criteria.max_iter; i++ )    {        int dx, dy, nx, ny;        double inv_m00;        CV_CALL( cvGetSubRect( mat, &cur_win, cur_rect )); //cur_win指向窗口内的数据        CV_CALL( cvMoments( &cur_win, &moments ));         //计算窗口内的各种矩        /* Calculating center of mass */        if( fabs(moments.m00) < DBL_EPSILON )            break;        inv_m00 = moments.inv_sqrt_m00*moments.inv_sqrt_m00;        dx = cvRound( moments.m10 * inv_m00 - windowIn.width*0.5 );//中心点的坐标-宽的一半        dy = cvRound( moments.m01 * inv_m00 - windowIn.height*0.5 );//中心点的坐标-高的一半        nx = cur_rect.x + dx;//新的x坐标        ny = cur_rect.y + dy;//新的y坐标        if( nx < 0 )            nx = 0;        else if( nx + cur_rect.width > mat->cols )            nx = mat->cols - cur_rect.width;        if( ny < 0 )            ny = 0;        else if( ny + cur_rect.height > mat->rows )            ny = mat->rows - cur_rect.height;        dx = 
nx - cur_rect.x;//重新        dy = ny - cur_rect.y;        cur_rect.x = nx;     //新窗口的坐标值        cur_rect.y = ny;        /* Check for coverage centers mass & window */        if( dx*dx + dy*dy < eps )    //迭代终止            break;    }    __END__;    if( comp )//返回矩形和0阶矩    {        comp->rect = cur_rect;        comp->area = (float)moments.m00;    }    return i;  //返回迭代次数}
 
Camshift算法代码：
 
代码
CV_IMPL intcvCamShift( const void* imgProb, CvRect windowIn,            CvTermCriteria criteria,            CvConnectedComp* _comp,            CvBox2D* box ){    const int TOLERANCE = 10;    CvMoments moments;    double m00 = 0, m10, m01, mu20, mu11, mu02, inv_m00;    double a, b, c, xc, yc;    double rotate_a, rotate_c;    double theta = 0, square;    double cs, sn;    double length = 0, width = 0;    int itersUsed = 0;    CvConnectedComp comp;    CvMat  cur_win, stub, *mat = (CvMat*)imgProb;    CV_FUNCNAME( "cvCamShift" );    comp.rect = windowIn;//初始化comp    __BEGIN__;    CV_CALL( mat = cvGetMat( mat, &stub ));    CV_CALL( itersUsed = cvMeanShift( mat, windowIn, criteria, &comp ));//调用meanshift计算质心    windowIn = comp.rect;//获得新的窗口的位置    //为了容错，窗口的四边都增大了TOLERANCE    windowIn.x -= TOLERANCE;    if( windowIn.x < 0 )        windowIn.x = 0;    windowIn.y -= TOLERANCE;    if( windowIn.y < 0 )        windowIn.y = 0;    windowIn.width += 2 * TOLERANCE;    if( windowIn.x + windowIn.width > mat->width )        windowIn.width = mat->width - windowIn.x;    windowIn.height += 2 * TOLERANCE;    if( windowIn.y + windowIn.height > mat->height )        windowIn.height = mat->height - windowIn.y;    CV_CALL( cvGetSubRect( mat, &cur_win, windowIn ));//获得指向子窗口的数据指针    /* Calculating moments in new center mass */    CV_CALL( cvMoments( &cur_win, &moments ));//重新计算窗口内的各种矩    m00 = moments.m00;    m10 = moments.m10;    m01 = moments.m01;    mu11 = moments.mu11;    mu20 = moments.mu20;    mu02 = moments.mu02;    if( fabs(m00) < DBL_EPSILON )        EXIT;    inv_m00 = 1. 
/ m00;    xc = cvRound( m10 * inv_m00 + windowIn.x );//新的中心坐标    yc = cvRound( m01 * inv_m00 + windowIn.y );    a = mu20 * inv_m00;    b = mu11 * inv_m00;    c = mu02 * inv_m00;    /* Calculating width & height */    square = sqrt( 4 * b * b + (a - c) * (a - c) );    /* Calculating orientation */    theta = atan2( 2 * b, a - c + square );    /* Calculating width & length of figure */    cs = cos( theta );    sn = sin( theta );    rotate_a = cs * cs * mu20 + 2 * cs * sn * mu11 + sn * sn * mu02;    rotate_c = sn * sn * mu20 - 2 * cs * sn * mu11 + cs * cs * mu02;    length = sqrt( rotate_a * inv_m00 ) * 4;//长与宽的计算    width = sqrt( rotate_c * inv_m00 ) * 4;    /* In case, when tetta is 0 or 1.57... the Length & Width may be exchanged */    if( length < width )    {        double t;                CV_SWAP( length, width, t );        CV_SWAP( cs, sn, t );        theta = CV_PI*0.5 - theta;    }    /* Saving results */    //由于有宽和高的重新计算，使得能自动调整窗口大小    if( _comp || box )    {        int t0, t1;        int _xc = cvRound( xc );//取整        int _yc = cvRound( yc );        t0 = cvRound( fabs( length * cs ));        t1 = cvRound( fabs( width * sn ));        t0 = MAX( t0, t1 ) + 2;//宽的重新计算        comp.rect.width = MIN( t0, (mat->width - _xc) * 2 );//保证宽不超出范围        t0 = cvRound( fabs( length * sn ));        t1 = cvRound( fabs( width * cs ));        t0 = MAX( t0, t1 ) + 2;//高的重新计算        comp.rect.height = MIN( t0, (mat->height - _yc) * 2 );//保证高不超出范围        comp.rect.x = MAX( 0, _xc - comp.rect.width / 2 );        comp.rect.y = MAX( 0, _yc - comp.rect.height / 2 );        comp.rect.width = MIN( mat->width - comp.rect.x, comp.rect.width );        comp.rect.height = MIN( mat->height - comp.rect.y, comp.rect.height );        comp.area = (float) m00;    }    __END__;    if( _comp )        *_comp = comp;        if( box )    {        box->size.height = (float)length;        box->size.width = (float)width;        box->angle = (float)(theta*180./CV_PI);        box->center = cvPoint2D32f( 
comp.rect.x + comp.rect.width*0.5f,                                    comp.rect.y + comp.rect.height*0.5f);    }    return itersUsed;}