Rob Hess的SIFT程序详细解释

来源:互联网 发布:骨科生物力学 知乎 编辑:程序博客网 时间:2024/04/28 09:23

最近一直在研究SIFT(Scale-Invariant Feature Transform,尺度不变特征变换)算法:先理解算法的数学原理,然后再一步步分析Rob Hess的程序代码:

/** default number of sampled intervals per octave (scale-space sub-levels) */
// initial value: 3
#define SIFT_INTVLS 3

/** default sigma for initial gaussian smoothing */
// initial value: 1.6
#define SIFT_SIGMA 1.6

/** default threshold on keypoint contrast |D(x)|; takes values in 0-1, and
    candidates below it are discarded as low-contrast false keypoints */
// the larger the value, the more keypoints are rejected
#define SIFT_CONTR_THR 0.04
// initial value: 0.04

/** default threshold on keypoint ratio of principal curvatures */
// curvature-ratio threshold r = 10; candidates above it are dropped as edge responses
#define SIFT_CURV_THR 10
// initial value: 10

/** double image size before pyramid construction? */
// 1 if the input image is doubled before the scale space is built, 0 otherwise
#define SIFT_IMG_DBL 1

/** default width of descriptor histogram array */
// descr_width: width of the grid of neighbourhood sub-blocks used when
// computing a feature descriptor
#define SIFT_DESCR_WIDTH 4
// initial value: 4

/** default number of bins per histogram in descriptor array */
// number of directions the keypoint neighbourhood is projected onto when the
// descriptor is computed; 8 by default: 0, 45, 90, 135, 180, 225, 270 and
// 315 degrees
#define SIFT_DESCR_HIST_BINS 8

/* assumed gaussian blur for input image.
   Original author's note: to obtain more keypoints, Lowe's SIFT starts by
   enlarging the original image to twice its size with linear interpolation,
   together with a sigma = 0.5 Gaussian pre-smoothing. */
#define SIFT_INIT_SIGMA 0.5
// initial value: 0.5

/* width of border in which to ignore keypoints */
// keypoints this close to the image edge are ignored
#define SIFT_IMG_BORDER 5

/* maximum steps of keypoint interpolation before failure */
#define SIFT_MAX_INTERP_STEPS 5

/* default number of bins in histogram for orientation assignment */
#define SIFT_ORI_HIST_BINS 36

/* determines gaussian sigma for orientation assignment */
// the Gaussian sigma used during orientation assignment is 1.5 times the
// scale of the keypoint
#define SIFT_ORI_SIG_FCTR 1.5

/* determines the radius of the region used in orientation assignment */
// The expansion is parenthesized so the product behaves as a single operand
// wherever the macro is used (e.g. x / SIFT_ORI_RADIUS); uses of the form
// SIFT_ORI_RADIUS * x evaluate exactly as before.
#define SIFT_ORI_RADIUS ( 3.0 * SIFT_ORI_SIG_FCTR )

/* number of passes of orientation histogram smoothing */
#define SIFT_ORI_SMOOTH_PASSES 2

/* orientation magnitude relative to max that results in new feature
   (histogram peaks are compared against 80% of the maximum) */
// initial value: 0.8 (threshold)
#define SIFT_ORI_PEAK_RATIO 0.8

/* determines the size of a single descriptor orientation histogram */
// initial value: 3.0
#define SIFT_DESCR_SCL_FCTR 3.0

/* threshold on magnitude of elements of descriptor vector */
// initial value: 0.2; components of the 128-dimensional SIFT descriptor that
// exceed 0.2 are clipped to 0.2
#define SIFT_DESCR_MAG_THR 0.2

/* factor used to convert floating-point descriptor to unsigned char */
// scale factor applied when converting floating-point values to unsigned char
#define SIFT_INT_DESCR_FCTR 512.0

/* returns a feature's detection data: given a pointer f to a struct feature,
   yields f->feature_data cast to struct detection_data*.
   The argument is parenthesized so that any pointer expression
   (e.g. feats + i) can be passed, not just a plain identifier. */
#define feat_detection_data(f) ( (struct detection_data*)( (f)->feature_data ) )

 

特征点检测函数分析

/**
Finds SIFT features in an image using the default parameter values
(SIFT_INTVLS, SIFT_SIGMA, SIFT_CONTR_THR, SIFT_CURV_THR, SIFT_IMG_DBL,
SIFT_DESCR_WIDTH and SIFT_DESCR_HIST_BINS).

@param img the image in which to detect features
@param feat a pointer to an array in which to store detected features

@return Returns whatever _sift_features() returns for these defaults
*/
int sift_features( IplImage* img, struct feature** feat )
{
 int n_found;

 n_found = _sift_features( img, feat,
                           SIFT_INTVLS,
                           SIFT_SIGMA,
                           SIFT_CONTR_THR,
                           SIFT_CURV_THR,
                           SIFT_IMG_DBL,
                           SIFT_DESCR_WIDTH,
                           SIFT_DESCR_HIST_BINS );
 return n_found;
}

/**
Finds SIFT features in an image using user-specified parameter values.  All
detected features are stored in the array pointed to by \a feat.

@param img the image in which to detect features
@param feat a pointer to an array in which to store detected features; the
 array is allocated here with calloc() and is not freed by this function
@param intvls the number of intervals sampled per octave of scale space
@param sigma the amount of Gaussian smoothing applied to each image level
 before building the scale space representation for an octave
@param contr_thr a threshold on the value of the scale space function
 \f$\left|D(\hat{x})\right|\f$, where \f$\hat{x}\f$ is a vector specifying
 feature location and scale, used to reject unstable features;  assumes
 pixel values in the range [0, 1]
@param curv_thr threshold on a feature's ratio of principal curvatures
 used to reject features that are too edge-like
@param img_dbl should be 1 if image doubling prior to scale space
 construction is desired or 0 if not
@param descr_width the width, \f$n\f$, of the \f$n \times n\f$ array of
 orientation histograms used to compute a feature's descriptor
@param descr_hist_bins the number of orientations in each of the
 histograms in the array used to compute a feature's descriptor

@return Returns the number of keypoints stored in \a feat.  NULL arguments
 and allocation failure are reported through fatal_error(); no path in this
 function returns -1 itself.
@see sift_keypoints()

Parameter notes (defaults are the ones sift_features() passes):
intvls: number of sampled intervals per octave; default 3.
sigma: amount of Gaussian smoothing; default 1.6.
contr_thr: decides whether a keypoint is stable, value in (0,1); default
 0.04.  The larger it is, the more keypoints are rejected.
curv_thr: decides whether a keypoint is an edge point; default 10.
img_dbl: 1 if the image is doubled before the scale space is built,
 0 otherwise.
descr_width: width of the grid of neighbourhood sub-blocks used for the
 descriptor; default 4.
descr_hist_bins: number of directions the keypoint neighbourhood is
 projected onto for the descriptor; default 8, i.e. 0, 45, 90, 135, 180,
 225, 270 and 315 degrees.
*/
int _sift_features( IplImage* img, struct feature** feat, int intvls,
       double sigma, double contr_thr, int curv_thr,
       int img_dbl, int descr_width, int descr_hist_bins )
{
 IplImage* init_img;
 IplImage*** gauss_pyr, *** dog_pyr;
 CvMemStorage* storage;
 CvSeq* features; /* OpenCV sequence (variable number of elements) */
 int octvs, i, n = 0;

 /* check arguments */
 if( ! img )
  fatal_error( "NULL pointer error, %s, line %d",  __FILE__, __LINE__ );

 if( ! feat )
  fatal_error( "NULL pointer error, %s, line %d",  __FILE__, __LINE__ );

 /* build scale space pyramid; smallest dimension of top level is ~4 pixels */
 /* NOTE(review): per the original annotation create_init_img converts the
    image to grayscale and then Gaussian-smooths it - confirm in its source */
 init_img = create_init_img( img, img_dbl, sigma );
 octvs = log( MIN( init_img->width, init_img->height ) ) / log(2) - 2;
 gauss_pyr = build_gauss_pyr( init_img, octvs, intvls, sigma ); /* Gaussian scale space */
 dog_pyr = build_dog_pyr( gauss_pyr, octvs, intvls ); /* difference-of-Gaussian scale space */

 storage = cvCreateMemStorage( 0 );
 features = scale_space_extrema( dog_pyr, octvs, intvls, contr_thr,
  curv_thr, storage ); /* candidate keypoints (location and scale) */
 calc_feature_scales( features, sigma, intvls ); /* scale value of each keypoint */

 if( img_dbl ) /* the image was doubled before building the scale space */
  adjust_for_img_dbl( features ); /* compensate keypoint data for the doubling */
 calc_feature_oris( features, gauss_pyr ); /* dominant orientation(s) of each keypoint */
 compute_descriptors( features, gauss_pyr, descr_width, descr_hist_bins );

 /* sort features (ordering defined by feature_cmp; decreasing scale per the
    original comment) and move them from the CvSeq to a plain array */
 cvSeqSort( features, (CvCmpFunc)feature_cmp, NULL );
 n = features->total;

 /* calloc( 0, ... ) may legally return NULL, which would be indistinguishable
    from an allocation failure and would hand a NULL buffer to
    cvCvtSeqToArray; always request at least one element instead */
 *feat = calloc( n > 0 ? n : 1, sizeof(struct feature) );
 if( ! *feat )
  fatal_error( "memory allocation error, %s, line %d",  __FILE__, __LINE__ );

 /* copy the sequence elements into one contiguous block of memory */
 *feat = cvCvtSeqToArray( features, *feat, CV_WHOLE_SEQ );

 /* release each feature's feature_data and clear the pointer so the
    returned array does not carry dangling pointers */
 for( i = 0; i < n; i++ )
 {
  free( (*feat)[i].feature_data );
  (*feat)[i].feature_data = NULL;
 }

 /* the pyramids are released with intvls + 3 (Gaussian) and intvls + 2
    (DoG) images per octave */
 cvReleaseMemStorage( &storage );
 cvReleaseImage( &init_img );
 release_pyr( &gauss_pyr, octvs, intvls + 3 );
 release_pyr( &dog_pyr, octvs, intvls + 2 );
 return n;
}

程序检测效果: