Rob Hess的SIFT程序详细解释

来源：互联网发布：骨科生物力学知乎编辑：程序博客网时间：2024/04/28 09:23

最近一直在研究SIFT（Scale-Invariant Feature Transform，尺度不变特征变换）算法：先理解算法的数学原理，然后再一步步分析Rob Hess的程序代码：

/** default number of sampled intervals per octave (initial value: 3) */
#define SIFT_INTVLS 3

/** default sigma for initial gaussian smoothing (initial value: 1.6);
    this is a smoothing amount, not a count */
#define SIFT_SIGMA 1.6

/** default threshold on keypoint contrast |D(x)|, in the range 0-1;
    keypoints below it are rejected as low-contrast false features, so a
    larger value rejects more keypoints (initial value: 0.04) */
#define SIFT_CONTR_THR 0.04

/** default threshold on keypoint ratio of principle curvatures; keypoints
    whose ratio exceeds r = 10 are dropped as edge responses
    (initial value: 10) */
#define SIFT_CURV_THR 10

/** double image size before pyramid construction?
    1 if the original image is doubled before the scale space is built,
    0 otherwise */
#define SIFT_IMG_DBL 1

/** default width of descriptor histogram array, i.e. the width of the grid
    of neighbourhood sub-blocks used for the descriptor (initial value: 4) */
#define SIFT_DESCR_WIDTH 4

/** default number of bins per histogram in descriptor array; with the
    default of 8 the directions are 0, 45, 90, 135, 180, 225, 270 and 315
    degrees */
#define SIFT_DESCR_HIST_BINS 8

/* assumed gaussian blur for input image (initial value: 0.5). To obtain more
   keypoints, Lowe's SIFT starts by doubling the original image with linear
   interpolation and treats it as pre-smoothed with sigma = 0.5. */
#define SIFT_INIT_SIGMA 0.5

/* width of border in which to ignore keypoints */
#define SIFT_IMG_BORDER 5

/* maximum steps of keypoint interpolation before failure */
#define SIFT_MAX_INTERP_STEPS 5

/* default number of bins in histogram for orientation assignment */
#define SIFT_ORI_HIST_BINS 36

/* determines gaussian sigma for orientation assignment: the sigma is 1.5
   times the scale of the keypoint */
#define SIFT_ORI_SIG_FCTR 1.5

/* determines the radius of the region used in orientation assignment.
   The expansion is parenthesized so that the macro behaves as a single
   value inside larger expressions (e.g. x / SIFT_ORI_RADIUS). */
#define SIFT_ORI_RADIUS ( 3.0 * SIFT_ORI_SIG_FCTR )

/* number of passes of orientation histogram smoothing */
#define SIFT_ORI_SMOOTH_PASSES 2

/* orientation magnitude relative to max that results in new feature:
   histogram peaks are compared against 80% of the maximum
   (initial value: 0.8) */
#define SIFT_ORI_PEAK_RATIO 0.8

/* determines the size of a single descriptor orientation histogram
   (initial value: 3.0) */
#define SIFT_DESCR_SCL_FCTR 3.0

/* threshold on magnitude of elements of descriptor vector: elements of the
   128-dimensional descriptor larger than 0.2 are clipped to 0.2
   (initial value: 0.2) */
#define SIFT_DESCR_MAG_THR 0.2

/* factor used to convert floating-point descriptor to unsigned char */
#define SIFT_INT_DESCR_FCTR 512.0

/* returns a feature's detection data: the feature_data member of the
   struct feature pointed to by f, viewed as a struct detection_data*.
   The argument is parenthesized so that expressions such as
   feat_detection_data( base + i ) expand correctly. */
#define feat_detection_data(f) ( (struct detection_data*)((f)->feature_data) )

特征点检测函数分析

/**
Finds SIFT features in an image using the default parameter values
(SIFT_INTVLS, SIFT_SIGMA, SIFT_CONTR_THR, SIFT_CURV_THR, SIFT_IMG_DBL,
SIFT_DESCR_WIDTH and SIFT_DESCR_HIST_BINS). All the work is delegated to
_sift_features().

@param img the image in which to detect features
@param feat a pointer to an array in which to store detected features

@return Returns the value produced by _sift_features() for these defaults,
  documented there as the number of keypoints stored in \a feat or -1 on
  failure
*/
int sift_features( IplImage* img, struct feature** feat )
{
  int n_found;

  n_found = _sift_features( img, feat,
                            SIFT_INTVLS, SIFT_SIGMA,
                            SIFT_CONTR_THR, SIFT_CURV_THR,
                            SIFT_IMG_DBL,
                            SIFT_DESCR_WIDTH, SIFT_DESCR_HIST_BINS );
  return n_found;
}

/**
Finds SIFT features in an image using user-specified parameter values. All
detected features are stored in the array pointed to by \a feat.

@param img the image in which to detect features; a NULL pointer is reported
  through fatal_error()
@param feat a pointer to an array in which to store detected features; a NULL
  pointer is reported through fatal_error(). On success \a *feat is set to an
  array obtained from calloc() (never NULL, even when no feature is found);
  on allocation failure \a *feat is set to NULL
@param intvls the number of intervals sampled per octave of scale space
  (sift_features() passes SIFT_INTVLS, 3)
@param sigma the amount of Gaussian smoothing applied to each image level
  before building the scale space representation for an octave
  (sift_features() passes SIFT_SIGMA, 1.6)
@param contr_thr a threshold on the value of the scale space function
  \f$\left|D(\hat{x})\right|\f$, where \f$\hat{x}\f$ is a vector specifying
  feature location and scale, used to reject unstable features; assumes
  pixel values in the range [0, 1]; the larger the value, the more features
  are rejected (sift_features() passes SIFT_CONTR_THR, 0.04)
@param curv_thr threshold on a feature's ratio of principle curvatures
  used to reject features that are too edge-like
  (sift_features() passes SIFT_CURV_THR, 10)
@param img_dbl should be 1 if image doubling prior to scale space
  construction is desired or 0 if not
@param descr_width the width, \f$n\f$, of the \f$n \times n\f$ array of
  orientation histograms used to compute a feature's descriptor
  (sift_features() passes SIFT_DESCR_WIDTH, 4)
@param descr_hist_bins the number of orientations in each of the
  histograms in the array used to compute a feature's descriptor; with the
  value 8 passed by sift_features() the directions are 0, 45, 90, 135, 180,
  225, 270 and 315 degrees

@return Returns the number of keypoints stored in \a feat or -1 on failure
  (the output array could not be allocated)
@see sift_keypoints()
*/
int _sift_features( IplImage* img, struct feature** feat, int intvls,
                    double sigma, double contr_thr, int curv_thr,
                    int img_dbl, int descr_width, int descr_hist_bins )
{
  IplImage* init_img;
  IplImage*** gauss_pyr, *** dog_pyr;
  CvMemStorage* storage;
  CvSeq* features;   /* OpenCV sequence with a non-fixed number of elements */
  int octvs, i, n;

  /* check arguments */
  if( ! img )
    fatal_error( "NULL pointer error, %s, line %d", __FILE__, __LINE__ );

  if( ! feat )
    fatal_error( "NULL pointer error, %s, line %d", __FILE__, __LINE__ );

  /* build scale space pyramid; smallest dimension of top level is ~4 pixels */
  /* img_dbl and sigma are forwarded to create_init_img(), which produces the
     base image of the pyramid */
  init_img = create_init_img( img, img_dbl, sigma );
  /* log2 of the smaller image dimension, minus 2, truncated to int */
  octvs = log( MIN( init_img->width, init_img->height ) ) / log(2) - 2;
  gauss_pyr = build_gauss_pyr( init_img, octvs, intvls, sigma );  /* Gaussian scale space */
  dog_pyr = build_dog_pyr( gauss_pyr, octvs, intvls );            /* DoG scale space */

  /* detect extrema in the DoG pyramid; the resulting sequence lives in storage */
  storage = cvCreateMemStorage( 0 );
  features = scale_space_extrema( dog_pyr, octvs, intvls, contr_thr,
                                  curv_thr, storage );
  calc_feature_scales( features, sigma, intvls );   /* scale of every feature */

  /* NOTE(review): presumably maps feature coordinates and scales back to the
     frame of the un-doubled input image -- confirm in adjust_for_img_dbl() */
  if( img_dbl )
    adjust_for_img_dbl( features );
  calc_feature_oris( features, gauss_pyr );         /* dominant orientations */
  compute_descriptors( features, gauss_pyr, descr_width, descr_hist_bins );

  /* sort features by decreasing scale and move from CvSeq to array */
  cvSeqSort( features, (CvCmpFunc)feature_cmp, NULL );
  n = features->total;

  /* Allocate at least one element: calloc( 0, ... ) is allowed to return
     NULL, which would be indistinguishable from an allocation failure and
     would hand a NULL destination to cvCvtSeqToArray(). */
  *feat = calloc( n > 0 ? n : 1, sizeof(struct feature) );
  if( ! *feat )
    {
      /* Out of memory: *feat is NULL and -1 is returned after the common
         cleanup below.
         NOTE(review): the feature_data pointers held by the sequence
         elements are not released on this path. */
      n = -1;
    }
  else
    {
      /* copy the sequence elements into the contiguous output array */
      *feat = cvCvtSeqToArray( features, *feat, CV_WHOLE_SEQ );

      /* detection-time data is not handed to the caller: free it and clear
         the pointer in every returned feature */
      for( i = 0; i < n; i++ )
        {
          free( (*feat)[i].feature_data );
          (*feat)[i].feature_data = NULL;
        }
    }

  /* release everything that was created in this function; the last argument
     of release_pyr() is the per-octave image count used by this code
     (intvls + 3 for the Gaussian pyramid, intvls + 2 for the DoG pyramid) */
  cvReleaseMemStorage( &storage );
  cvReleaseImage( &init_img );
  release_pyr( &gauss_pyr, octvs, intvls + 3 );
  release_pyr( &dog_pyr, octvs, intvls + 2 );
  return n;
}

程序检测效果：