opencv HOG中detectMultiScale函数详解

来源：互联网发布：兔家公子淘宝店真吗编辑：程序博客网时间：2024/04/27 17:17

参考：http://www.cnblogs.com/tornadomeet/archive/2012/08/15/2640754.html

函数作用：进行多尺度目标检测


函数接口   
void HOGDescriptor::detectMultiScale(
    const Mat& img, vector<Rect>& foundLocations, vector<double>& foundWeights,
    double hitThreshold, Size winStride, Size padding,
    double scale0, double finalThreshold, bool useMeanshiftGrouping) const                                                                           
参数注释
<1>img：源图像。
<2>foundLocations：检测出的目标的外接矩形框。
<3>foundWeights：各检测窗口的得分。
<4>hitThreshold：阈值，特征向量和SVM划分超平面的距离，大于等于这个值的才作为目标返回。
<5>winStride：窗口步长，必须是block步长的整数倍。
<6>padding：图片边缘补齐参数，gpu版本必须是(0,0)。
<7>scale0：检测窗口增长参数。
<8>finalThreshold：检测结果聚类参数。
<9>useMeanshiftGrouping：聚类方式选择的参数。

   代码注释：

973 //返回测试图片中水平方向和垂直方向共有多少个检测窗口，不能整除的话，多于的边界会不被计算在内？

974 Size HOGCache::windowsInImage(Size imageSize, Size winStride) const

975 {

976 return Size((imageSize.width - winSize.width)/winStride.width + 1,

977 (imageSize.height - winSize.height)/winStride.height + 1);

978 }

979

980

981 //给定图片的大小，已经检测窗口滑动的大小和测试图片中的检测窗口的索引，得到该索引处

982 //检测窗口的尺寸，包括坐标信息

983 Rect HOGCache::getWindow(Size imageSize, Size winStride, int idx) const

984 {

985 int nwindowsX = (imageSize.width - winSize.width)/winStride.width + 1;

986 int y = idx / nwindowsX;//商

987 int x = idx - nwindowsX*y;//余数

988 return Rect( x*winStride.width, y*winStride.height, winSize.width, winSize.height );

989 }

990

991

992 void HOGDescriptor::compute(const Mat& img, vector<float>& descriptors,

993 Size winStride, Size padding,

994 const vector<Point>& locations) const

995 {

996 //Size()表示长和宽都是0

997 if( winStride == Size() )

998 winStride = cellSize;

999 //gcd为求最大公约数，如果采用默认值的话，则2者相同

1000 Size cacheStride(gcd(winStride.width, blockStride.width),

1001 gcd(winStride.height, blockStride.height));

1002 size_t nwindows = locations.size();

1003 //alignSize(m, n)返回n的倍数大于等于m的最小值

1004 padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);

1005 padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);

1006 Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2);

1007

1008 HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);

1009

1010 if( !nwindows )

1011 //Mat::area()表示为Mat的面积

1012 nwindows = cache.windowsInImage(paddedImgSize, winStride).area();

1013

1014 const HOGCache::BlockData* blockData = &cache.blockData[0];

1015

1016 int nblocks = cache.nblocks.area();

1017 int blockHistogramSize = cache.blockHistogramSize;

1018 size_t dsize = getDescriptorSize();//一个hog的描述长度

1019 //resize()为改变矩阵的行数，如果减少矩阵的行数则只保留减少后的

1020 //那些行，如果是增加行数，则保留所有的行。

1021 //这里将描述子长度扩展到整幅图片

1022 descriptors.resize(dsize*nwindows);

1023

1024 for( size_t i = 0; i < nwindows; i++ )

1025 {

1026 //descriptor为第i个检测窗口的描述子首位置。

1027 float* descriptor = &descriptors[i*dsize];

1028

1029 Point pt0;

1030 //非空

1031 if( !locations.empty() )

1032 {

1033 pt0 = locations[i];

1034 //非法的点

1035 if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||

1036 pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )

1037 continue;

1038 }

1039 //locations为空

1040 else

1041 {

1042 //pt0为没有扩充前图像对应的第i个检测窗口

1043 pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding);

1044 CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 0);

1045 }

1046

1047 for( int j = 0; j < nblocks; j++ )

1048 {

1049 const HOGCache::BlockData& bj = blockData[j];

1050 //pt为block的左上角相对检测图片的坐标

1051 Point pt = pt0 + bj.imgOffset;

1052

1053 //dst为该block在整个测试图片的描述子的位置

1054 float* dst = descriptor + bj.histOfs;

1055 const float* src = cache.getBlock(pt, dst);

1056 if( src != dst )

1057 #ifdef HAVE_IPP

1058 ippsCopy_32f(src,dst,blockHistogramSize);

1059 #else

1060 for( int k = 0; k < blockHistogramSize; k++ )

1061 dst[k] = src[k];

1062 #endif

1063 }

1064 }

1065 }

1066

1067

1068 void HOGDescriptor::detect(const Mat& img,

1069 vector<Point>& hits, vector<double>& weights, double hitThreshold,

1070 Size winStride, Size padding, const vector<Point>& locations) const

1071 {

1072 //hits里面存的是符合检测到目标的窗口的左上角顶点坐标

1073 hits.clear();

1074 if( svmDetector.empty() )//svm算子不能为空，因为这是HOGDescriptor类的成员函数，里面用了很多成员变量

1075 return;

1076

1077 if( winStride == Size() )//如果窗口步长为0 ,则将其设为cell的大小

1078 winStride = cellSize;

1079 Size cacheStride(gcd(winStride.width, blockStride.width), //CacheStride为winStride和BlockStride的最大公约数

1080 gcd(winStride.height, blockStride.height));

1081 size_t nwindows = locations.size();//locations为预先传入的窗口子集，在这个子集中求目标，这个版本中没有用

1082 padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);//将padding改成大于等于padding ,但是可以被cacheStride整除的最小数

1083 padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);

1084 Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2);//padding 以后的图片大小

1085 //这个结构的应该是应该是保存HOG描述子和其一些列参数的，构造函数会将一切数据都算好

1086 HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);

1087

1088 if( !nwindows )

1089 nwindows = cache.windowsInImage(paddedImgSize, winStride).area();//图片包含的检测窗口的个数

1090 //BlockData结构体是对应的block数据的偏移量。histOfs和imgOffset.其中histOfs表示为该block对整个滑动窗口内hog描述算子的贡献那部分向量的起始位置；imgOffset为该block在滑动窗口图片中的坐标(左上角坐标)。

1091 const HOGCache::BlockData* blockData = &cache.blockData[0];

1092

1093 int nblocks = cache.nblocks.area();//每个检测窗口的block数量

1094 int blockHistogramSize = cache.blockHistogramSize;//每个block直方图的维数

1095 size_t dsize = getDescriptorSize();

1096

1097 double rho = svmDetector.size() > dsize ? svmDetector[dsize] : 0;//判断有没有加偏移量，rho

1098 vector<float> blockHist(blockHistogramSize);

1099

1100 for( size_t i = 0; i < nwindows; i++ )//遍历每一个window将其得分与hitThreshold看其是否是目标物

1101 {

1102 Point pt0;

1103 if( !locations.empty() )

1104 {

1105 pt0 = locations[i];

1106 if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||

1107 pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )

1108 continue;

1109 }

1110 else

1111 { //给定padding后图片的大小，返回第i个滑动窗口在原图片中的坐标信息，得到该索引处

1112 pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding);

1113 CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 0);

1114 }

1115 double s = rho;

1116 //svmVec指向svmDetector最前面那个元素

1117 const float* svmVec = &svmDetector[0];

1118 #ifdef HAVE_IPP

1119 int j;

1120 #else

1121 int j, k;

1122 #endif

1123 for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize )

1124 {

1125 const HOGCache::BlockData& bj = blockData[j];//当前block在window中的偏移量

1126 Point pt = pt0 + bj.imgOffset;//pt0为window在待检测图片中的偏移量，pt是当前block在图片中的偏移量

1127

1128 //vec为测试图片pt处的block贡献的描述子指针

1129 const float* vec = cache.getBlock(pt, &blockHist[0]);//函数返回一个block描述子的指针

1130 #ifdef HAVE_IPP

1131 Ipp32f partSum;

1132 ippsDotProd_32f(vec,svmVec,blockHistogramSize,&partSum);

1133 s += (double)partSum;

1134 #else

1135 for( k = 0; k <= blockHistogramSize - 4; k += 4 ) //描述子与svm向量相乘

1136 //const float* svmVec = &svmDetector[0];

1137 s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +

1138 vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];

1139 for( ; k < blockHistogramSize; k++ )

1140 s += vec[k]*svmVec[k];

1141 #endif

1142 }

1143 if( s >= hitThreshold )//s是上一个for循环中每个block累加的结果，s即当前window的检测得分

1144 {

1145 hits.push_back(pt0);

1146 weights.push_back(s);

1147 }

1148 }

1149 }

1150

1151 //不用保留检测到目标的可信度，即权重

1152 void HOGDescriptor::detect(const Mat& img, vector<Point>& hits, double hitThreshold,

1153 Size winStride, Size padding, const vector<Point>& locations) const

1154 {

1155 vector<double> weightsV;

1156 detect(img, hits, weightsV, hitThreshold, winStride, padding, locations);

1157 }

1158

1159 struct HOGInvoker

1160 {

1161 HOGInvoker( const HOGDescriptor* _hog, const Mat& _img,

1162 double _hitThreshold, Size _winStride, Size _padding,

1163 const double* _levelScale, ConcurrentRectVector* _vec,

1164 ConcurrentDoubleVector* _weights=0, ConcurrentDoubleVector* _scales=0 )

1165 {

1166 hog = _hog;

1167 img = _img;

1168 hitThreshold = _hitThreshold;

1169 winStride = _winStride;

1170 padding = _padding;

1171 levelScale = _levelScale;

1172 vec = _vec;

1173 weights = _weights;

1174 scales = _scales;

1175 }

1176

1177 void operator()( const BlockedRange& range ) const

1178 {

1179 int i, i1 = range.begin(), i2 = range.end();

1180 double minScale = i1 > 0 ? levelScale[i1] : i2 > 1 ? levelScale[i1+1] : std::max(img.cols, img.rows);//当i1=0,i2=1时 minScale取max(img.cols, img.rows)

1181 //缩放的最大尺寸，缩放之后的图像不会达到这个尺寸

1182 Size maxSz(cvCeil(img.cols/minScale), cvCeil(img.rows/minScale));

1183 Mat smallerImgBuf(maxSz, img.type());//当i1==0时smallerImgBuf的大小为1*1，可能是因为i1==0时没有尺寸缩放，没有尺寸缩放时不需要smallerImgBuf来初始化

1184 vector<Point> locations;

1185 vector<double> hitsWeights;

1186

1187 for( i = i1; i < i2; i++ )

1188 {

1189 double scale = levelScale[i];

1190 Size sz(cvRound(img.cols/scale), cvRound(img.rows/scale));

1191 //smallerImg只是构造一个指针，并没有复制数据

1192 Mat smallerImg(sz, img.type(), smallerImgBuf.data);

1193 //没有尺寸缩放

1194 if( sz == img.size() )

1195 smallerImg = Mat(sz, img.type(), img.data, img.step);

1196 //有尺寸缩放

1197 else

1198 resize(img, smallerImg, sz);

1199 //检测的实际函数，该函数实际上是将返回的值存在locations和histWeights中

1200 //其中locations存的是目标区域的左上角坐标

1201 hog->detect(smallerImg, locations, hitsWeights, hitThreshold, winStride, padding);

1202 Size scaledWinSize = Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale));//计算目标区域的大小

1203 for( size_t j = 0; j < locations.size(); j++ )

1204 {

1205 //保存目标区域

1206 vec->push_back(Rect(cvRound(locations[j].x*scale),

1207 cvRound(locations[j].y*scale),

1208 scaledWinSize.width, scaledWinSize.height));

1209 //保存缩放尺寸

1210 if (scales) {

1211 scales->push_back(scale);

1212 }

1213 }

1214 //保存svm计算后的结果值，weight指针有效才保存

1215 if (weights && (!hitsWeights.empty()))

1216 {

1217 for (size_t j = 0; j < locations.size(); j++)

1218 {

1219 weights->push_back(hitsWeights[j]);

1220 }

1221 }

1222 }

1223 }

1224

1225 const HOGDescriptor* hog;

1226 Mat img;

1227 double hitThreshold;

1228 Size winStride;

1229 Size padding;

1230 const double* levelScale;

1232 ConcurrentRectVector* vec;

1234 ConcurrentDoubleVector* weights;

1235 ConcurrentDoubleVector* scales;

1236 };

1237

1238

1239 void HOGDescriptor::detectMultiScale(

1240 const Mat& img, vector<Rect>& foundLocations, vector<double>& foundWeights,

1241 double hitThreshold, Size winStride, Size padding,

1242 double scale0, double finalThreshold, bool useMeanshiftGrouping) const

1243 {

1244 double scale = 1.;

1245 int levels = 0;

1246

1247 vector<double> levelScale;//保存图片将要缩放的尺度

1249 //nlevels默认的是64层 scale0是图像缩小参数

1250 for( levels = 0; levels < nlevels; levels++ )

1251 {

1252 levelScale.push_back(scale);

1257 //只考虑测试图片尺寸比检测窗口尺寸大以及scale0>1的情况，

//不符合要求中断循环。所以nlevel大一点没关系（并不会特别影响速度），关键的参数其实是scale0

1253 if( cvRound(img.cols/scale) < winSize.width ||

1254 cvRound(img.rows/scale) < winSize.height ||

1255 scale0 <= 1 )

1256 break;

1258 scale *= scale0;

1259 }

1260 levels = std::max(levels, 1);

1261 levelScale.resize(levels);

1262

1263 ConcurrentRectVector allCandidates;

1264 ConcurrentDoubleVector tempScales;

1265 ConcurrentDoubleVector tempWeights;

1266 vector<double> foundScales;

1267

1268 //TBB并行计算,会将参数range 传到HOGInvoker结构体的（）重载函数中，在这个里面对各个尺度的目标图片进行检测

1269 parallel_for(Range(0, (int)levelScale.size()),

1270 HOGInvoker(this, img, hitThreshold, winStride, padding, &levelScale[0], &allCandidates, &tempWeights, &tempScales));

1271 //将tempScales中的内容复制到foundScales中；这个参数其实没有什么用，保存的是检测到目标的图像对应的尺度

1272 std::copy(tempScales.begin(), tempScales.end(), back_inserter(foundScales));

1274 foundLocations.clear();

1275 //将候选目标窗口保存在foundLocations中

1276 std::copy(allCandidates.begin(), allCandidates.end(), back_inserter(foundLocations));

1277 foundWeights.clear();

1278 //将候选目标可信度保存在foundWeights中

1279 std::copy(tempWeights.begin(), tempWeights.end(), back_inserter(foundWeights));

1280 //对矩形框进行聚类

1281 if ( useMeanshiftGrouping )

1282 {

1283 groupRectangles_meanshift(foundLocations, foundWeights, foundScales, finalThreshold, winSize);

1284 }

1285 else

1286 {

1288 groupRectangles(foundLocations, (int)finalThreshold, 0.2);

1289 }

1290 }

1291

1292 //不考虑目标的置信度，通过调用包含置信度的版本

1293 void HOGDescriptor::detectMultiScale(const Mat& img, vector<Rect>& foundLocations,

1294 double hitThreshold, Size winStride, Size padding,

1295 double scale0, double finalThreshold, bool useMeanshiftGrouping) const

1296 {

1297 vector<double> foundWeights;

1298 detectMultiScale(img, foundLocations, foundWeights, hitThreshold, winStride,

1299 padding, scale0, finalThreshold, useMeanshiftGrouping);

1300 }