opencv HOG中detectMultiScale函数详解

来源:互联网 发布:兔家公子淘宝店真吗 编辑:程序博客网 时间:2024/04/27 17:17
参考:http://www.cnblogs.com/tornadomeet/archive/2012/08/15/2640754.html
  • 函数作用:进行多尺度目标检测

  • 函数接口   

void HOGDescriptor::detectMultiScale(

   const Mat& img, vector<Rect>& foundLocations, vector<double>& foundWeights,

   double hitThreshold, Size winStride, Size padding,

   double scale0, double finalThreshold, bool useMeanshiftGrouping) const                                                                           

参数注释

<1>img:源图像。
<2>foundLocations:检测出的目标的矩形边界框。
<3>foundWeights: 检测窗口得分
<4>hitThreshold:阈值,特征向量和SVM划分超平面的距离,大于等于这个值的才作为目标返回。
<5>winStride:窗口步长,必须是block步长的整数倍。
<6>padding:图片边缘补齐参数,gpu版本必须是(0,0)。
<7>scale0:检测窗口增长参数。
<8>finalThreshold:检测结果聚类参数
<9>useMeanshiftGrouping:聚类方式选择的参数
代码注释:  

973 //返回测试图片中水平方向和垂直方向共有多少个检测窗口,不能整除的话多余的边界不会被计算在内(整数除法会截断)

 974 Size HOGCache::windowsInImage(Size imageSize, Size winStride) const

 975 {

 976     return Size((imageSize.width - winSize.width)/winStride.width + 1,

 977                 (imageSize.height - winSize.height)/winStride.height + 1);

 978 }

 979

 980

 981 //给定图片的大小、检测窗口滑动的步长以及检测窗口在测试图片中的索引,得到该索引处

 982 //检测窗口的尺寸,包括坐标信息

 983 Rect HOGCache::getWindow(Size imageSize, Size winStride, int idx) const

 984 {

 985     int nwindowsX = (imageSize.width - winSize.width)/winStride.width + 1;

 986     int y = idx / nwindowsX;//

 987     int x = idx - nwindowsX*y;//余数

 988     return Rect( x*winStride.width, y*winStride.height, winSize.width, winSize.height );

 989 }

 990

 991

 992 void HOGDescriptor::compute(const Mat& img, vector<float>& descriptors,

 993                             Size winStride, Size padding,

 994                             const vector<Point>& locations) const

 995 {

 996     //Size()表示长和宽都是0

 997     if( winStride == Size() )

 998         winStride = cellSize;

 999     //gcd为求最大公约数,如果采用默认值的话,则2者相同

1000     Size cacheStride(gcd(winStride.width, blockStride.width),

1001                      gcd(winStride.height, blockStride.height));

1002     size_t nwindows = locations.size();

1003     //alignSize(m, n)返回大于等于m的、n的最小倍数

1004     padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);

1005     padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);

1006     Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2);

1007

1008     HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);

1009

1010     if( !nwindows )

1011         //Size::area()返回width*height,这里即检测窗口的总数

1012         nwindows = cache.windowsInImage(paddedImgSize, winStride).area();

1013

1014     const HOGCache::BlockData* blockData = &cache.blockData[0];

1015

1016     int nblocks = cache.nblocks.area();

1017     int blockHistogramSize = cache.blockHistogramSize;

1018     size_t dsize = getDescriptorSize();//一个hog的描述长度

1019     //descriptors是vector<float>,resize()改变的是向量的元素个数:缩小时只保留

1020     //前面的元素,增大时原有元素全部保留。

1021     //这里将描述子长度扩展到整幅图片

1022     descriptors.resize(dsize*nwindows);

1023

1024     for( size_t i = 0; i < nwindows; i++ )

1025     {

1026         //descriptor为第i个检测窗口的描述子首位置。

1027         float* descriptor = &descriptors[i*dsize];

1028       

1029         Point pt0;

1030         //非空

1031         if( !locations.empty() )

1032         {

1033             pt0 = locations[i];

1034             //非法的点

1035             if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||

1036                 pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )

1037                 continue;

1038         }

1039         //locations为空

1040         else

1041         {

1042             //pt0为没有扩充前图像对应的第i个检测窗口

1043             pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding);

1044             CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 0);

1045         }

1046

1047         for( int j = 0; j < nblocks; j++ )

1048         {

1049             const HOGCache::BlockData& bj = blockData[j];

1050             //pt为block的左上角相对检测图片的坐标

1051             Point pt = pt0 + bj.imgOffset;

1052

1053             //dst为该block在整个测试图片的描述子的位置

1054             float* dst = descriptor + bj.histOfs;

1055             const float* src = cache.getBlock(pt, dst);

1056             if( src != dst )

1057 #ifdef HAVE_IPP

1058                ippsCopy_32f(src,dst,blockHistogramSize);

1059 #else

1060                 for( int k = 0; k < blockHistogramSize; k++ )

1061                     dst[k] = src[k];

1062 #endif

1063         }

1064     }

1065 }

1066

1067

1068 void HOGDescriptor::detect(const Mat& img,

1069     vector<Point>& hits, vector<double>& weights, double hitThreshold,

1070     Size winStride, Size padding, const vector<Point>& locations) const

1071 {

1072     //hits里面存的是符合检测到目标的窗口的左上角顶点坐标

1073     hits.clear();

1074     if( svmDetector.empty() )//svm算子不能为空,因为这是HOGDescriptor类的成员函数,里面用了很多成员变量

1075         return;

1076

1077     if( winStride == Size() )//如果窗口步长为0 ,则将其设为cell的大小

1078         winStride = cellSize;

1079     Size cacheStride(gcd(winStride.width, blockStride.width),  //cacheStride为winStride和blockStride的最大公约数

1080                      gcd(winStride.height, blockStride.height));

1081     size_t nwindows = locations.size();//locations为预先传入的窗口子集,在这个子集中求目标,这个版本中没有用

1082     padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);//padding改成大于等于padding ,但是可以被cacheStride整除的最小数

1083     padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);

1084     Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2);//padding 以后的图片大小

1085     //这个结构应该是用来保存HOG描述子及其一系列参数的,构造函数会将一切数据都算好

1086     HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);

1087

1088     if( !nwindows )

1089         nwindows = cache.windowsInImage(paddedImgSize, winStride).area();//图片包含的检测窗口的个数

1090     //BlockData结构体是对应的block数据的偏移量,包含histOfs和imgOffset。其中histOfs表示为该block对整个滑动窗口内hog描述算子的贡献那部分向量的起始位置;imgOffset为该block在滑动窗口图片中的坐标(左上角坐标)

1091     const HOGCache::BlockData* blockData = &cache.blockData[0];

1092

1093     int nblocks = cache.nblocks.area();//每个检测窗口的block数量

1094     int blockHistogramSize = cache.blockHistogramSize;//每个block直方图的维数

1095     size_t dsize = getDescriptorSize();

1096

1097     double rho = svmDetector.size() > dsize ? svmDetector[dsize] : 0;//判断有没有加偏移量,rho

1098     vector<float> blockHist(blockHistogramSize);

1099

1100     for( size_t i = 0; i < nwindows; i++ )//遍历每一个window将其得分与hitThreshold看其是否是目标物

1101     {

1102         Point pt0;

1103         if( !locations.empty() )

1104         {

1105             pt0 = locations[i];

1106             if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||

1107                 pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )

1108                 continue;

1109         }

1110         else

1111         {   //给定padding后图片的大小,返回第i个滑动窗口在原图片中的坐标信息,得到该索引处
1112             pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding);

1113             CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 0);

1114         }

1115         double s = rho;

1116         //svmVec指向svmDetector最前面那个元素

1117         const float* svmVec = &svmDetector[0];

1118 #ifdef HAVE_IPP

1119         int j;

1120 #else

1121         int j, k;

1122 #endif

1123         for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize )

1124         {

1125             const HOGCache::BlockData& bj = blockData[j];//当前block在window中的偏移量

1126             Point pt = pt0 + bj.imgOffset;//pt0为window在待检测图片中的偏移量,pt是当前block在图片中的偏移量

1127            

1128             //vec为测试图片pt处的block贡献的描述子指针

1129             const float* vec = cache.getBlock(pt, &blockHist[0]);//函数返回一个block描述子的指针

1130 #ifdef HAVE_IPP

1131             Ipp32f partSum;

1132             ippsDotProd_32f(vec,svmVec,blockHistogramSize,&partSum);

1133             s += (double)partSum;

1134 #else

1135             for( k = 0; k <= blockHistogramSize - 4; k += 4 )  //描述子与svm向量相乘

1136                 //const float* svmVec = &svmDetector[0];

1137                 s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +

1138                     vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];

1139             for( ; k < blockHistogramSize; k++ )

1140                 s += vec[k]*svmVec[k];

1141 #endif

1142         }

1143         if( s >= hitThreshold )//s是上一个for循环中每个block累加的结果,s即当前window的检测得分

1144         {

1145             hits.push_back(pt0);

1146             weights.push_back(s);

1147         }

1148     }

1149 }

1150

1151 //不用保留检测到目标的可信度,即权重

1152 void HOGDescriptor::detect(const Mat& img, vector<Point>& hits, double hitThreshold,

1153                            Size winStride, Size padding, const vector<Point>& locations) const

1154 {

1155     vector<double> weightsV;

1156     detect(img, hits, weightsV, hitThreshold, winStride, padding, locations);

1157 }

1158

1159 struct HOGInvoker

1160 {

1161     HOGInvoker( const HOGDescriptor* _hog, const Mat& _img,

1162                 double _hitThreshold, Size _winStride, Size _padding,

1163                 const double* _levelScale, ConcurrentRectVector* _vec,

1164                 ConcurrentDoubleVector* _weights=0, ConcurrentDoubleVector* _scales=0 )

1165     {

1166         hog = _hog;

1167         img = _img;

1168         hitThreshold = _hitThreshold;

1169         winStride = _winStride;

1170         padding = _padding;

1171         levelScale = _levelScale;

1172         vec = _vec;

1173         weights = _weights;

1174         scales = _scales;

1175     }

1176

1177     void operator()( const BlockedRange& range ) const

1178     {

1179         int i, i1 = range.begin(), i2 = range.end();

1180         double minScale = i1 > 0 ? levelScale[i1] : i2 > 1 ? levelScale[i1+1] : std::max(img.cols, img.rows);//i1=0且i2=1时,minScale为max(img.cols, img.rows)

1181         //缩放后图像可能达到的最大尺寸,缩放之后的图像不会超过这个尺寸

1182         Size maxSz(cvCeil(img.cols/minScale), cvCeil(img.rows/minScale));

1183         Mat smallerImgBuf(maxSz, img.type());//i1==0且i2==1时smallerImgBuf的大小为1*1,可能是因为此时没有尺寸缩放,没有尺寸缩放时不需要smallerImgBuf作为缩放缓冲区


1184         vector<Point> locations;

1185         vector<double> hitsWeights;

1186

1187         for( i = i1; i < i2; i++ )

1188         {

1189             double scale = levelScale[i];

1190             Size sz(cvRound(img.cols/scale), cvRound(img.rows/scale));

1191             //smallerImg只是构造一个指针,并没有复制数据

1192             Mat smallerImg(sz, img.type(), smallerImgBuf.data);

1193             //没有尺寸缩放

1194             if( sz == img.size() )

1195                 smallerImg = Mat(sz, img.type(), img.data, img.step);

1196             //有尺寸缩放

1197             else

1198                 resize(img, smallerImg, sz);



1199             //检测的实际函数,该函数实际上是将返回的值存在locations和hitsWeights中

1200             //其中locations存的是目标区域的左上角坐标

1201             hog->detect(smallerImg, locations, hitsWeights, hitThreshold, winStride, padding);

1202             Size scaledWinSize = Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale));//计算目标区域的大小

1203             for( size_t j = 0; j < locations.size(); j++ )

1204             {

1205                 //保存目标区域

1206                 vec->push_back(Rect(cvRound(locations[j].x*scale),

1207                                     cvRound(locations[j].y*scale),

1208                                     scaledWinSize.width, scaledWinSize.height));

1209                 //保存缩放尺寸

1210                 if (scales) {

1211                     scales->push_back(scale);

1212                 }

1213             }

1214             //保存svm计算后的结果值,weight指针有效才保存

1215             if (weights && (!hitsWeights.empty()))

1216             {

1217                 for (size_t j = 0; j < locations.size(); j++)

1218                 {

1219                     weights->push_back(hitsWeights[j]);

1220                 }

1221             }       

1222         }

1223     }

1224

1225     const HOGDescriptor* hog;

1226     Mat img;

1227     double hitThreshold;

1228     Size winStride;

1229     Size padding;

1230     const double* levelScale;

1232     ConcurrentRectVector* vec;

1234     ConcurrentDoubleVector* weights;

1235     ConcurrentDoubleVector* scales;

1236 };

1237

1238

1239 void HOGDescriptor::detectMultiScale(

1240     const Mat& img, vector<Rect>& foundLocations, vector<double>& foundWeights,

1241     double hitThreshold, Size winStride, Size padding,

1242     double scale0, double finalThreshold, bool useMeanshiftGrouping) const 

1243 {

1244     double scale = 1.;

1245     int levels = 0;

1246

1247     vector<double> levelScale;//保存图片将要缩放的尺度

1249     //nlevels默认的是64 scale0是图像缩小参数

1250     for( levels = 0; levels < nlevels; levels++ )

1251     {

1252         levelScale.push_back(scale);

1257         //只考虑测试图片尺寸比检测窗口尺寸大以及scale0>1的情况,

             //不符合要求中断循环。所以nlevel大一点没关系(并不会特别影响速度),关键的参数其实是scale0

1253         if( cvRound(img.cols/scale) < winSize.width ||

1254             cvRound(img.rows/scale) < winSize.height ||

1255             scale0 <= 1 )

1256             break;

1258         scale *= scale0;

1259     }

1260     levels = std::max(levels, 1);

1261     levelScale.resize(levels);

1262

1263     ConcurrentRectVector allCandidates;

1264     ConcurrentDoubleVector tempScales;

1265     ConcurrentDoubleVector tempWeights;

1266     vector<double> foundScales;

1267    

1268     //TBB并行计算,会将参数range 传到HOGInvoker结构体的()重载函数中,在这个里面对各个尺度的目标图片进行检测

1269     parallel_for(Range(0, (int)levelScale.size()),

1270                  HOGInvoker(this, img, hitThreshold, winStride, padding, &levelScale[0], &allCandidates, &tempWeights, &tempScales));

1271     //将tempScales中的内容复制到foundScales中;foundScales保存的是检测到的目标对应的图像尺度,只在useMeanshiftGrouping为true时被groupRectangles_meanshift使用

1272     std::copy(tempScales.begin(), tempScales.end(), back_inserter(foundScales));

1274     foundLocations.clear();

1275     //将候选目标窗口保存在foundLocations

1276     std::copy(allCandidates.begin(), allCandidates.end(), back_inserter(foundLocations));

1277     foundWeights.clear();

1278     //将候选目标可信度保存在foundWeights

1279     std::copy(tempWeights.begin(), tempWeights.end(), back_inserter(foundWeights));



1280      //对矩形框进行聚类
1281     if ( useMeanshiftGrouping )

1282     {

1283         groupRectangles_meanshift(foundLocations, foundWeights, foundScales, finalThreshold, winSize);

1284     }

1285     else

1286     {

1288         groupRectangles(foundLocations, (int)finalThreshold, 0.2);

1289     }

1290 }

1291

1292 //不考虑目标的置信度,通过调用包含置信度的版本

1293 void HOGDescriptor::detectMultiScale(const Mat& img, vector<Rect>& foundLocations,

1294                                      double hitThreshold, Size winStride, Size padding,

1295                                      double scale0, double finalThreshold, bool useMeanshiftGrouping) const 

1296 {

1297     vector<double> foundWeights;

1298     detectMultiScale(img, foundLocations, foundWeights, hitThreshold, winStride,

1299                      padding, scale0, finalThreshold, useMeanshiftGrouping);

1300 }

973 //返回测试图片中水平方向和垂直方向共有多少个检测窗口,不能整除的话多余的边界不会被计算在内(整数除法会截断)

 974 Size HOGCache::windowsInImage(Size imageSize, Size winStride) const

 975 {

 976     return Size((imageSize.width - winSize.width)/winStride.width + 1,

 977                 (imageSize.height - winSize.height)/winStride.height + 1);

 978 }

 979

 980

 981 //给定图片的大小、检测窗口滑动的步长以及检测窗口在测试图片中的索引,得到该索引处

 982 //检测窗口的尺寸,包括坐标信息

 983 Rect HOGCache::getWindow(Size imageSize, Size winStride, int idx) const

 984 {

 985     int nwindowsX = (imageSize.width - winSize.width)/winStride.width + 1;

 986     int y = idx / nwindowsX;//

 987     int x = idx - nwindowsX*y;//余数

 988     return Rect( x*winStride.width, y*winStride.height, winSize.width, winSize.height );

 989 }

 990

 991

 992 void HOGDescriptor::compute(const Mat& img, vector<float>& descriptors,

 993                             Size winStride, Size padding,

 994                             const vector<Point>& locations) const

 995 {

 996     //Size()表示长和宽都是0

 997     if( winStride == Size() )

 998         winStride = cellSize;

 999     //gcd为求最大公约数,如果采用默认值的话,则2者相同

1000     Size cacheStride(gcd(winStride.width, blockStride.width),

1001                      gcd(winStride.height, blockStride.height));

1002     size_t nwindows = locations.size();

1003     //alignSize(m, n)返回大于等于m的、n的最小倍数

1004     padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);

1005     padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);

1006     Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2);

1007

1008     HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);

1009

1010     if( !nwindows )

1011         //Size::area()返回width*height,这里即检测窗口的总数

1012         nwindows = cache.windowsInImage(paddedImgSize, winStride).area();

1013

1014     const HOGCache::BlockData* blockData = &cache.blockData[0];

1015

1016     int nblocks = cache.nblocks.area();

1017     int blockHistogramSize = cache.blockHistogramSize;

1018     size_t dsize = getDescriptorSize();//一个hog的描述长度

1019     //descriptors是vector<float>,resize()改变的是向量的元素个数:缩小时只保留

1020     //前面的元素,增大时原有元素全部保留。

1021     //这里将描述子长度扩展到整幅图片

1022     descriptors.resize(dsize*nwindows);

1023

1024     for( size_t i = 0; i < nwindows; i++ )

1025     {

1026         //descriptor为第i个检测窗口的描述子首位置。

1027         float* descriptor = &descriptors[i*dsize];

1028       

1029         Point pt0;

1030         //非空

1031         if( !locations.empty() )

1032         {

1033             pt0 = locations[i];

1034             //非法的点

1035             if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||

1036                 pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )

1037                 continue;

1038         }

1039         //locations为空

1040         else

1041         {

1042             //pt0为没有扩充前图像对应的第i个检测窗口

1043             pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding);

1044             CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 0);

1045         }

1046

1047         for( int j = 0; j < nblocks; j++ )

1048         {

1049             const HOGCache::BlockData& bj = blockData[j];

1050             //pt为block的左上角相对检测图片的坐标

1051             Point pt = pt0 + bj.imgOffset;

1052

1053             //dst为该block在整个测试图片的描述子的位置

1054             float* dst = descriptor + bj.histOfs;

1055             const float* src = cache.getBlock(pt, dst);

1056             if( src != dst )

1057 #ifdef HAVE_IPP

1058                ippsCopy_32f(src,dst,blockHistogramSize);

1059 #else

1060                 for( int k = 0; k < blockHistogramSize; k++ )

1061                     dst[k] = src[k];

1062 #endif

1063         }

1064     }

1065 }

1066

1067

1068 void HOGDescriptor::detect(const Mat& img,

1069     vector<Point>& hits, vector<double>& weights, double hitThreshold,

1070     Size winStride, Size padding, const vector<Point>& locations) const

1071 {

1072     //hits里面存的是符合检测到目标的窗口的左上角顶点坐标

1073     hits.clear();

1074     if( svmDetector.empty() )//svm算子不能为空,因为这是HOGDescriptor类的成员函数,里面用了很多成员变量

1075         return;

1076

1077     if( winStride == Size() )//如果窗口步长为0 ,则将其设为cell的大小

1078         winStride = cellSize;

1079     Size cacheStride(gcd(winStride.width, blockStride.width),  //cacheStride为winStride和blockStride的最大公约数

1080                      gcd(winStride.height, blockStride.height));

1081     size_t nwindows = locations.size();//locations为预先传入的窗口子集,在这个子集中求目标,这个版本中没有用

1082     padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);//padding改成大于等于padding ,但是可以被cacheStride整除的最小数

1083     padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);

1084     Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2);//padding 以后的图片大小

1085     //这个结构应该是用来保存HOG描述子及其一系列参数的,构造函数会将一切数据都算好

1086     HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);

1087

1088     if( !nwindows )

1089         nwindows = cache.windowsInImage(paddedImgSize, winStride).area();//图片包含的检测窗口的个数

1090     //BlockData结构体是对应的block数据的偏移量,包含histOfs和imgOffset。其中histOfs表示为该block对整个滑动窗口内hog描述算子的贡献那部分向量的起始位置;imgOffset为该block在滑动窗口图片中的坐标(左上角坐标)

1091     const HOGCache::BlockData* blockData = &cache.blockData[0];

1092

1093     int nblocks = cache.nblocks.area();//每个检测窗口的block数量

1094     int blockHistogramSize = cache.blockHistogramSize;//每个block直方图的维数

1095     size_t dsize = getDescriptorSize();

1096

1097     double rho = svmDetector.size() > dsize ? svmDetector[dsize] : 0;//判断有没有加偏移量,rho

1098     vector<float> blockHist(blockHistogramSize);

1099

1100     for( size_t i = 0; i < nwindows; i++ )//遍历每一个window将其得分与hitThreshold看其是否是目标物

1101     {

1102         Point pt0;

1103         if( !locations.empty() )

1104         {

1105             pt0 = locations[i];

1106             if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||

1107                 pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )

1108                 continue;

1109         }

1110         else

1111         {   //给定padding后图片的大小,返回第i个滑动窗口在原图片中的坐标信息,得到该索引处
1112             pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding);

1113             CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 0);

1114         }

1115         double s = rho;

1116         //svmVec指向svmDetector最前面那个元素

1117         const float* svmVec = &svmDetector[0];

1118 #ifdef HAVE_IPP

1119         int j;

1120 #else

1121         int j, k;

1122 #endif

1123         for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize )

1124         {

1125             const HOGCache::BlockData& bj = blockData[j];//当前block在window中的偏移量

1126             Point pt = pt0 + bj.imgOffset;//pt0为window在待检测图片中的偏移量,pt是当前block在图片中的偏移量

1127            

1128             //vec为测试图片pt处的block贡献的描述子指针

1129             const float* vec = cache.getBlock(pt, &blockHist[0]);//函数返回一个block描述子的指针

1130 #ifdef HAVE_IPP

1131             Ipp32f partSum;

1132             ippsDotProd_32f(vec,svmVec,blockHistogramSize,&partSum);

1133             s += (double)partSum;

1134 #else

1135             for( k = 0; k <= blockHistogramSize - 4; k += 4 )  //描述子与svm向量相乘

1136                 //const float* svmVec = &svmDetector[0];

1137                 s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +

1138                     vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];

1139             for( ; k < blockHistogramSize; k++ )

1140                 s += vec[k]*svmVec[k];

1141 #endif

1142         }

1143         if( s >= hitThreshold )//s是上一个for循环中每个block累加的结果,s即当前window的检测得分

1144         {

1145             hits.push_back(pt0);

1146             weights.push_back(s);

1147         }

1148     }

1149 }

1150

1151 //不用保留检测到目标的可信度,即权重

1152 void HOGDescriptor::detect(const Mat& img, vector<Point>& hits, double hitThreshold,

1153                            Size winStride, Size padding, const vector<Point>& locations) const

1154 {

1155     vector<double> weightsV;

1156     detect(img, hits, weightsV, hitThreshold, winStride, padding, locations);

1157 }

1158

1159 struct HOGInvoker

1160 {

1161     HOGInvoker( const HOGDescriptor* _hog, const Mat& _img,

1162                 double _hitThreshold, Size _winStride, Size _padding,

1163                 const double* _levelScale, ConcurrentRectVector* _vec,

1164                 ConcurrentDoubleVector* _weights=0, ConcurrentDoubleVector* _scales=0 )

1165     {

1166         hog = _hog;

1167         img = _img;

1168         hitThreshold = _hitThreshold;

1169         winStride = _winStride;

1170         padding = _padding;

1171         levelScale = _levelScale;

1172         vec = _vec;

1173         weights = _weights;

1174         scales = _scales;

1175     }

1176

1177     void operator()( const BlockedRange& range ) const

1178     {

1179         int i, i1 = range.begin(), i2 = range.end();

1180         double minScale = i1 > 0 ? levelScale[i1] : i2 > 1 ? levelScale[i1+1] : std::max(img.cols, img.rows);//i1=0且i2=1时,minScale为max(img.cols, img.rows)

1181         //缩放后图像可能达到的最大尺寸,缩放之后的图像不会超过这个尺寸

1182         Size maxSz(cvCeil(img.cols/minScale), cvCeil(img.rows/minScale));

1183         Mat smallerImgBuf(maxSz, img.type());//i1==0且i2==1时smallerImgBuf的大小为1*1,可能是因为此时没有尺寸缩放,没有尺寸缩放时不需要smallerImgBuf作为缩放缓冲区


1184         vector<Point> locations;

1185         vector<double> hitsWeights;

1186

1187         for( i = i1; i < i2; i++ )

1188         {

1189             double scale = levelScale[i];

1190             Size sz(cvRound(img.cols/scale), cvRound(img.rows/scale));

1191             //smallerImg只是构造一个指针,并没有复制数据

1192             Mat smallerImg(sz, img.type(), smallerImgBuf.data);

1193             //没有尺寸缩放

1194             if( sz == img.size() )

1195                 smallerImg = Mat(sz, img.type(), img.data, img.step);

1196             //有尺寸缩放

1197             else

1198                 resize(img, smallerImg, sz);



1199             //检测的实际函数,该函数实际上是将返回的值存在locations和hitsWeights中

1200             //其中locations存的是目标区域的左上角坐标

1201             hog->detect(smallerImg, locations, hitsWeights, hitThreshold, winStride, padding);

1202             Size scaledWinSize = Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale));//计算目标区域的大小

1203             for( size_t j = 0; j < locations.size(); j++ )

1204             {

1205                 //保存目标区域

1206                 vec->push_back(Rect(cvRound(locations[j].x*scale),

1207                                     cvRound(locations[j].y*scale),

1208                                     scaledWinSize.width, scaledWinSize.height));

1209                 //保存缩放尺寸

1210                 if (scales) {

1211                     scales->push_back(scale);

1212                 }

1213             }

1214             //保存svm计算后的结果值,weight指针有效才保存

1215             if (weights && (!hitsWeights.empty()))

1216             {

1217                 for (size_t j = 0; j < locations.size(); j++)

1218                 {

1219                     weights->push_back(hitsWeights[j]);

1220                 }

1221             }       

1222         }

1223     }

1224

1225     const HOGDescriptor* hog;

1226     Mat img;

1227     double hitThreshold;

1228     Size winStride;

1229     Size padding;

1230     const double* levelScale;

1232     ConcurrentRectVector* vec;

1234     ConcurrentDoubleVector* weights;

1235     ConcurrentDoubleVector* scales;

1236 };

1237

1238

1239 void HOGDescriptor::detectMultiScale(

1240     const Mat& img, vector<Rect>& foundLocations, vector<double>& foundWeights,

1241     double hitThreshold, Size winStride, Size padding,

1242     double scale0, double finalThreshold, bool useMeanshiftGrouping) const 

1243 {

1244     double scale = 1.;

1245     int levels = 0;

1246

1247     vector<double> levelScale;//保存图片将要缩放的尺度

1249     //nlevels默认的是64 scale0是图像缩小参数

1250     for( levels = 0; levels < nlevels; levels++ )

1251     {

1252         levelScale.push_back(scale);

1257         //只考虑测试图片尺寸比检测窗口尺寸大以及scale0>1的情况,

             //不符合要求中断循环。所以nlevel大一点没关系(并不会特别影响速度),关键的参数其实是scale0

1253         if( cvRound(img.cols/scale) < winSize.width ||

1254             cvRound(img.rows/scale) < winSize.height ||

1255             scale0 <= 1 )

1256             break;

1258         scale *= scale0;

1259     }

1260     levels = std::max(levels, 1);

1261     levelScale.resize(levels);

1262

1263     ConcurrentRectVector allCandidates;

1264     ConcurrentDoubleVector tempScales;

1265     ConcurrentDoubleVector tempWeights;

1266     vector<double> foundScales;

1267    

1268     //TBB并行计算,会将参数range 传到HOGInvoker结构体的()重载函数中,在这个里面对各个尺度的目标图片进行检测

1269     parallel_for(Range(0, (int)levelScale.size()),

1270                  HOGInvoker(this, img, hitThreshold, winStride, padding, &levelScale[0], &allCandidates, &tempWeights, &tempScales));

1271     //将tempScales中的内容复制到foundScales中;foundScales保存的是检测到的目标对应的图像尺度,只在useMeanshiftGrouping为true时被groupRectangles_meanshift使用

1272     std::copy(tempScales.begin(), tempScales.end(), back_inserter(foundScales));

1274     foundLocations.clear();

1275     //将候选目标窗口保存在foundLocations

1276     std::copy(allCandidates.begin(), allCandidates.end(), back_inserter(foundLocations));

1277     foundWeights.clear();

1278     //将候选目标可信度保存在foundWeights

1279     std::copy(tempWeights.begin(), tempWeights.end(), back_inserter(foundWeights));



1280      //对矩形框进行聚类
1281     if ( useMeanshiftGrouping )

1282     {

1283         groupRectangles_meanshift(foundLocations, foundWeights, foundScales, finalThreshold, winSize);

1284     }

1285     else

1286     {

1288         groupRectangles(foundLocations, (int)finalThreshold, 0.2);

1289     }

1290 }

1291

1292 //不考虑目标的置信度,通过调用包含置信度的版本

1293 void HOGDescriptor::detectMultiScale(const Mat& img, vector<Rect>& foundLocations,

1294                                      double hitThreshold, Size winStride, Size padding,

1295                                      double scale0, double finalThreshold, bool useMeanshiftGrouping) const 

1296 {

1297     vector<double> foundWeights;

1298     detectMultiScale(img, foundLocations, foundWeights, hitThreshold, winStride,

1299                      padding, scale0, finalThreshold, useMeanshiftGrouping);

1300 }



1 0
原创粉丝点击