dlib人脸检测源码解析
来源:互联网 发布:人工智能简介 编辑:程序博客网 时间:2024/06/05 03:57
dlib人脸检测源码解析
dlib是基于C++的跨平台通用框架,其内容非常丰富,涵盖机器学习、图像处理、数据压缩等。原作者链接。dlib在 Image Processing 模块中定义了基于方向梯度直方图(Histogram of Oriented Gradient,HOG)特征和图像金字塔的多尺度目标检测方法。在非深度学习的领域中,其目标检测算法准确率较高,同时能保证良好的实时性。
本文将以人脸检测为例,详细解析dlib本部分源码。
源码及解析
- frontal_face_detector.h
获取已训练的目标检测模型
namespace dlib{ //miexp : 6 控制缩放步长 (6-1)/6 typedef object_detector<scan_fhog_pyramid<pyramid_down<6> > > frontal_face_detector; // 注:dlib将模型文件编码成字符串放在frontal_face_detector.h中,该函数用于解码 // 字符串获取模型文件内容 inline const std::string get_serialized_frontal_faces(); inline frontal_face_detector get_frontal_face_detector();}
- object_detector.h
目标检测主函数:依次用5种角度的目标检测器调用 scanner.detect() 函数,找出当前图像中包含人脸的位置
// Object detection entry point.
// Loads img into the scanner, runs every detector stored in w, and writes the
// surviving (non-overlapping) detections to final_dets.
// NOTE: the template parameter lists are omitted in this excerpt.
void object_detector<image_scanner_type>::
operator() (
    const image_type& img,
    std::vector<rect_detection>& final_dets,
    double adjust_threshold
)
{
    // Load the current frame/image; the scanner extracts the HOG features used for detection.
    scanner.load(img);

    std::vector<std::pair<double, rectangle> > candidates;  // (score, location) pairs returned by the scanner
    std::vector<rect_detection> accumulated;                // candidates gathered over all detectors

    // One pass per detector in w (the article describes five face orientations).
    for (unsigned long detector_idx = 0; detector_idx < w.size(); ++detector_idx)
    {
        const double thresh = w[detector_idx].w(scanner.get_num_dimensions());

        // Collect the candidate locations for this detector.
        scanner.detect(w[detector_idx].get_detect_argument(), candidates, thresh + adjust_threshold);

        for (unsigned long k = 0; k < candidates.size(); ++k)
        {
            rect_detection hit;
            // The confidence is the score measured relative to this detector's threshold.
            hit.detection_confidence = candidates[k].first - thresh;
            hit.weight_index = detector_idx;
            hit.rect = candidates[k].second;
            accumulated.push_back(hit);
        }
    }

    // Non-maximum suppression: keep the best windows only.
    final_dets.clear();
    if (w.size() > 1)
    {
        // Order by detection_confidence, best first.
        std::sort(accumulated.rbegin(), accumulated.rend());
    }

    for (unsigned long k = 0; k < accumulated.size(); ++k)
    {
        // A candidate that overlaps an already accepted window is dropped
        // (quadratic in the number of candidates).
        if (!overlaps_any_box(final_dets, accumulated[k].rect))
            final_dets.push_back(accumulated[k]);
    }
}
- scan_fhog_pyramid.h
load 载入图像并以金字塔形式提取hog特征
void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::load (constimage_type& img ){ // miexp: load 载入图像并提取hog特征 unsigned long width, height; compute_fhog_window_size(width,height);//金字塔顶端最小图像,与hog窗口大小相关 10 10 //计算图像金字塔,并提取各层HOG特征存入feats中 impl::create_fhog_pyramid<Pyramid_type>(img, fe, feats, cell_size, height, width, min_pyramid_layer_width, min_pyramid_layer_height, max_pyramid_levels); }//---------------------------------------------------------------// mcreate_fhog_pyramid 构建图像HOG特征金字塔 void create_fhog_pyramid ( const image_type& img, const feature_extractor_type& fe, array<array<array2d<float> > >& feats, int cell_size, int filter_rows_padding, int filter_cols_padding, unsigned long min_pyramid_layer_width, unsigned long min_pyramid_layer_height, unsigned long max_pyramid_levels ) { unsigned long levels = 0; rectangle rect = get_rect(img); pyramid_type pyr; {//根据图像大小计算金字塔层数 do { rect = pyr.rect_down(rect); ++levels; } while (rect.width() >= min_pyramid_layer_width && rect.height() >= min_pyramid_layer_height && levels < max_pyramid_levels); } //根据金字塔层数分配内存 if (feats.max_size() < levels) feats.set_max_size(levels); feats.set_size(levels); // 构建特征金字塔 // 提取第0层hog特征 ,通过()运算符重载,会调用<fhog.h>中的 impl_extract_fhog_features函数 fe(img, feats[0], cell_size,filter_rows_padding,filter_cols_padding); if (feats.size() > 1) { typedef typename image_traits<image_type>::pixel_type pixel_type; array2d<pixel_type> temp1, temp2; pyr(img, temp1);// 下采样 //提取第1层HOG特征 fe(temp1, feats[1], cell_size,filter_rows_padding,filter_cols_padding); swap(temp1,temp2); for (unsigned long i = 2; i < feats.size(); ++i) { pyr(temp2, temp1);//下采样 //提取第i层特征 fe(temp1, feats[i], cell_size,filter_rows_padding,filter_cols_padding); swap(temp1,temp2); } } }//---------------------------------------------------------------// 调用的extract_fhog_features函数 <fhog.h>void impl_extract_fhog_features( const image_type& img_, out_type& hog, int cell_size,//本例中为8,即每个cell中有8x8个像素点 int filter_rows_padding, int 
filter_cols_padding ) { const_image_view<image_type> img(img_);//定义第一象限和第二象限的9个方向的单位向量 matrix<float,2,1> directions[9]; directions[0] = 1.0000, 0.0000; directions[1] = 0.9397, 0.3420; directions[2] = 0.7660, 0.6428; directions[3] = 0.500, 0.8660; directions[4] = 0.1736, 0.9848; directions[5] = -0.1736, 0.9848; directions[6] = -0.5000, 0.8660; directions[7] = -0.7660, 0.6428; directions[8] = -0.9397, 0.3420; //计算所需内存用于缓存梯度直方图及各cell梯度直方图向量的norm const int cells_nr = (int)((float)img.nr()/(float)cell_size + 0.5); const int cells_nc = (int)((float)img.nc()/(float)cell_size + 0.5); //三维矩阵 cells_nr+2 cells_nc+2 18 array2d<matrix<float,18,1> > hist(cells_nr+2, cells_nc+2); for (long r = 0; r < hist.nr(); ++r) for (long c = 0; c < hist.nc(); ++c) hist[r][c] = 0;//初始化为0 array2d<float> norm(cells_nr, cells_nc);// 各cell梯度直方图的norm值矩阵 assign_all_pixels(norm, 0);//初始化为0 // memory for HOG features const int hog_nr = std::max(cells_nr-2, 0); const int hog_nc = std::max(cells_nc-2, 0); const int padding_rows_offset = (filter_rows_padding-1)/2; const int padding_cols_offset = (filter_cols_padding-1)/2; init_hog(hog, hog_nr, hog_nc, filter_rows_padding, filter_cols_padding); const int visible_nr = std::min((long)cells_nr*cell_size,img.nr())-1; const int visible_nc = std::min((long)cells_nc*cell_size,img.nc())-1; //首先计算方向梯度直方图 for (int y = 1; y < visible_nr; y++) //遍历图像纵坐标 { const float yp = ((float)y+0.5)/(float)cell_size - 0.5; const int iyp = (int)std::floor(yp); const float vy0 = yp - iyp; const float vy1 = 1.0 - vy0; int x; for (x = 1; x < visible_nc; x++) { matrix<float, 2, 1> grad; float v; get_gradient(y,x,img,grad,v);//做水平垂直差分,计算当前像素点的梯度(dy,dx) // 即找出最大的内积绝对值 和 方向 float best_dot = 0; int best_o = 0; for (int o = 0; o < 9; o++) { const float dot = dlib::dot(directions[o], grad);//点乘计算内积 if (dot > best_dot) //在一、二象限内积为正 { best_dot = dot; best_o = o; } else if (-dot > best_dot) //在三、四象限内积为负 { best_dot = -dot; best_o = o+9; } } v = std::sqrt(v); // add to 4 histograms around pixel 
using bilinear interpolation const float xp = ((double)x + 0.5) / (double)cell_size - 0.5; const int ixp = (int)std::floor(xp); const float vx0 = xp - ixp; const float vx1 = 1.0 - vx0; // 插值,抑制突变 hist[iyp+1][ixp+1](best_o) += vy1*vx1*v; hist[iyp+1+1][ixp+1](best_o) += vy0*vx1*v; hist[iyp+1][ixp+1+1](best_o) += vy1*vx0*v; hist[iyp+1+1][ixp+1+1](best_o) += vy0*vx0*v; } } //计算每个cell的energy for (int r = 0; r < cells_nr; ++r) { for (int c = 0; c < cells_nc; ++c) { for (int o = 0; o < 9; o++) {// 平方和求模 norm[r][c] += (hist[r+1][c+1](o) + hist[r+1][c+1](o+9)) * (hist[r+1][c+1](o) + hist[r+1][c+1](o+9)); } } } //然后。。。。太多了不抄代码了,大致如下: for (int y = 0; y < hog_nr; y++) { for (int x = 0; x < hog_nc; x++) { for(int o = 0; o < 31; o++) { set_hog(hog,o,x,y,18维的单位向量[某种映射关系]); } } } //得到该图的hog特征,hog (h/8 ,w/8 ,31),即图像中每个8x8的块,对应一个31维的向量 }
前面提取到了金字塔每一层的hog特征,一并存入feats中,然后开始检测
void detect_from_fhog_pyramid ( const array<array<array2d<float> > >& feats, const feature_extractor_type& fe, const fhog_filterbank& w, const double thresh, const unsigned long det_box_height, const unsigned long det_box_width, const int cell_size, const int filter_rows_padding, const int filter_cols_padding, std::vector<std::pair<double, rectangle> >& dets ) { dets.clear();//初始化 array2d<float> saliency_image;//显著性系数图,系数越大,则对应位置包含目标可能性越高 pyramid_type pyr; //遍历金字塔所有层 for (unsigned long l = 0; l < feats.size(); ++l) { // 检测每一层,得到每一层的显著性系数图,及目标区域 const rectangle area = apply_filters_to_fhog(w, feats[l], saliency_image); // 遍历该区域 for (long r = area.top(); r <= area.bottom(); ++r) { for (long c = area.left(); c <= area.right(); ++c) { // if we found a detection if (saliency_image[r][c] >= thresh)//若该处显著性系数大于阈值 { // 取出该处的矩阵位置 rectangle temp = centered_rect(point(c,r),det_box_width,det_box_height); rectangle rect = fe.feats_to_image(temp,cell_size, filter_rows_padding, filter_cols_padding); rect = pyr.rect_up(rect, l);//放大到正常大小 dets.push_back(std::make_pair(saliency_image[r][c], rect)); } } } } std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);//根据显著性系数排序}
检测每一层
// Applies the separable filters of the filter bank w to the HOG feature
// planes of one pyramid level and sums the responses into saliency_image.
//
//   w              - the detection model loaded from disk
//   feats          - the HOG feature planes of the current pyramid level
//   saliency_image - output; sum of all filter responses
//
// Returns the rectangle reported by the last filtering call. If no filter was
// applied, saliency_image is an all-zero image the size of feats[0] and the
// returned rectangle is default-constructed.
rectangle apply_filters_to_fhog (
    const fhog_filterbank& w,
    const array<array2d<float> >& feats,
    array2d<float>& saliency_image
)
{
    rectangle area;
    saliency_image.clear();
    array2d<float> scratch;

    // One group of separable filters per feature plane (31 planes per the
    // article); all of them vote into the same saliency image.
    for (unsigned long i = 0; i < w.row_filters.size(); ++i)
    {
        // Per the article each plane has 2 or 3 separable filters, and
        // row_filters[i][j] / col_filters[i][j] are vectors of length 10,
        // which implies an 80x80 pixel detection window.
        for (unsigned long j = 0; j < w.row_filters[i].size(); ++j)
        {
            // saliency_image was cleared above, so the very first response
            // must overwrite it; every later response is accumulated.
            const bool accumulate = (saliency_image.size() != 0);

            // Row pass followed by column pass: two 1-D convolutions are
            // cheaper than one 2-D convolution.
            area = float_spatially_filter_image_separable(feats[i], saliency_image,
                w.row_filters[i][j], w.col_filters[i][j], scratch, accumulate);
        }
    }

    // No filter ran at all: return an all-zero saliency image.
    if (saliency_image.size() == 0)
    {
        saliency_image.set_size(feats[0].nr(), feats[0].nc());
        assign_all_pixels(saliency_image, 0);
    }
    return area;
}
- spatial_filtering.h
两次一维卷积
// Filters in_img_ with a separable filter: a 1-D row filter followed by a
// 1-D column filter.
//
//   in_img_     - input image
//   out_img_    - output image, resized to the size of in_img_
//   _row_filter - 1-D filter applied along each row (float[10] per the article)
//   _col_filter - 1-D filter applied along each column (float[10] per the article)
//   scratch_    - work buffer that holds the row-filtered intermediate image
//   add_to      - true: add the response to the existing contents of out_img_;
//                 false: overwrite out_img_ (pixels outside the filtered
//                 region are set to zero)
//
// Returns the rectangle of pixels that received a full filter response
// (an empty rectangle for an empty input).
// NOTE: the template parameter lists are omitted in this excerpt.
rectangle float_spatially_filter_image_separable (
    const in_image_type& in_img_,
    out_image_type& out_img_,
    const matrix_exp<EXP1>& _row_filter,
    const matrix_exp<EXP2>& _col_filter,
    out_image_type& scratch_,
    bool add_to = false
)
{
    const_temp_matrix<EXP1> row_filter(_row_filter);
    const_temp_matrix<EXP2> col_filter(_col_filter);

    const_image_view<in_image_type> in_img(in_img_);
    image_view<out_image_type> out_img(out_img_);

    // Nothing to filter: hand back an empty image and an empty rectangle.
    if (in_img.size() == 0)
    {
        out_img.clear();
        return rectangle();
    }

    out_img.set_size(in_img.nr(),in_img.nc());  // the result is written to out_img

    // First and one-past-last positions at which the filters fit entirely
    // inside the image.
    const long first_row = col_filter.size()/2;
    const long first_col = row_filter.size()/2;
    const long last_row = in_img.nr() - ((col_filter.size()-1)/2);
    const long last_col = in_img.nc() - ((row_filter.size()-1)/2);

    const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1);
    // In overwrite mode the border pixels get no response, so give them a
    // defined value instead of leaving stale data behind.
    if (!add_to)
        zero_border_pixels(out_img, non_border);

    image_view<out_image_type> scratch(scratch_);
    scratch.set_size(in_img.nr(), in_img.nc());

    // Apply the row filter.
    for (long r = 0; r < in_img.nr(); ++r)
    {
        for (long c = first_col; c < last_col; ++c)
        {
            float temp = 0;
            for (long n = 0; n < row_filter.size(); ++n)
            {
                temp += in_img[r][c-first_col+n]*row_filter(n);  // 1-D convolution
            }
            scratch[r][c] = temp;
        }
    }

    // Apply the column filter to the row-filtered image.
    for (long r = first_row; r < last_row; ++r)
    {
        for (long c = first_col; c < last_col; ++c)
        {
            float temp = 0;
            for (long m = 0; m < col_filter.size(); ++m)
            {
                temp += scratch[r-first_row+m][c]*col_filter(m);
            }

            if (add_to)
                out_img[r][c] += temp;  // accumulate onto the existing value
            else
                out_img[r][c] = temp;   // overwrite
        }
    }

    return non_border;
}
阅读全文
0 0
- dlib人脸检测源码解析
- dlib人脸检测
- Dlib人脸检测+对齐
- Tensorflow12-Dlib人脸检测
- dlib 05 dlib自带demo 人脸检测
- dlib人脸检测功能介绍
- Dlib + VS2013 人脸检测,无需CMAKE
- Dlib人脸关键点检测顺序
- 人脸特征检测--基于DLIB库
- Qt Dlib配置实现人脸检测
- dlib人脸对齐源码详解
- dlib 04 dlib自带demo 基于HOG的人脸特征检测
- dlib 14 dlib自带demo 基于DNN的人脸检测
- opencv 人脸检测源码解析
- Dlib机器学习库系列2----人脸检测
- 基础知识(十三)dlib python人脸检测 特征点定位
- Dlib人脸特征点检测(速度优化)
- Dlib机器学习库系列2----人脸检测
- 【深度学习Deep Learning】资料大全
- Java-导论2
- [Android] 彻底了解Binder机制原理和底层实现
- 笔记本建立wifi热点
- javascript数组 去掉重复的元素 两个方法效率对比分析
- dlib人脸检测源码解析
- hbase读写流程
- Centos7 安装编译nbd模块
- 数据库那点事(Mysql)-前传
- JS完成图片轮播(JS定时操作)
- EL和JSTL
- SOA 服务化 服务治理 dubbo
- 1289:简单数据统计
- 2017 ACM-ICPC 西安网络赛 Trig Function