dlib人脸检测源码解析

来源：互联网发布：人工智能简介编辑：程序博客网时间：2024/06/05 03:57

dlib人脸检测源码解析

dlib是基于C++的跨平台通用框架，其内容非常丰富，涵盖机器学习、图像处理、数据压缩等。原作者链接。dlib在 Image Processing 模块中定义了基于方向梯度直方图（Histogram of Oriented Gradient，HOG）特征和图像金字塔的多尺度目标检测方法。在非深度学习的方法中，其目标检测算法准确率较高，同时能保证良好的实时性。
本文将以人脸检测为例，详细解析dlib本部分源码。

源码及解析

frontal_face_detector.h

获取已训练的目标检测模型

namespace dlib{    //miexp : 6 控制缩放步长 （6-1）/6    typedef object_detector<scan_fhog_pyramid<pyramid_down<6> > > frontal_face_detector;    // 注：dlib将模型文件编码成字符串放在frontal_face_detector.h中，该函数用于解码    //    字符串获取模型文件内容    inline const std::string get_serialized_frontal_faces();    inline frontal_face_detector get_frontal_face_detector()；}

object_detector.h

目标检测主函数：依次用5种角度的目标检测器调用 scanner.detect() 函数，找出当前图像中包含人脸的位置

// Object detection entry point: runs the scanner once per weight vector,
// accumulates every candidate box, then applies non-maximum suppression.
// (Template headers are omitted in this excerpt.)
//
//   img              - image or video frame to search
//   final_dets       - output; cleared, then filled with the surviving detections
//   adjust_threshold - offset added to each detector's own threshold
void object_detector<image_scanner_type>::
    operator() (
        const image_type& img,
        std::vector<rect_detection>& final_dets,
        double adjust_threshold
    )
{
    // Load the current frame/image and extract the HOG features used for detection.
    scanner.load(img);

    std::vector<std::pair<double, rectangle> > dets; // (score, box) pairs selected by one detector
    std::vector<rect_detection> dets_accum;          // candidates gathered from all detectors

    // One pass per weight vector (the face model uses 5, one per face angle).
    for (unsigned long i = 0; i < w.size(); ++i)
    {
        const double thresh = w[i].w(scanner.get_num_dimensions());

        // Collect the candidate locations for this detector.
        scanner.detect(w[i].get_detect_argument(), dets, thresh + adjust_threshold);

        for (unsigned long j = 0; j < dets.size(); ++j)
        {
            rect_detection temp;
            // detection_confidence is the score relative to this detector's threshold.
            temp.detection_confidence = dets[j].first - thresh;
            temp.weight_index = i;
            temp.rect = dets[j].second;
            dets_accum.push_back(temp);
        }
    }

    // Non-maximum suppression: keep only the most accurate windows.
    final_dets.clear();
    if (w.size() > 1)
        std::sort(dets_accum.rbegin(), dets_accum.rend()); // order by detection_confidence
    for (unsigned long i = 0; i < dets_accum.size(); ++i)
    {
        // Drop the candidate if it overlaps a window that was already kept
        // (quadratic in the number of candidates).
        if (overlaps_any_box(final_dets, dets_accum[i].rect))
            continue;

        final_dets.push_back(dets_accum[i]);
    }
}

scan_fhog_pyramid.h

load 载入图像并以金字塔形式提取hog特征

void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::load (constimage_type& img ){    // miexp: load 载入图像并提取hog特征     unsigned long width, height;     compute_fhog_window_size(width,height);//金字塔顶端最小图像，与hog窗口大小相关 10 10     //计算图像金字塔，并提取各层HOG特征存入feats中     impl::create_fhog_pyramid<Pyramid_type>(img, fe, feats, cell_size, height,            width, min_pyramid_layer_width, min_pyramid_layer_height,            max_pyramid_levels); }//---------------------------------------------------------------// mcreate_fhog_pyramid 构建图像HOG特征金字塔 void create_fhog_pyramid (            const image_type& img,            const feature_extractor_type& fe,            array<array<array2d<float> > >& feats,            int cell_size,            int filter_rows_padding,            int filter_cols_padding,            unsigned long min_pyramid_layer_width,            unsigned long min_pyramid_layer_height,            unsigned long max_pyramid_levels        ) {    unsigned long levels = 0;    rectangle rect = get_rect(img);    pyramid_type pyr;    {//根据图像大小计算金字塔层数      do      {       rect = pyr.rect_down(rect);       ++levels;       } while (rect.width() >= min_pyramid_layer_width &&                 rect.height() >= min_pyramid_layer_height &&                levels < max_pyramid_levels);    }    //根据金字塔层数分配内存    if (feats.max_size() < levels)       feats.set_max_size(levels);    feats.set_size(levels);    // 构建特征金字塔    // 提取第0层hog特征 ，通过（）运算符重载，会调用<fhog.h>中的 impl_extract_fhog_features函数    fe(img, feats[0], cell_size,filter_rows_padding,filter_cols_padding)；    if (feats.size() > 1)    {       typedef typename image_traits<image_type>::pixel_type pixel_type;       array2d<pixel_type> temp1, temp2;       pyr(img, temp1);// 下采样       //提取第1层HOG特征       fe(temp1, feats[1], cell_size,filter_rows_padding,filter_cols_padding);       swap(temp1,temp2);       for (unsigned long i = 2; i < feats.size(); ++i)       {          pyr(temp2, temp1);//下采样          //提取第i层特征          fe(temp1, feats[i], 
cell_size,filter_rows_padding,filter_cols_padding);          swap(temp1,temp2);       }     } }//---------------------------------------------------------------// 调用的extract_fhog_features函数 <fhog.h>void impl_extract_fhog_features( const image_type& img_,                                  out_type& hog,                                  int cell_size,//本例中为8，即每个cell中有8x8个像素点                                 int filter_rows_padding,                                 int filter_cols_padding ) {    const_image_view<image_type> img(img_);//定义第一象限和第二象限的9个方向的单位向量    matrix<float,2,1> directions[9];    directions[0] =  1.0000, 0.0000;     directions[1] =  0.9397, 0.3420;    directions[2] =  0.7660, 0.6428;    directions[3] =  0.500,  0.8660;    directions[4] =  0.1736, 0.9848;    directions[5] = -0.1736, 0.9848;    directions[6] = -0.5000, 0.8660;    directions[7] = -0.7660, 0.6428;    directions[8] = -0.9397, 0.3420;    //计算所需内存用于缓存梯度直方图及各cell梯度直方图向量的norm    const int cells_nr = (int)((float)img.nr()/(float)cell_size + 0.5);    const int cells_nc = (int)((float)img.nc()/(float)cell_size + 0.5);    //三维矩阵 cells_nr+2  cells_nc+2  18     array2d<matrix<float,18,1> > hist(cells_nr+2, cells_nc+2);    for (long r = 0; r < hist.nr(); ++r)       for (long c = 0; c < hist.nc(); ++c)           hist[r][c] = 0;//初始化为0    array2d<float> norm(cells_nr, cells_nc);// 各cell梯度直方图的norm值矩阵    assign_all_pixels(norm, 0);//初始化为0    // memory for HOG features    const int hog_nr = std::max(cells_nr-2, 0);    const int hog_nc = std::max(cells_nc-2, 0);    const int padding_rows_offset = (filter_rows_padding-1)/2;    const int padding_cols_offset = (filter_cols_padding-1)/2;    init_hog(hog, hog_nr, hog_nc, filter_rows_padding, filter_cols_padding);    const int visible_nr = std::min((long)cells_nr*cell_size,img.nr())-1;    const int visible_nc = std::min((long)cells_nc*cell_size,img.nc())-1;    //首先计算方向梯度直方图    for (int y = 1; y < visible_nr; y++) //遍历图像纵坐标    {         const float yp = 
((float)y+0.5)/(float)cell_size - 0.5;         const int iyp = (int)std::floor(yp);         const float vy0 = yp - iyp;         const float vy1 = 1.0 - vy0;         int x;         for (x = 1; x < visible_nc; x++)          {            matrix<float, 2, 1> grad;            float v;            get_gradient(y,x,img,grad,v);//做水平垂直差分，计算当前像素点的梯度（dy,dx）            // 即找出最大的内积绝对值 和 方向            float best_dot = 0;            int best_o = 0;            for (int o = 0; o < 9; o++)             {               const float dot = dlib::dot(directions[o], grad);//点乘计算内积               if (dot > best_dot) //在一、二象限内积为正               {                  best_dot = dot;                  best_o = o;               }                else if (-dot > best_dot) //在三、四象限内积为负               {                  best_dot = -dot;                  best_o = o+9;               }            }            v = std::sqrt(v);            // add to 4 histograms around pixel using bilinear interpolation            const float xp = ((double)x + 0.5) / (double)cell_size - 0.5;            const int ixp = (int)std::floor(xp);            const float vx0 = xp - ixp;            const float vx1 = 1.0 - vx0;            // 插值，抑制突变             hist[iyp+1][ixp+1](best_o) += vy1*vx1*v;             hist[iyp+1+1][ixp+1](best_o) += vy0*vx1*v;             hist[iyp+1][ixp+1+1](best_o) += vy1*vx0*v;             hist[iyp+1+1][ixp+1+1](best_o) += vy0*vx0*v;         }    }    //计算每个cell的energy    for (int r = 0; r < cells_nr; ++r)    {       for (int c = 0; c < cells_nc; ++c)       {          for (int o = 0; o < 9; o++)           {// 平方和求模            norm[r][c] += (hist[r+1][c+1](o) + hist[r+1][c+1](o+9)) *                          (hist[r+1][c+1](o) + hist[r+1][c+1](o+9));          }       }    }    //然后。。。。太多了不抄代码了，大致如下：    for (int y = 0; y < hog_nr; y++)     {       for (int x = 0; x < hog_nc; x++)        {             for（int o = 0; o < 31; o++）            {               set_hog(hog,o,x,y，18维的单位向量[某种映射关系]);            }       
}    }    //得到该图的hog特征，hog  (h/8 ,w/8 ,31),即图像中每个8x8的块，对应一个31维的向量 }

前面提取到了金字塔每一层的hog特征，一并存入feats中，然后开始检测

void detect_from_fhog_pyramid ( const array<array<array2d<float> > >& feats,                                const feature_extractor_type& fe,                                const fhog_filterbank& w,                                const double thresh,                                const unsigned long det_box_height,                                const unsigned long det_box_width,                                const int cell_size,                                const int filter_rows_padding,                                const int filter_cols_padding,                                std::vector<std::pair<double, rectangle> >& dets ) {   dets.clear();//初始化   array2d<float> saliency_image;//显著性系数图，系数越大，则对应位置包含目标可能性越高   pyramid_type pyr;   //遍历金字塔所有层   for (unsigned long l = 0; l < feats.size(); ++l)   {      // 检测每一层，得到每一层的显著性系数图，及目标区域      const rectangle area = apply_filters_to_fhog(w, feats[l], saliency_image);      // 遍历该区域      for (long r = area.top(); r <= area.bottom(); ++r)      {         for (long c = area.left(); c <= area.right(); ++c)         {            // if we found a detection            if (saliency_image[r][c] >= thresh)//若该处显著性系数大于阈值            {               // 取出该处的矩阵位置               rectangle temp = centered_rect(point(c,r),det_box_width,det_box_height)；               rectangle rect = fe.feats_to_image(temp,cell_size,                                          filter_rows_padding, filter_cols_padding);               rect = pyr.rect_up(rect, l);//放大到正常大小               dets.push_back(std::make_pair(saliency_image[r][c], rect));             }          }      }   }   std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);//根据显著性系数排序}

检测每一层

// Applies the separable filters of the detection model to one pyramid level and
// accumulates the responses into saliency_image.
// (Template headers are omitted in this excerpt.)
//
//   w              - the detection model loaded from disk
//   feats          - HOG feature planes of the current level
//   saliency_image - output saliency map
// Returns the rectangle reported by the last filtering call.
rectangle apply_filters_to_fhog (
    const fhog_filterbank& w,
    const array<array2d<float> >& feats,
    array2d<float>& saliency_image
)
{
    rectangle area;
    saliency_image.clear();
    array2d<float> scratch;

    // 31 filter groups (one per feature plane); their responses are summed,
    // i.e. all 31 vote together.
    for (unsigned long i = 0; i < w.row_filters.size(); ++i)
    {
        // 2 or 3 separable filters per plane.
        for (unsigned long j = 0; j < w.row_filters[i].size(); ++j)
        {
            // row_filters[i][j] and col_filters[i][j] are both length-10 vectors,
            // which implies an 80x80 pixel detection window.
            //
            // The very first response must overwrite the (just cleared) map;
            // every later one is added on top of it.
            const bool accumulate = saliency_image.size() != 0;

            // Row pass followed by column pass: two 1-D convolutions are
            // faster than one 2-D convolution.
            area = float_spatially_filter_image_separable(feats[i], saliency_image,
                       w.row_filters[i][j], w.col_filters[i][j], scratch, accumulate);
        }
    }

    // No filter ran at all: hand back an all-zero map.
    if (saliency_image.size() == 0)
    {
        saliency_image.set_size(feats[0].nr(), feats[0].nc());
        assign_all_pixels(saliency_image, 0);
    }
    return area;
}

spatial_filtering.h

两次一维卷积

// Separable 2-D filtering done as two 1-D convolutions: a row pass into
// scratch_, then a column pass into out_img_.
// (Template headers are omitted in this excerpt.)
//
//   in_img_     - input image
//   out_img_    - receives the result; resized to the input's dimensions
//   _row_filter - horizontal kernel (float[10] in the face detector)
//   _col_filter - vertical kernel (float[10] in the face detector)
//   scratch_    - work buffer holding the row-filtered image
//   add_to      - when true the result is added to out_img_, otherwise it
//                 overwrites out_img_
// Returns the rectangle of pixels that were actually filtered (non-border area).
rectangle float_spatially_filter_image_separable (
    const in_image_type& in_img_,
    out_image_type& out_img_,
    const matrix_exp<EXP1>& _row_filter,
    const matrix_exp<EXP2>& _col_filter,
    out_image_type& scratch_,
    bool add_to = false
)
{
    const_temp_matrix<EXP1> row_filter(_row_filter);
    const_temp_matrix<EXP2> col_filter(_col_filter);

    const_image_view<in_image_type> in_img(in_img_);
    image_view<out_image_type> out_img(out_img_);
    image_view<out_image_type> scratch(scratch_);

    out_img.set_size(in_img.nr(), in_img.nc()); // the convolution result goes here
    scratch.set_size(in_img.nr(), in_img.nc()); // row-pass intermediate

    // First and one-past-last positions the kernels can be centred on.
    const long first_row = col_filter.size()/2;
    const long first_col = row_filter.size()/2;
    const long last_row = in_img.nr() - ((col_filter.size()-1)/2);
    const long last_col = in_img.nc() - ((row_filter.size()-1)/2);

    const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1);
    // When overwriting, the untouched border must not keep stale values.
    if (!add_to)
        zero_border_pixels(out_img, non_border);

    // Apply the row filter.
    for (long r = 0; r < in_img.nr(); ++r)
    {
        for (long c = first_col; c < last_col; ++c)
        {
            float temp = 0;
            for (long n = 0; n < row_filter.size(); ++n)
            {
                temp += in_img[r][c-first_col+n]*row_filter(n); // 1-D convolution
            }
            scratch[r][c] = temp;
        }
    }

    // Apply the column filter.
    for (long r = first_row; r < last_row; ++r)
    {
        for (long c = first_col; c < last_col; ++c)
        {
            float temp = 0;
            for (long m = 0; m < col_filter.size(); ++m)
            {
                temp += scratch[r-first_row+m][c]*col_filter(m);
            }

            if (add_to)
                out_img[r][c] += temp;
            else
                out_img[r][c] = temp;
        }
    }

    return non_border;
}

阅读全文

0 0