SLIC超像素分割的算法介绍和源码分析

来源：互联网发布：淘宝刷单是什么意思编辑：程序博客网时间：2024/04/29 20:07

前述

最近在看显著性检测，发现很多算法的基础是超像素分割，而正在看的Saliency Optimization from Robust Background Detection算法的预处理是SLIC算法，于是便找了SLIC算法的论文进行学习，在学习过程中也顺便翻译了论文:http://blog.csdn.net/zhj_matlab/article/details/52973723。论文也给出了源码:http://ivrl.epfl.ch/research/superpixels。不过我查看的源码不是主页上给的，而是南开大学给出的源码:http://mmcheng.net/zh/salobjbenchmark/。个人感觉论文的原理很简单，不过一些处理方式给人感觉比较新颖而又有道理。

一、算法描述

第一步

初始化聚类中心，因为算法给定了距离，所以算法的初始中心为所划分区域的中心，即根据所给定的数量，划定所属类别的初始地区，如下图红框所示:
这里写图片描述
现在我们要将一幅9*9的图像（像素总数N=81）划分成为k为4的4个大小接近的超像素，做法就是首先以步距 $S=\sqrt{N/k}$ 为间隔划分超像素，然后将每个区域中心位置的结点作为超像素的中心结点。设置中心像素的时候论文还进行了进一步的处理，即在中心结点的3*3邻域内选取梯度最小的点作为初始超像素的中心结点，也就是下图中分别取黄色区域中梯度最小的点作为中心结点，好处是可以避免将超像素定位在边缘上，并且减少用噪声像素接种超像素的机会。不过官网上给出的matlab代码和下面要分析的南开大学给出的源码并没有进行这一步操作。个人觉得进行这步操作效果会得到改善，不过改善效果有限，有没有这一步影响不大，一个像素的间距对像素颜色的影响有限。
这里写图片描述

第二步

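这里写图片描述
（原图为论文中的距离度量公式：对每个像素 $i$ 与聚类中心 $j$，分别计算颜色距离 $d_c$ 与空间距离 $d_s$，再合成为最终距离 $D$）
$$d_c=\sqrt{(l_j-l_i)^2+(a_j-a_i)^2+(b_j-b_i)^2},\qquad d_s=\sqrt{(x_j-x_i)^2+(y_j-y_i)^2},\qquad D=\sqrt{d_c^2+\left(\frac{d_s}{S}\right)^2 m^2}$$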

其中m表示空间和像素颜色的相对重要性的度量。当m大时，空间邻近性更重要，并且所得到的超像素更紧凑（即它们具有更低的面积与周长比）。当m小时，所得到的超像素更紧密地粘附到图像边界，但是其尺寸和形状较不规则。当使用CIELAB色彩空间时，m可以在[1,40]的范围内。
第二个比较新颖的地方是计算距离的时候作者与传统的采用Kmeans进行分割的算法不同，并不是对整个空间的所有像素进行计算，而是限定了区域，区域大小为2S*2S，即寻找时以初始聚类中心为寻找中心，确定一个2S*2S的矩形，如下图所示:
这里写图片描述
图1：减少超像素搜索区域。SLIC的复杂度与图像中的像素数目N成线性关系，即O（N），而常规的k均值算法是O（kNI），其中I是迭代次数。这是通过在分配步骤中限制每个聚类中心的搜索空间来实现的。（a）在常规k均值算法中，从每个聚类中心到图像中的每个像素计算距离。（b）SLIC仅计算从每个聚类中心到2S×2S区域内的像素的距离。注意，期望的超像素大小仅为S×S，由较小的正方形表示。这种方法不仅减少了距离计算，而且使得SLIC的复杂度与超像素的数量无关。

好处是显而易见的，限制搜索区域的大小显著地减少了距离计算的数量，这样可以极大地加快速度，可以将算法控制为线性复杂度。
接着便是对kMeans算法进行迭代，直到算法收敛或迭代次数大于某一个值。根据论文，大部分图像在10次迭代以内即可收敛，具体迭代思路如下:
这里写图片描述

第三步

采用连通分量算法进行进一步的处理，这一步是我一开始不太明白的地方，不太懂什么是孤立的结点。直到我调试代码才明白，即迭代完成后有可能产生如下图所示的形状，图中的黄色方框所框出的结点也就是所谓的孤立点，为了使分割效果更好，我们通常不希望存在这种结点，于是可以采用连通分量进行下一步地修正。
这里写图片描述
图中的绿框为类别2的搜索范围，所以有可能产生图中黄色框中的孤立结点
作者采用连通分量的概念来解决这个问题，简单说来就是，根据4邻域连通或8邻域连通(代码采用的是4邻域的连接方式)的连通算法，图中的每个黄色方框各为一个连通分量，判断这个连通分量的面积，如果面积过小，则将该连通分量归入与其相邻的类别，即左上角的两个2分为1，左下角分为3，右下角分为4(具体的实现可以参看代码)。

源码分析

作者给出的并不是纯粹的matlab代码，而是采用C/C++编写并编译生成的mex文件，类似于C/C++中的dll。此类mex文件可以通过查看C/C++中的mexFunction来查看代码思路，具体的编码和调试过程参考:http://blog.csdn.net/zhj_matlab/article/details/52972571。
而这次的代码主要有两个文件，分别是SLIC.cpp和SLIC_mex.cpp，其中SLIC_mex.cpp只写了mexFunction，有点类似于一个接口，为的是能让matlab调用C++程序；主要的算法部分在SLIC.cpp，其中有4个函数：Run_SLIC_GivenPatchNum(根据超像素数目进行计算)、Run_SLIC_GivenPatchSize(根据超像素尺寸进行计算)、PerformLabXYKMeans(Kmeans算法的主体)、EnforceLabelConnectivity(采用连通分量进行进一步的调整)。具体流程见代码:
SLIC_mex.cpp（MATLAB接口文件，仅包含mexFunction）

#include <mex.h>
#include <stdio.h>
#include "SLIC.h"
#include "Rgb2Lab.h"

/**
 * MATLAB gateway for SLIC superpixel segmentation.
 *
 * MATLAB usage:
 *   [idxImg, spCount] = SLIC_mex(image, spNum, compactness)
 *
 * Inputs (prhs):
 *   prhs[0]  image        H x W x 3 uint8 array (RGB).
 *   prhs[1]  spNum        double, requested number of superpixels (>= 1).
 *   prhs[2]  compactness  double, the weight "m" of the SLIC paper that balances
 *                         spatial proximity against colour similarity.
 *
 * Outputs (plhs):
 *   plhs[0]  H x W double matrix of 1-based superpixel labels.
 *   plhs[1]  (optional) double scalar: value returned by Run_SLIC_GivenPatchNum,
 *            i.e. the number of superpixels after connectivity enforcement.
 *
 * Any argument error is reported through mexErrMsgTxt with the usage string.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    char usageStr[] = "Usage: idxImg = SLIC_mex(image(3-Channel uint8 image), spNum(double scalar), compactness(double scalar))\n";

    // Check the argument count first: prhs[] must not be touched before we know
    // how many entries it holds.
    if (nrhs != 3)
        mexErrMsgTxt(usageStr);

    const mxArray *pmxImg = prhs[0];
    if (!mxIsUint8(pmxImg) || !mxIsDouble(prhs[1]) || !mxIsDouble(prhs[2]))
        mexErrMsgTxt(usageStr);
    if (mxIsEmpty(prhs[1]) || mxIsEmpty(prhs[2]))
        mexErrMsgTxt(usageStr);

    // The image must be a 3-D array ...
    const mwSize dimNum = mxGetNumberOfDimensions(pmxImg);
    if (3 != dimNum)
        mexErrMsgTxt(usageStr);

    // ... with at least three planes; the loops below read planes 0, 1 and 2,
    // so fewer planes would read past the end of the buffer. Extra planes are
    // ignored, as before.
    const mwSize *sz = mxGetDimensions(pmxImg);  // like MATLAB's size()
    if (sz[2] < 3)
        mexErrMsgTxt(usageStr);

    const mwSize height = sz[0], width = sz[1], num_pix = height * width;
    const int rowNum = static_cast<int>(height);
    const int colNum = static_cast<int>(width);

    // Reject 0, negative and NaN superpixel counts: converting them to
    // unsigned is undefined and a zero count divides by zero downstream.
    const double spNumArg = mxGetScalar(prhs[1]);
    if (!(spNumArg >= 1.0))
        mexErrMsgTxt(usageStr);
    const unsigned int iPatchNum = static_cast<unsigned int>(spNumArg);
    const float compactness = static_cast<float>(mxGetScalar(prhs[2]));

    // Copy the column-major MATLAB planes into per-channel images.
    ImageSimpleUChar img_r, img_g, img_b;
    img_r.Create(width, height);
    img_g.Create(width, height);
    img_b.Create(width, height);
    const unsigned char *pImgData = static_cast<const unsigned char *>(mxGetData(pmxImg));
    for (int x = 0; x < colNum; x++)
    {
        for (int y = 0; y < rowNum; y++)
        {
            img_r.Pixel(x, y) = pImgData[y];
            img_g.Pixel(x, y) = pImgData[y + num_pix];
            img_b.Pixel(x, y) = pImgData[y + num_pix * 2];
        }
        pImgData += height;  // advance to the next column
    }

    // RGB --> Lab
    ImageSimpleFloat img_L, img_A, img_B;
    Rgb2Lab(img_r, img_g, img_b, img_L, img_A, img_B);

    // Run SLIC: idxImg receives the superpixel label of every pixel.
    ImageSimpleUInt idxImg;
    idxImg.Create(width, height);
    const int iSuperPixelNum = Run_SLIC_GivenPatchNum(img_L, img_A, img_B, iPatchNum, compactness, idxImg);

    // Copy the labels back to MATLAB, converting to 1-based indices.
    plhs[0] = mxCreateDoubleMatrix(height, width, mxREAL);
    double *pdIdxImg = mxGetPr(plhs[0]);
    for (int x = 0; x < colNum; x++)
    {
        for (int y = 0; y < rowNum; y++)
        {
            const unsigned int id = idxImg.Pixel(x, y);
            pdIdxImg[y] = double(id) + 1;
        }
        pdIdxImg += height;
    }

    // The second output is optional; only fill plhs[1] when the caller asked
    // for it, otherwise we would write past the end of plhs[].
    if (nlhs > 1)
    {
        plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL);
        *mxGetPr(plhs[1]) = double(iSuperPixelNum);
    }
    return;
}

SLIC.cpp（算法主体）

typedef unsigned int UINT;#include "SLIC.h"const int MAXITER = 10;inline double max(double a, double b) {return a > b ? a : b;}inline double min(double a, double b) {return a < b ? a : b;}void PerformLabXYKMeans(    vector<double> &kseedsl,    vector<double> &kseedsa,    vector<double> &kseedsb,    vector<double> &kseedsx,    vector<double> &kseedsy,    ImageSimpleFloat &LImg, ImageSimpleFloat &AImg, ImageSimpleFloat &BImg,    const int iPatchSize,    const double spatialFactor,    ImageSimpleUInt &idxImg){    UINT uiWidth = LImg.Width();    UINT uiHeight = LImg.Height();    const double searchRange = iPatchSize;    const size_t seedNum = kseedsl.size();    vector<double> clustersize(seedNum, 0);    vector<double> centroidl(seedNum, 0);    vector<double> centroida(seedNum, 0);    vector<double> centroidb(seedNum, 0);    vector<double> centroidx(seedNum, 0);    vector<double> centroidy(seedNum, 0);    ImageSimpleDouble minDistImage(uiWidth, uiHeight);    minDistImage.FillPixels(DBL_MAX);//初始距离设置为一个很大的值    ImageSimpleUInt lastIdxImg(uiWidth, uiHeight);    lastIdxImg.FillPixels(0);//初始类别设置为0    bool converged = false;//标记K-means是否收敛    int iter = 0;//标记迭代次数    const UINT pixNum = uiWidth * uiHeight;    while(!converged && iter < MAXITER)//最大迭代次数为10    {        int x1, y1, x2, y2;        double l, a, b;        double distCol;        double distxy;        double dist;        minDistImage.FillPixels(DBL_MAX);        for( size_t n = 0; n < seedNum; n++ )        {            //计算以种子为中心的周围区域            y1 = (int)max(0.0,          kseedsy[n]-searchRange);            y2 = (int)min((double)uiHeight, kseedsy[n]+searchRange);            x1 = (int)max(0.0,          kseedsx[n]-searchRange);            x2 = (int)min((double)uiWidth,  kseedsx[n]+searchRange);            for( int y = y1; y < y2; y++ )            {                for( int x = x1; x < x2; x++ )                {                    l = LImg.Pixel(x,y);                    a = AImg.Pixel(x,y);                    b = 
BImg.Pixel(x,y);                    //计算与类中心的距离，因为比的是大小，所以可以不开根号                    distCol = (l - kseedsl[n])*(l - kseedsl[n]) +                        (a - kseedsa[n])*(a - kseedsa[n]) +                        (b - kseedsb[n])*(b - kseedsb[n]);                    distxy = (x - kseedsx[n])*(x - kseedsx[n]) +                        (y - kseedsy[n])*(y - kseedsy[n]);                    dist = (distCol) + (distxy * spatialFactor);//sqrt(distCol) + sqrt(distxy * spatialFactor);                    if( dist < minDistImage.Pixel(x,y) )                    {                        //如果某点小于与目前标定的类中心的距离则重新更新距离和类别                        minDistImage.Pixel(x,y) = dist;                        idxImg.Pixel(x,y)  = ImageSimpleUInt::PixelType(n);                    }                }            }        }        // Recalculate the centroid and store in the seed values        //重新计算类中心的参数值        centroidl.assign(seedNum, 0);        centroida.assign(seedNum, 0);        centroidb.assign(seedNum, 0);        centroidx.assign(seedNum, 0);        centroidy.assign(seedNum, 0);        clustersize.assign(seedNum, 0);        for (UINT y = 0; y < uiHeight; y ++)        {            for (UINT x = 0; x < uiWidth; x ++)            {                ImageSimpleUInt::PixelType idx = idxImg.Pixel(x,y);                centroidl[idx] += LImg.Pixel(x,y);                centroida[idx] += AImg.Pixel(x,y);                centroidb[idx] += BImg.Pixel(x,y);                centroidx[idx] += x;                centroidy[idx] += y;                clustersize[idx] += 1.0;            }        }        for( UINT k = 0; k < seedNum; k++ )        {            assert(clustersize[k] > 0);            double inv = 1.0 / clustersize[k];            kseedsl[k] = centroidl[k] * inv;            kseedsa[k] = centroida[k] * inv;            kseedsb[k] = centroidb[k] * inv;            kseedsx[k] = centroidx[k] * inv;            kseedsy[k] = centroidy[k] * inv;        }        //Judge convergence        converged = true;        
//如果整幅图像所属类别不变，则算法收敛        for (UINT x = 0; x < pixNum; x ++)        {            if (lastIdxImg[x] != idxImg[x])            {                converged = false;                break;            }        }        lastIdxImg = idxImg;        iter ++;    }}int EnforceLabelConnectivity( ImageSimpleUInt &idxImg, const int iPatchSize ){    //  const int dx8[8] = {-1, -1,  0,  1, 1, 1, 0, -1};    //  const int dy8[8] = { 0, -1, -1, -1, 0, 1, 1,  1};    UINT uiWidth = idxImg.Width();    UINT uiHeight = idxImg.Height();    //定义的邻接类型为4邻接    const int dx4[4] = {-1,  0,  1,  0};    const int dy4[4] = { 0, -1,  0,  1};    const int pixNum = uiWidth* uiHeight;    const int AreaThresh = iPatchSize * iPatchSize / 4;    ImageSimpleInt newIdxImg(uiWidth, uiHeight);    newIdxImg.FillPixels(-1);    int label = 0;    int adjlabel = 0;    int* xvec = new int[pixNum];            //this is actually a queue    int* yvec = new int[pixNum];    for( UINT q = 0; q < uiHeight; q++ )    {        for( UINT p = 0; p < uiWidth; p++ )        {            if( newIdxImg.Pixel(p,q) < 0 )  //"< 0 " means unprocessed                //即每次以没有重新定义的类别开始计算连通分量            {                newIdxImg.Pixel(p,q) = label;                //Add current pixel to the queue                xvec[0] = p;                yvec[0] = q;                //Adjacent label for current region, this may be used for region merging                for( int n = 0; n < 4; n++ )                {                    int x = xvec[0] + dx4[n];                    int y = yvec[0] + dy4[n];                    if( (x >= 0 && x < (int)uiWidth) && (y >= 0 && y < (int)uiHeight) )                    {                        //Note, adjacent label for the first(top-left corner) patch is unset, so it's initial value 0.                        
if(newIdxImg.Pixel(x,y) >= 0)                            adjlabel = newIdxImg.Pixel(x,y);                    }                }                int count = 1;                for( int c = 0; c < count; c++ )    //count will be updated, so xvec and yvec are queues                {                    for( int n = 0; n < 4; n++ )                    {                        int x = xvec[c] + dx4[n];                        int y = yvec[c] + dy4[n];                        if( (x >= 0 && x < (int)uiWidth) && (y >= 0 && y < (int)uiHeight) )                        {                            if( newIdxImg.Pixel(x,y) < 0 && idxImg.Pixel(x,y) == idxImg.Pixel(p,q) )                            {                                xvec[count] = x;                                yvec[count] = y;                                newIdxImg.Pixel(x,y) = label;                                count++;                            }                        }                    }                }                // If segment size is less then a limit, assign an adjacent label found before, and decrement label count.                
//如果连通分量过小，则赋予其周围超像素的类别                if(count <= AreaThresh)                {                    for( int c = 0; c < count; c++ )                    {                        newIdxImg.Pixel(xvec[c], yvec[c]) = adjlabel;                    }                    label--;                }                label++;            }        }    }    //Transfer newIdxImg to idxImg    for (UINT y = 0; y < uiHeight; y ++)    {        for (UINT x = 0; x < uiWidth; x ++)        {            assert(newIdxImg.Pixel(x, y) >= 0);            idxImg.Pixel(x, y) = newIdxImg.Pixel(x, y);        }    }    delete [] xvec;    delete [] yvec;    return label;}int Run_SLIC_GivenPatchNum(ImageSimpleFloat &LImg, ImageSimpleFloat &AImg, ImageSimpleFloat &BImg, unsigned int iPatchNum, float compactness, ImageSimpleUInt &idxImg){    //根据超像素数目及图像大小确定步长    UINT uiWidth = LImg.Width();    UINT uiHeight = LImg.Height();    UINT STEP = UINT(sqrt(float(uiWidth * uiHeight) / iPatchNum) + 0.5f);//计算步长    return Run_SLIC_GivenPatchSize(LImg, AImg, BImg, STEP, compactness, idxImg);}int Run_SLIC_GivenPatchSize(ImageSimpleFloat &LImg, ImageSimpleFloat &AImg, ImageSimpleFloat &BImg, unsigned int uiPatchSize, float compactness, ImageSimpleUInt &idxImg){    const UINT MINSPSIZE = 3;    UINT uiWidth = LImg.Width(), uiHeight = LImg.Height();    uiPatchSize = max(uiPatchSize, MINSPSIZE);    assert(uiPatchSize <= min(uiWidth, uiHeight));    if (idxImg.Width() != uiWidth || idxImg.Height() != uiHeight)    {        idxImg.Create(uiWidth, uiHeight);    }    // initialize seeds    const UINT seedNum_x = UINT(uiWidth / uiPatchSize);    const UINT seedNum_y = UINT(uiHeight / uiPatchSize);    const UINT seedNum = seedNum_x * seedNum_y;//计算种子数    vector<double> kseedsx(seedNum), kseedsy(seedNum), kseedsl(seedNum), kseedsa(seedNum), kseedsb(seedNum);    float step_x = float(uiWidth) / seedNum_x;//计算x的滑动步长    float step_y = float(uiHeight) / seedNum_y;//计算y的滑动步长    assert(step_x >= MINSPSIZE && step_y >= MINSPSIZE);    int n 
= 0;    for (UINT y = 0; y < seedNum_y; y ++)    {        for (UINT x = 0; x < seedNum_x; x ++)        {            //计算种子的中心位置            kseedsx[n] = step_x * (x + 0.5) + 0.5;            kseedsy[n] = step_y * (y + 0.5) + 0.5;            UINT sx = (UINT)kseedsx[n];            UINT sy = (UINT)kseedsy[n];            //获取种子中心位置的Lab值            kseedsl[n] = LImg.Pixel(sx, sy);            kseedsa[n] = AImg.Pixel(sx, sy);            kseedsb[n] = BImg.Pixel(sx, sy);            n++;        }    }       const double spatialFactor = 1.0 / ( (uiPatchSize/compactness) * (uiPatchSize/compactness) );    //采用Kmeans算法计算超像素    PerformLabXYKMeans(kseedsl, kseedsa, kseedsb, kseedsx, kseedsy, LImg, AImg, BImg, uiPatchSize, spatialFactor, idxImg);    //Assign small patches to its neighbor patch    return EnforceLabelConnectivity(idxImg, uiPatchSize);}

上述两个CPP给出了主要步骤，完整代码见:http://download.csdn.net/detail/zhj_matlab/9672425

2 0