Java中调用pyrMeanShiftFiltering函数，对meanshift函数解释

来源：互联网发布：剑侠情缘进阶数据编辑：程序博客网时间：2024/05/17 00:58

首先看看源函数：

<span style="font-size:18px;"><span style="font-family:Times New Roman;font-size:18px;">public static void pyrMeanShiftFiltering(Mat src, Mat dst, double sp, double sr)    {                pyrMeanShiftFiltering_1(src.nativeObj, dst.nativeObj, sp, sr);                return;    }</span></span>

参数解释：

src：输入的8-比特,3-信道图象

dst：和源图象相同大小,相同格式的输出图象.
sp：空间窗的半径（The spatial window radius.）
sr：色彩窗的半径（The color window radius.）

opencv中的源码

进入函数pyrMeanShiftFiltering中看到源码是c++写的：

<span style="font-size:18px;"><span style="font-size:18px;">// C++:  void pyrMeanShiftFiltering(Mat src, Mat& dst, double sp, double sr, int maxLevel = 1, TermCriteria termcrit = TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5,1))    private static native void pyrMeanShiftFiltering_0(long src_nativeObj, long dst_nativeObj, double sp, double sr, int maxLevel, int termcrit_type, int termcrit_maxCount, double termcrit_epsilon);    private static native void pyrMeanShiftFiltering_1(long src_nativeObj, long dst_nativeObj, double sp, double sr);</span></span>

于是到opencv源码包中找到meanshift的源码imageSegmentation.cpp如下：

<span style="font-size:18px;">/** * @function Watershed_and_Distance_Transform.cpp * @brief Sample code showing how to segment overlapping objects using Laplacian filtering, in addition to Watershed and Distance Transformation * @author OpenCV Team */#include <opencv2/opencv.hpp>#include <iostream>using namespace std;using namespace cv;int main(int, char** argv){//! [load_image]    // Load the image    Mat src = imread(argv[1]);    // Check if everything was fine    if (!src.data)        return -1;    // Show source image    imshow("Source Image", src);//! [load_image]//! [black_bg]    // Change the background from white to black, since that will help later to extract    // better results during the use of Distance Transform    for( int x = 0; x < src.rows; x++ ) {      for( int y = 0; y < src.cols; y++ ) {          if ( src.at<Vec3b>(x, y) == Vec3b(255,255,255) ) {            src.at<Vec3b>(x, y)[0] = 0;            src.at<Vec3b>(x, y)[1] = 0;            src.at<Vec3b>(x, y)[2] = 0;          }        }    }    // Show output image    imshow("Black Background Image", src);//! [black_bg]//! [sharp]    // Create a kernel that we will use for accuting/sharpening our image    Mat kernel = (Mat_<float>(3,3) <<            1,  1, 1,            1, -8, 1,            1,  1, 1); // an approximation of second derivative, a quite strong kernel    // do the laplacian filtering as it is    // well, we need to convert everything in something more deeper then CV_8U    // because the kernel has some negative values,    // and we can expect in general to have a Laplacian image with negative values    // BUT a 8bits unsigned int (the one we are working with) can contain values from 0 to 255    // so the possible negative number will be truncated    Mat imgLaplacian;    Mat sharp = src; // copy source image to another temporary one    filter2D(sharp, imgLaplacian, CV_32F, kernel);    src.convertTo(sharp, CV_32F);    Mat imgResult = sharp - imgLaplacian;    // convert back to 8bits gray scale    imgResult.convertTo(imgResult, CV_8UC3);    imgLaplacian.convertTo(imgLaplacian, CV_8UC3);    // imshow( "Laplace Filtered Image", imgLaplacian );    imshow( "New Sharped Image", imgResult );//! [sharp]    src = imgResult; // copy back//! [bin]    // Create binary image from source image    Mat bw;    cvtColor(src, bw, CV_BGR2GRAY);    threshold(bw, bw, 40, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);    imshow("Binary Image", bw);//! [bin]//! [dist]    // Perform the distance transform algorithm    Mat dist;    distanceTransform(bw, dist, CV_DIST_L2, 3);    // Normalize the distance image for range = {0.0, 1.0}    // so we can visualize and threshold it    normalize(dist, dist, 0, 1., NORM_MINMAX);    imshow("Distance Transform Image", dist);//! [dist]//! [peaks]    // Threshold to obtain the peaks    // This will be the markers for the foreground objects    threshold(dist, dist, .4, 1., CV_THRESH_BINARY);    // Dilate a bit the dist image    Mat kernel1 = Mat::ones(3, 3, CV_8UC1);    dilate(dist, dist, kernel1);    imshow("Peaks", dist);//! [peaks]//! [seeds]    // Create the CV_8U version of the distance image    // It is needed for findContours()    Mat dist_8u;    dist.convertTo(dist_8u, CV_8U);    // Find total markers    vector<vector<Point> > contours;    findContours(dist_8u, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);    // Create the marker image for the watershed algorithm    Mat markers = Mat::zeros(dist.size(), CV_32SC1);    // Draw the foreground markers    for (size_t i = 0; i < contours.size(); i++)        drawContours(markers, contours, static_cast<int>(i), Scalar::all(static_cast<int>(i)+1), -1);    // Draw the background marker    circle(markers, Point(5,5), 3, CV_RGB(255,255,255), -1);    imshow("Markers", markers*10000);//! [seeds]//! [watershed]    // Perform the watershed algorithm    watershed(src, markers);    Mat mark = Mat::zeros(markers.size(), CV_8UC1);    markers.convertTo(mark, CV_8UC1);    bitwise_not(mark, mark);//    imshow("Markers_v2", mark); // uncomment this if you want to see how the mark                                  // image looks like at that point    // Generate random colors    vector<Vec3b> colors;    for (size_t i = 0; i < contours.size(); i++)    {        int b = theRNG().uniform(0, 255);        int g = theRNG().uniform(0, 255);        int r = theRNG().uniform(0, 255);        colors.push_back(Vec3b((uchar)b, (uchar)g, (uchar)r));    }    // Create the result image    Mat dst = Mat::zeros(markers.size(), CV_8UC3);    // Fill labeled objects with random colors    for (int i = 0; i < markers.rows; i++)    {        for (int j = 0; j < markers.cols; j++)        {            int index = markers.at<int>(i,j);            if (index > 0 && index <= static_cast<int>(contours.size()))                dst.at<Vec3b>(i,j) = colors[index-1];            else                dst.at<Vec3b>(i,j) = Vec3b(0,0,0);        }    }    // Visualize the final image    imshow("Final Result", dst);//! [watershed]    waitKey(0);    return 0;}</span>

样例：

<span style="font-size:18px;">cvPyrMeanShiftFiltering(srcImage,dstImage,3,7,0,cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,5,1));*/CV_IMPL voidcvPyrMeanShiftFiltering( const CvArr* srcarr, CvArr* dstarr,                          double sp0, double sr, int max_level,                         CvTermCriteria termcrit ){    const int cn = 3;    const int MAX_LEVELS = 8;    CvMat* src_pyramid[MAX_LEVELS+1];    CvMat* dst_pyramid[MAX_LEVELS+1];    CvMat* mask0 = 0;    int i, j, level;    //uchar* submask = 0;    #define cdiff(ofs0) (tab[c0-dptr[ofs0]+255] + \        tab[c1-dptr[(ofs0)+1]+255] + tab[c2-dptr[(ofs0)+2]+255] >= isr22)    memset( src_pyramid, 0, sizeof(src_pyramid) );    memset( dst_pyramid, 0, sizeof(dst_pyramid) );        CV_FUNCNAME( "cvPyrMeanShiftFiltering" );    __BEGIN__;    double sr2 = sr * sr;    int isr2 = cvRound(sr2), isr22 = MAX(isr2,16);    int tab[768];    CvMat sstub0, *src0;    CvMat dstub0, *dst0;    CV_CALL( src0 = cvGetMat( srcarr, &sstub0 ));    CV_CALL( dst0 = cvGetMat( dstarr, &dstub0 ));    if( CV_MAT_TYPE(src0->type) != CV_8UC3 )        CV_ERROR( CV_StsUnsupportedFormat, "Only 8-bit, 3-channel images are supported" );        if( !CV_ARE_TYPES_EQ( src0, dst0 ))        CV_ERROR( CV_StsUnmatchedFormats, "The input and output images must have the same type" );    if( !CV_ARE_SIZES_EQ( src0, dst0 ))        CV_ERROR( CV_StsUnmatchedSizes, "The input and output images must have the same size" );    if( (unsigned)max_level > (unsigned)MAX_LEVELS )        CV_ERROR( CV_StsOutOfRange, "The number of pyramid levels is too large or negative" );    if( !(termcrit.type & CV_TERMCRIT_ITER) )        termcrit.max_iter = 5;    termcrit.max_iter = MAX(termcrit.max_iter,1);    termcrit.max_iter = MIN(termcrit.max_iter,100);    if( !(termcrit.type & CV_TERMCRIT_EPS) )        termcrit.epsilon = 1.f;    termcrit.epsilon = MAX(termcrit.epsilon, 0.f);    for( i = 0; i < 768; i++ )        tab[i] = (i - 255)*(i - 255);    // 1. construct pyramid    src_pyramid[0] = src0;    dst_pyramid[0] = dst0; /*    for( level = 1; level <= max_level; level++ )    {        CV_CALL( src_pyramid[level] = cvCreateMat( (src_pyramid[level-1]->rows+1)/2,                        (src_pyramid[level-1]->cols+1)/2, src_pyramid[level-1]->type ));        CV_CALL( dst_pyramid[level] = cvCreateMat( src_pyramid[level]->rows,                        src_pyramid[level]->cols, src_pyramid[level]->type ));        CV_CALL( cvPyrDown( src_pyramid[level-1], src_pyramid[level] ));        //CV_CALL( cvResize( src_pyramid[level-1], src_pyramid[level], CV_INTER_AREA ));    } */    CV_CALL( mask0 = cvCreateMat( src0->rows, src0->cols, CV_8UC1 ));    //CV_CALL( submask = (uchar*)cvAlloc( (sp+2)*(sp+2) ));    // 2. apply meanshift, starting from the pyramid top (i.e. the smallest layer)    for( level = max_level; level >= 0; level-- )    {        CvMat* src = src_pyramid[level];        CvSize size = cvGetMatSize(src);        uchar* sptr = src->data.ptr;        int sstep = src->step; /* 以字节为单位的行数据长度 */        uchar* mask = 0;        int mstep = 0;        uchar* dptr;        int dstep;        float sp = (float)(sp0 / (1 << level));        sp = MAX( sp, 1 );//这里保证了sp最小也为1，也就保证了下面进行窗口计算时，窗口大小最小也为（1*2+1）*（1*2+1）  //由于max_level的值为0，所以下面的代码不执行        //if( level < max_level )        //{        //    CvSize size1 = cvGetMatSize(dst_pyramid[level+1]);        //    CvMat m = cvMat( size.height, size.width, CV_8UC1, mask0->data.ptr );        //    dstep = dst_pyramid[level+1]->step;        //    dptr = dst_pyramid[level+1]->data.ptr + dstep + cn;        //    mstep = m.step;        //    mask = m.data.ptr + mstep;        //    //cvResize( dst_pyramid[level+1], dst_pyramid[level], CV_INTER_CUBIC );        //    cvPyrUp( dst_pyramid[level+1], dst_pyramid[level] );        //    cvZero( &m );        //    for( i = 1; i < size1.height-1; i++, dptr += dstep - (size1.width-2)*3, mask += mstep*2 )        //    {        //        for( j = 1; j < size1.width-1; j++, dptr += cn )        //        {        //            int c0 = dptr[0], c1 = dptr[1], c2 = dptr[2];        //            mask[j*2 - 1] = cdiff(-3) || cdiff(3) || cdiff(-dstep-3) || cdiff(-dstep) ||        //                cdiff(-dstep+3) || cdiff(dstep-3) || cdiff(dstep) || cdiff(dstep+3);        //        }        //    }        //    cvDilate( &m, &m, 0, 1 );        //    mask = m.data.ptr;        //}        dptr = dst_pyramid[level]->data.ptr;        dstep = dst_pyramid[level]->step;  //一个像素占三个字节,下面的循环对每一个象元进行处理，由于是对三个波段的数据进行  //处理，所以每个象元占三个字节，保存三个8位的象元值        for( i = 0; i < size.height; i++, sptr += sstep - size.width*3,                                        dptr += dstep - size.width*3, //防止数据的遗漏矩阵，每处理一行数据对齐一次 如果width是4的倍数，                                                                        //则不会出现这种情况，分析代码时可假设                                           mask += mstep )               //是这样的，方便分析 注意for循环的执行顺序         {               for( j = 0; j < size.width; j++, sptr += 3, dptr += 3 )            {        //x0记录当前处理象元的列号，y0记录当前处理象元的行号                int x0 = j, y0 = i, x1, y1, iter;                int c0, c1, c2;                //if( mask && !mask[j] )                //    continue;                c0 = sptr[0], c1 = sptr[1], c2 = sptr[2];   //记录第一个处理象元的三个分量（RGB）                // iterate meanshift procedure                for( iter = 0; iter < termcrit.max_iter; iter++ ) //所谓的MeanShift算法迭代核                {                    uchar* ptr;                    int x, y, count = 0;                    int minx, miny, maxx, maxy;                    int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;                    double icount;                    int stop_flag;                    //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)                    minx = cvRound(x0 - sp); minx = MAX(minx, 0);                    miny = cvRound(y0 - sp); miny = MAX(miny, 0);                    maxx = cvRound(x0 + sp); maxx = MIN(maxx, size.width-1);                    maxy = cvRound(y0 + sp); maxy = MIN(maxy, size.height-1);     //根据上面的代码可以得到窗口大小为：(2*sp+1）*（2*sp+1）,当统计到达影像边界时，     //重新考虑，这就是上面代码MAX和MIN两个函数所控制的内容                     //sptr记录了当前处理象元的指针     //ptr记录了以当前处理象元为中心的处理窗口中的第一个象元指针                         ptr = sptr + (miny - i)*sstep + (minx - j)*3;     /*double sr2 = sr * sr;     int isr2 = cvRound(sr2), isr22 = MAX(isr2,16);     for( i = 0; i < 768; i++ )     tab[i] = (i - 255)*(i - 255);     */     /*ptr += sstep - (maxx-minx+1)*3该语句的含义是处理完     指定窗口中一行数据时，使窗口的指针指向窗口中的下一行的     第一个数据     */                    for( y = miny; y <= maxy; y++, ptr += sstep - (maxx-minx+1)*3 )                    {                        int row_count = 0;                        x = minx;                      /*  for( ; x + 3 <= maxx; x += 4, ptr += 12 ) //窗口中的每一行分两部分循环                        {                                  int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )                            {                                                        s0 += t0; s1 += t1; s2 += t2;                                sx += x; row_count++;                            }                            t0 = ptr[3], t1 = ptr[4], t2 = ptr[5];                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )                            {                                                        s0 += t0; s1 += t1; s2 += t2;                                sx += x+1; row_count++;                            }                            t0 = ptr[6], t1 = ptr[7], t2 = ptr[8];                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )                            {                                                        s0 += t0; s1 += t1; s2 += t2;                                sx += x+2; row_count++;                            }                            t0 = ptr[9], t1 = ptr[10], t2 = ptr[11];                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )                            {                                                        s0 += t0; s1 += t1; s2 += t2;                                sx += x+3; row_count++;                            }                        } //窗口一行第一部分计算结束*/                                                for( ; x <= maxx; x++, ptr += 3 )                        {                                  int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )                            {                                                        s0 += t0; s1 += t1; s2 += t2;                                sx += x; row_count++;                            }                        } //窗口一行第二部分计算结束                        count += row_count; //count记录了窗口中所有符合条件的样本点数目                        sy += y*row_count;                    } //窗口循环结束                                if( count == 0 )                        break;                    icount = 1./count; //1.表示把1转换为浮点类型                    x1 = cvRound(sx*icount);                    y1 = cvRound(sy*icount);                    s0 = cvRound(s0*icount);                    s1 = cvRound(s1*icount);                    s2 = cvRound(s2*icount);                            stop_flag = x0 == x1 && y0 == y1 || abs(x1-x0) + abs(y1-y0) +                        tab[s0 - c0 + 255] + tab[s1 - c1 + 255] +                        tab[s2 - c2 + 255] <= termcrit.epsilon;                                    x0 = x1; y0 = y1;                    c0 = s0; c1 = s1; c2 = s2;                    if( stop_flag )                        break;                } //针对这个窗口的迭代结束                dptr[0] = (uchar)c0;                dptr[1] = (uchar)c1;                dptr[2] = (uchar)c2;            }        }    }      __END__;    for( i = 1; i <= MAX_LEVELS; i++ )    {        cvReleaseMat( &src_pyramid[i] );        cvReleaseMat( &dst_pyramid[i] );    }    cvReleaseMat( &mask0 );}</span>

0 0