VEC-C之滑窗应用案例-3X3高斯滤波器

来源:互联网 发布:程序员5年职业规划 编辑:程序博客网 时间:2024/05/31 20:51

一,不使用VEC-C版本:

#include "highgui.h"
#include "opencv2/opencv.hpp"
#include "opencv2/imgproc.hpp"
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

using namespace cv;
using namespace std;

void gaussian3x3_ref(uchar *p_u8Src, uchar *p_u8Dst, uint u32Rows, uint u32Cols);

/**
 * Scalar reference demo: loads a grayscale image, runs the 3x3 Gaussian
 * reference filter over it and displays source and result.
 *
 * @return 0 on success, -1 if the image could not be loaded or is empty.
 */
int rmain(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    const char* imagename = "E:\\test\\ce.jpeg";

    // Read the image from file; flag 0 requests a single-channel grayscale load.
    IplImage* img = cvLoadImage(imagename, 0);
    if (img == NULL)
    {
        cerr << "Failed to load image: " << imagename << endl;
        return -1;
    }

    const int width = img->width;
    const int height = img->height;
    if (width <= 0 || height <= 0)
    {
        cvReleaseImage(&img);
        return -1;
    }

    // One channel, matching the grayscale source.
    IplImage* dst = cvCreateImage(cvSize(width, height), img->depth, 1);

    // IplImage rows are widthStep bytes apart, which can exceed `width`
    // because of row alignment. The filter expects tightly packed rows, so
    // stage the pixels through packed buffers instead of handing it
    // imageData directly.
    const size_t rowBytes = (size_t)width;
    std::vector<uchar> packedSrc(rowBytes * (size_t)height);
    std::vector<uchar> packedDst(rowBytes * (size_t)height);
    for (int y = 0; y < height; y++)
    {
        std::memcpy(&packedSrc[(size_t)y * rowBytes],
                    img->imageData + (size_t)y * (size_t)img->widthStep,
                    rowBytes);
    }

    // Argument order follows the filter's convention: row length first,
    // then number of rows (see gaussian3x3_ref).
    gaussian3x3_ref(&packedSrc[0], &packedDst[0], (uint)width, (uint)height);

    for (int y = 0; y < height; y++)
    {
        std::memcpy(dst->imageData + (size_t)y * (size_t)dst->widthStep,
                    &packedDst[(size_t)y * rowBytes],
                    rowBytes);
    }

    cvShowImage("src", img);
    cvShowImage("dst", dst);
    waitKey();

    cvReleaseImage(&dst);
    cvReleaseImage(&img);
    return 0;
}

/**
 * Scalar reference 3x3 Gaussian blur on a tightly packed 8-bit image.
 *
 * Kernel is {1,2,1; 2,4,2; 1,2,1}; the weighted sum is divided by 16
 * (right shift by 4).
 *
 * NOTE on parameter meaning: despite the names, this function is called with
 * u32Rows = number of pixels per row (image width, also the row stride) and
 * u32Cols = number of rows (image height). That convention is kept so
 * existing call sites continue to work.
 *
 * Border pixels are computed by replicating the nearest edge pixel, so the
 * function never reads outside [p_u8Src, p_u8Src + u32Rows * u32Cols).
 * Interior pixels get the same value as the plain 3x3 weighted sum.
 *
 * @param p_u8Src  source pixels, u32Rows * u32Cols bytes, packed rows
 * @param p_u8Dst  destination pixels, same layout; must not alias p_u8Src
 * @param u32Rows  pixels per row (row stride in bytes)
 * @param u32Cols  number of rows
 */
void gaussian3x3_ref(uchar *p_u8Src, uchar *p_u8Dst, uint u32Rows, uint u32Cols)
{
    if (p_u8Src == NULL || p_u8Dst == NULL || u32Rows == 0 || u32Cols == 0)
        return;

    static const uchar kKernel[3][3] = {
        {1, 2, 1},
        {2, 4, 2},
        {1, 2, 1}
    };
    const uint kShift = 4;                 // kernel weights sum to 16

    const size_t rowLen = u32Rows;
    const size_t rowCnt = u32Cols;

    for (size_t y = 0; y < rowCnt; y++)
    {
        // Clamp the vertical neighbours to the image (replicate border).
        const size_t yAbove = (y == 0) ? 0 : y - 1;
        const size_t yBelow = (y + 1 < rowCnt) ? y + 1 : y;
        const uchar* rows[3] = {
            p_u8Src + yAbove * rowLen,
            p_u8Src + y * rowLen,
            p_u8Src + yBelow * rowLen
        };
        uchar* out = p_u8Dst + y * rowLen;

        for (size_t x = 0; x < rowLen; x++)
        {
            // Clamp the horizontal neighbours as well.
            const size_t cols[3] = {
                (x == 0) ? 0 : x - 1,
                x,
                (x + 1 < rowLen) ? x + 1 : x
            };

            uint acc = 0;
            for (int ky = 0; ky < 3; ky++)
                for (int kx = 0; kx < 3; kx++)
                    acc += (uint)rows[ky][cols[kx]] * kKernel[ky][kx];

            // acc <= 255 * 16, so acc >> 4 always fits in a uchar and no
            // explicit saturation is needed.
            out[x] = (uchar)(acc >> kShift);
        }
    }
}
二,使用VEC-C版本:

#include <opencv2/opencv.hpp>
#include <vec-c.h>
#include <cstddef>
#include <cstdio>
#include <iostream>

using namespace std;

int hist[256];
ushort p_u16DstB0[256*16];
ushort p_u16DstB1[256*16];

void gaussian3x3(uchar *p_u8Src, uchar *p_u8Dst, uint u32Rows, uint u32Cols);

/**
 * Scratch driver. The live part exercises the vpld intrinsic on two small
 * arrays and prints the two resulting vectors; the image-filtering driver
 * for gaussian3x3 is kept below, commented out as in the original.
 */
int main()
{
    {
        short16 inN2, inN3;
        short16 inN, inN1;
        short inn[16] = {3,0,1,0,1,0,1,0,0,0,0,0,0,0,0};
        short in[16] = {9,4369,0,3,4,5,6,7,8,9,10,11,12,13,14,15};
        inN = *(short16*)inn;
        //short16 v0=(short16)vpld(in,inN,inN1,inN2,inN3);
        vpld(rel, in, inN, inN2, inN3);
        //ushort coeff[16] = { 1, 2, 1, 0, 2, 4, 2, 0, 1, 2, 1, 0, 0, 0, 0, 0 };
        //short16 vtemp;
        //ushort16 vec_weights0;
        //vec_weights0 = vpld(rel, coeff,vtemp);

        // Store and print the first output vector.
        short p_out_u8[16];
        vst(inN2, (short*)p_out_u8, (short)0xffff);
        for (int i = 0; i < 16; i++)
        {
            cout << p_out_u8[i] << endl;
        }
        cout << endl;

        // Store and print the second output vector.
        short p_out_u[16];
        vst(inN3, (short*)p_out_u, (short)0xffff);
        for (int i = 0; i < 16; i++)
        {
            cout << p_out_u[i] << endl;
        }
        getchar();
    }

    //for(int i=0;i<256;i++)
    //    hist[i]=0;
    //for(int i=0;i<256*16;i++)
    //    p_u16DstB0[i]=0;
    //const char* imagename = "E:\\test\\ce.jpeg";
    //// read the image from file
    //IplImage* img=cvLoadImage(imagename,0);
    //uchar* src=(uchar*)img->imageData;
    //for(int i=0;i<20;i++)
    //    src[i]=1;
    //
    //int width = img->width;   // image width
    //int height = img->height; // image height
    //IplImage* dst=cvCreateImage(CvSize(width,height),img->depth,0);
    //uchar* dst_u=(uchar*)dst->imageData;
    //
    //src[15]=255;
    //
    //TODO vec-c progress
    //gaussian3x3((uchar*)src,(uchar*)dst_u,width,height);
    //// show the images
    //cvShowImage("src", img);
    //cvShowImage("dst", dst);
    //
    //cvSaveImage("E:\\test\\cev.jpeg",img);
    //
    //cv::waitKey();
    return 0;
}

/**
 * 3x3 Gaussian blur using VEC-C sliding-window intrinsics, producing 16
 * output pixels per vector store.
 *
 * The image is walked in vertical strips 16 pixels wide: the outer loop
 * selects the strip, the inner loop moves down one row per iteration while
 * keeping three row vectors (above / current / below) live.
 *
 * NOTE on parameter meaning: as written, u32Rows is used as the row stride
 * and horizontal extent (image width) and u32Cols as the number of rows
 * (image height); the commented-out call in main passes (width, height).
 *
 * NOTE(review): the loads start one pixel left of and one row above the
 * strip, fetch 32 bytes at a time, and keep loading rows past the last
 * output row. The caller therefore appears to need a readable apron around
 * the source buffer - confirm against the target's memory layout.
 *
 * @param p_u8Src  source pixels, rows u32Rows bytes apart
 * @param p_u8Dst  destination pixels, same layout
 * @param u32Rows  pixels per row / row stride
 * @param u32Cols  number of rows
 */
void gaussian3x3(uchar *p_u8Src, uchar *p_u8Dst, uint u32Rows, uint u32Cols)
{
    // Kernel rows at coefficient offsets 0, 4 and 8 (each padded to 4 entries).
    ushort coeff[16] = { 1, 2, 1, 0, 2, 4, 2, 0, 1, 2, 1, 0, 0, 0, 0, 0 };
    uchar32 v0, v1, v2;
    ushort16 v3, v_coeff;
    short16 vOff, vtemp;
    uint j, i;   // uint so the loops terminate for bounds above 65535
    uint u32OutLoop = (u32Rows + 15) >> 4;   // 16 Gaussian outputs per pass
    uint16 vacc0;
    ushort step[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};

    // Signed stride so that "one row up" is a real negative offset rather
    // than an unsigned wrap-around.
    const ptrdiff_t stride = (ptrdiff_t)u32Rows;

    v_coeff = *(ushort16*)coeff;
    vtemp = *(short16*)step;   // initialise a vector from an array

    ushort vprRightMask, vprMask;   /* two store masks */
    vprMask = 0xffff;               // 0xffff selects all 16 lanes
    vprRightMask = 0xffff;
    // The last strip is partial when the row length is not a multiple of
    // 16. The mask must be derived from the same dimension as u32OutLoop
    // (u32Rows), otherwise the final strip is clipped using the row count.
    if (u32OutLoop != u32Rows >> 4)
        vprRightMask = (1 << (u32Rows & 15)) - 1;

    for (i = 0; i < u32OutLoop; i++)   // horizontal strips
    {
        const ptrdiff_t stripOff = (ptrdiff_t)i * 16;
        uchar* p_in_u8A = p_u8Src - 1 - stride + stripOff;   // row above
        uchar* p_in_u8B = p_u8Src - 1 + stripOff;            // current row
        uchar* p_in_u8C = p_u8Src - 1 + stride + stripOff;   // row below
        uchar* p_out_u8 = p_u8Dst + stripOff;

        if (i == u32OutLoop - 1)
            vprMask = vprRightMask;

        v0 = *(uchar32*)p_in_u8A;
        v1 = *(uchar32*)p_in_u8B;
        v2 = *(uchar32*)p_in_u8C;
        p_in_u8A += 3 * stride;   // now points at the next row to fetch

        for (j = 0; j < u32Cols; ++j)
        {
            /*#define SW_CONFIG(init_psh,num_filter,src_offset,coeff_offset,step,pattern)*/
            /* Original author's note: the sliding window has length 8 and
             * advances by 1 per step; the result is the sum of products of
             * the shifted coefficients with v0. The second v0 argument is
             * only there to make up the required length. */
            vacc0 = (uint16) vswmpy5(v0, v0, v_coeff, (uint)0);

            /* Original author's note: accumulate the sliding-window result
             * into vacc0. d[20:16]=4 offsets the coefficients by 4.
             * "accumulate" is equivalent to
             *   vacc1 = (uint16) vswmpy5(v1, v1, v_coeff, (uint)4<<16);
             *   vacc0 = vintrasum(vacc1, vacc0);
             */
            vacc0 = vswmac5(accumulate, v1, v1, v_coeff, (uint)4<<16, vacc0);

            /* Original author's note: psl applies a logical shift to the
             * result vector, i.e. each element is divided by 2^n ignoring
             * sign. d[5:0]=4 means shift=4 (divide by 2^4); d[20:16]=8
             * offsets the coefficients by 8. */
            v3 = (ushort16) vswmac5(psl, v2, v2, v_coeff, (uint)(4 | (8 << 16)), vacc0);

            /* The three steps above produce 16 filtered pixels at once. */
            vst(sat, v3, (uchar16*)p_out_u8, vprMask);
            p_out_u8 += stride;

            // Slide the three-row window down by one row.
            v0 = v1;
            v1 = v2;
            v2 = *(uchar32*)p_in_u8A;
            p_in_u8A += stride;
        }
    }
}



原创粉丝点击