曲波变换

来源:互联网 发布:hdc电视直播软件 编辑:程序博客网 时间:2024/05/21 09:11


曲波变换部分代码,CUDA代码。



void fdct_wrapping(float2* coeff,int *fdct_width,int *fdct_height,float * pData,int nbscales,int *nbangles,int mDirects,int width,int height,int bigN1,int bigN2)

{


  
int width_t,height_t;
int subl;
int HiSize[2];
int width_wedge,first_row,first_col;
float num;
float M_horiz,M_vert;
int first_wedge_endpoint_vert,length_corner_wedge,slope_wedge_right,mid_line_right,l;
const int mWidth=width;//int(width*0.53f);//bigN2;//
const int mHeight=height;//int(height*0.53f);//bigN1;//


f_ifftshift(pData,Fdata,width,height);//输入数据做平移

//float2Comp<<<height,width>>>(pData,Fdata,width);//float型变成复数
cufftExecC2C(fftPlan,(cufftComplex *)Fdata, (cufftComplex *)Fdata,CUFFT_FORWARD);//对输入数据做傅立叶正变换
////fftshift 往大取整










   //对复数做平移
shiftAndNorm(Fdata,width,height,rsqrtf(width*height));




float M1=height/3.0f;//y
float M2=width/3.0f;//x




    UpdateX<<<bigN1,bigN2>>>(Fdata,inFdata,width,height,M1,M2,bigN2);//对输入变量进行扩展

const int window_length_1=int(2*M1)-int(M1)-1-((height%3)==0);
const int window_length_2=int(2*M2)-int(M2)-1-((width%3)==0);

fdct_InitialWeight<<<1,(window_length_1+1)>>>(wl1_out,wr1_out,window_length_1);
fdct_InitialWeight<<<1,(window_length_2+1)>>>(wl2_out,wr2_out,window_length_2);


//SET_FCOMPLEX_BASE_1;
int lowPassL1=((window_length_1+1)<<1)+(int(M1)<<1)+1;


if((height%3)==0)
{
lowPassL1=lowPassL1+2;
UpdateLowPass<<<1,lowPassL1>>>(wl1_out,wr1_out,LowPass1,(window_length_1+1));
}
else
beLowPass<<<1,lowPassL1>>>(wl1_out,wr1_out,LowPass1,(window_length_1+1));
//这边有问题
int lowPassL2=((window_length_2+1)<<1)+(int(M2)<<1)+1;
if((width%3)==0)
{
lowPassL2=lowPassL2+2;
UpdateLowPass<<<1,lowPassL2>>>(wl2_out,wr2_out,LowPass2,(window_length_2+1));
}
else
beLowPass<<<1,lowPassL2>>>(wl2_out,wr2_out,LowPass2,(window_length_2+1));
InitLowPassFilter<<<lowPassL1,lowPassL2>>>(inFdata,LowPass1,LowPass2);//inFdata--Xlow  //低通滤波


//cudaStream_t    stream;
//cudaStreamCreate( &stream );


clock_t start,finish;
float costtime;


for(int j=(nbscales-1);j>=1;j--)
{




M1=M1/2.0f;
M2=M2/2.0f;


int size=((int(4*M1)<<1)+1)*((int(4*M2)<<1)+1);
int width_tp=((int(4*M2)<<1)+1);
int height_tp=((int(4*M1)<<1)+1);

HiSize[0]=width_tp;
HiSize[1]=height_tp;
cudaMemcpy(inHighFdata,inFdata,size*sizeof(float2),cudaMemcpyDeviceToDevice);


width_t=(int(2*M2)<<1)+1;
height_t=(int(2*M1)<<1)+1;
UpdateXLow<<<height_t,width_t>>>(inFdata,Fdata,M1,M2,width_tp);//output--Fdata(Xlow)压缩图像
UpdateXHigh<<<height_t,width_t>>>(inHighFdata,Fdata,fdct_hipass+j*width*height,M1,M2,width_tp);//output--inHighFdata(Xhi)
LowPassFilter<<<height_t,width_t>>>(Fdata,fdct_LowPass+j*width*height);  //对压缩图进行低通   
cudaMemcpy(inFdata,Fdata,width_t*height_t*sizeof(float2),cudaMemcpyDeviceToDevice);
//cudaMemcpyAsync( inFdata, Fdata,width_t*height_t*sizeof(float2),cudaMemcpyDeviceToDevice);

//SET_FCOMPLEX_BASE;
l=-1;
const int nbquadrants=4;
int nbangles_perquad=(nbangles[j]>>2);
int quadrant=1;
for(quadrant=1;quadrant<=nbquadrants;quadrant++ )
{


bool qM21=(quadrant%2==1);
bool qM20=(quadrant%2==0);
M_horiz=M2*(qM21)+M1*(qM20);
M_vert=M1*(qM21)+M2*(qM20);


l++;
width_wedge=(*(WrapDataWidth+j*mDirects+l));
length_corner_wedge=(*(WrapDataHeight+j*mDirects+l));
first_row=int(4*M_vert)+2-(int((length_corner_wedge+1)>>1)+1*((length_corner_wedge+1)%2!=0))+((length_corner_wedge+1)%2)*((quadrant-2)==(_mod((quadrant-2),2)));
first_col=int(4*M_horiz)+2-(int((width_wedge+1)>>1)+1*((width_wedge+1)%2!=0))+((width_wedge+1)%2)*((quadrant-3)==_mod((quadrant-3),2));


//更新每层上每个角度的图形--Fdata
beWrapData<<<(*(WrapDataHeight+j*mDirects+l)),(*(WrapDataWidth+j*mDirects+l))>>>(Fdata,fdct_wlr+j*mDirects*mWidth*mHeight+l*mWidth*mHeight,inHighFdata,left_line+j*mDirects*height+l*height,first_col,first_row,width_wedge,length_corner_wedge,HiSize[0]);
   




// {
// FILE *fp; 
// float *test;
// fp=fopen("D:\\test2.bin","wb");
// test=(float *)malloc(361*49*sizeof(float));
// //Comp2float<<<49,361>>>(Fdata,wr1_out);
// cudaMemcpy(test,fdct_wlr+j*mDirects*mWidth*mHeight+l*mWidth*mHeight,361*49*sizeof(float),cudaMemcpyDeviceToHost);
// fwrite(test,1,361*49*sizeof(float),fp);
// free(test);
// fclose(fp);
//}

//对wrapped_data_r进行旋转



CpuRotateData(Fdata,wrapped_data_r,-(quadrant-1),width_wedge,length_corner_wedge);


  c_ifftshift(wrapped_data_r,*(fdct_width+j*mDirects+l),*(fdct_height+j*mDirects+l));
cufftExecC2C(fftPlan1[j*32+l], (cufftComplex *)wrapped_data_r, (cufftComplex *)coeff+j*mDirects*mWidth*mHeight+l*mWidth*mHeight,CUFFT_INVERSE);







shiftAndNorm(coeff+j*mDirects*mWidth*mHeight+l*mWidth*mHeight,(*(fdct_width+j*mDirects+l)),(*(fdct_height+j*mDirects+l)),1.0f/sqrtf(*(fdct_width+j*mDirects+l)*(*(fdct_height+j*mDirects+l))));





size=int(4*M_vert)-int(M_vert);
first_row=int(4*M_vert)+2-(int((size+1)>>1)+1*((size+1)%2!=0))+((size+1)%2)*((quadrant-2)==_mod((quadrant-2),2));
start=clock();
for(subl=2;subl<=(nbangles_perquad-1);subl++)
{
l++;
width_wedge=(*(WrapDataWidth+j*mDirects+l));
first_col=int(4*M_horiz)+2-(int((width_wedge+1)>>1)+1*((width_wedge+1)%2!=0))+((width_wedge+1)%2)*((quadrant-3)==_mod((quadrant-3),2));

   beSublWrapData<<<(*(WrapDataHeight+j*mDirects+l)),(*(WrapDataWidth+j*mDirects+l))>>>(Fdata,fdct_wlr+j*mDirects*mWidth*mHeight+l*mWidth*mHeight,inHighFdata,left_line+j*mDirects*height+l*height,first_col,first_row,width_wedge,(*(WrapDataHeight+j*mDirects+l)),HiSize[0]);

CpuRotateData(Fdata,wrapped_data_r,-(quadrant-1),(*(WrapDataWidth+j*mDirects+l)),(*(WrapDataHeight+j*mDirects+l)));

c_ifftshift(wrapped_data_r,*(fdct_width+j*mDirects+l),(*(fdct_height+j*mDirects+l)));
      cufftExecC2C(fftPlan1[j*32+l], (cufftComplex *)wrapped_data_r, (cufftComplex *)coeff+j*mDirects*mWidth*mHeight+l*mWidth*mHeight,CUFFT_INVERSE);
      shiftAndNorm(coeff+j*mDirects*mWidth*mHeight+l*mWidth*mHeight,(*(fdct_width+j*mDirects+l)),(*(fdct_height+j*mDirects+l)),1.0f/sqrtf(*(fdct_width+j*mDirects+l)*(*(fdct_height+j*mDirects+l))));


}

finish=clock();
costtime=(float)(finish-start)/CLOCKS_PER_SEC;
  // Right corner wedge
l++;
width_wedge=(*(WrapDataWidth+j*mDirects+l));
first_row=int(4*M_vert)+2-(int((length_corner_wedge+1)>>1)+1*((length_corner_wedge+1)%2!=0))+((length_corner_wedge+1)%2)*((quadrant-2)==_mod((quadrant-2),2));
first_col=int(4*M_horiz)+2-(int((width_wedge+1)>>1)+1*((width_wedge+1)%2!=0))+((width_wedge+1)%2)*((quadrant-3)==_mod((quadrant-3),2));
beRightWrapData<<<(*(WrapDataHeight+j*mDirects+l)),(*(WrapDataWidth+j*mDirects+l))>>>(Fdata,fdct_wlr+j*mDirects*mWidth*mHeight+l*mWidth*mHeight,inHighFdata,left_line+j*mDirects*height+l*height,first_col,first_row,(*(WrapDataWidth+j*mDirects+l)),(*(WrapDataHeight+j*mDirects+l)),HiSize[0],M_horiz);



CpuRotateData(Fdata,wrapped_data_r,-(quadrant-1),(*(WrapDataWidth+j*mDirects+l)),(*(WrapDataHeight+j*mDirects+l)));
c_ifftshift(wrapped_data_r,*(fdct_width+j*mDirects+l),(*(fdct_height+j*mDirects+l)));


cufftExecC2C(fftPlan1[j*32+l], (cufftComplex *)wrapped_data_r, (cufftComplex *)coeff+j*mDirects*mWidth*mHeight+l*mWidth*mHeight,CUFFT_INVERSE);
shiftAndNorm(coeff+j*mDirects*mWidth*mHeight+l*mWidth*mHeight,(*(fdct_width+j*mDirects+l)),(*(fdct_height+j*mDirects+l)),1.0f/sqrtf(*(fdct_width+j*mDirects+l)*(*(fdct_height+j*mDirects+l))));

            if(quadrant <nbquadrants)
{
CpuRotate(inHighFdata,Fdata,1,HiSize[0],HiSize[1],HiSize,HiSize+1);

cudaMemcpy(inHighFdata,Fdata,HiSize[1]*HiSize[0]*sizeof(float2),cudaMemcpyDeviceToDevice);
 //  cudaMemcpyAsync( inHighFdata,Fdata,HiSize[1]*HiSize[0]*sizeof(float2),cudaMemcpyDeviceToDevice,stream);

//cudaStreamSynchronize( stream );




}

}


}


//cudaStreamDestroy( stream );
c_ifftshift(inFdata,width_t,height_t);
cufftExecC2C(fftPlan1[0], (cufftComplex *)inFdata, (cufftComplex *)coeff,CUFFT_INVERSE);
shiftAndNorm(coeff,width_t,height_t,1.0f/sqrtf(width_t*height_t));


}
原创粉丝点击