图像预处理SSE加速,90度旋转和垂直镜像

来源:互联网 发布:gcc windows下载 编辑:程序博客网 时间:2024/05/22 16:41
/*
 * Note from the original article: every rotation by a multiple of 90 degrees
 * and every mirror used in image preprocessing can be composed from the two
 * basic operations below (MirrorMatrix and TransposeMatrix); the author
 * measured roughly a 3-5x speed-up over per-pixel processing.
 */

/*
 * Mirror (byte-permute) one 1x16 row of single-byte elements.
 *
 * pSrc_tmp  points to the 16 source bytes; no alignment is required
 *           (an unaligned load is used).
 * pDst_tmp  points to the 16 destination bytes; no alignment is required
 *           (an unaligned store is used). It may be the same address as
 *           pSrc_tmp because the row is fully loaded before it is stored.
 * sort      byte-shuffle control passed straight to _mm_shuffle_epi8:
 *           output byte i receives source byte (sort[i] & 0x0F), or zero
 *           when bit 7 of sort[i] is set. Supplying the indices 15..0
 *           reverses the row, i.e. performs the mirror.
 *
 * _mm_shuffle_epi8 is an SSSE3 instruction. On GCC/Clang builds that do not
 * enable SSSE3 globally, the target attribute below enables it for this one
 * function so the file still compiles; the caller remains responsible for
 * only running it on CPUs that support SSSE3.
 */
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__SSSE3__)
__attribute__((target("ssse3")))
#endif
void MirrorMatrix(__m128i *pSrc_tmp, __m128i *pDst_tmp, __m128i sort)
{
    const __m128i row = _mm_loadu_si128(pSrc_tmp);
    const __m128i permuted = _mm_shuffle_epi8(row, sort);

    _mm_storeu_si128(pDst_tmp, permuted);
}

/*
 * Transpose a 16x16 matrix of single-byte elements.
 *
 * pSrc_tmp  array of 16 pointers; pSrc_tmp[r] points to the 16 bytes of
 *           source row r. Rows need not be contiguous or aligned
 *           (unaligned loads are used).
 * pDst_tmp  array of 16 pointers; pDst_tmp[c] receives the 16 bytes of
 *           source column c, i.e. dst[c][r] == src[r][c]. Rows need not be
 *           aligned (unaligned stores are used).
 *
 * All 16 source rows are loaded before any destination row is written, so
 * the destination rows may alias the source rows (in-place transpose).
 *
 * The transpose is done in four interleave stages with element widths of
 * 8, 16, 32 and 64 bits. Each stage merges pairs of registers so that the
 * number of consecutive source rows held for one column doubles
 * (2 -> 4 -> 8 -> 16). Two scratch arrays are used alternately as the
 * input and output of successive stages.
 */
void TransposeMatrix(__m128i **pSrc_tmp, __m128i **pDst_tmp)
{
    __m128i bufA[16]; /* source rows, then the stage-2 and stage-4 results */
    __m128i bufB[16]; /* stage-1 and stage-3 results */
    int grp;
    int idx;

    for (idx = 0; idx < 16; idx++) {
        bufA[idx] = _mm_loadu_si128(pSrc_tmp[idx]);
    }

    /*
     * Stage 1 (bytes): combine row pairs (2i, 2i+1).
     * bufB[0..7] hold columns 0-7, bufB[8..15] hold columns 8-15.
     */
    for (idx = 0; idx < 8; idx++) {
        const __m128i even = bufA[2 * idx];
        const __m128i odd = bufA[2 * idx + 1];

        bufB[idx] = _mm_unpacklo_epi8(even, odd);
        bufB[8 + idx] = _mm_unpackhi_epi8(even, odd);
    }

    /*
     * Stage 2 (16-bit units): within each of the 2 groups of 8 registers,
     * combine neighbouring registers; low halves go to the first 4 slots
     * of the group, high halves to the last 4.
     */
    for (grp = 0; grp < 2; grp++) {
        const int base = 8 * grp;

        for (idx = 0; idx < 4; idx++) {
            const __m128i even = bufB[base + 2 * idx];
            const __m128i odd = bufB[base + 2 * idx + 1];

            bufA[base + idx] = _mm_unpacklo_epi16(even, odd);
            bufA[base + 4 + idx] = _mm_unpackhi_epi16(even, odd);
        }
    }

    /*
     * Stage 3 (32-bit units): same scheme on 4 groups of 4 registers;
     * low halves fill the first 2 slots of a group, high halves the last 2.
     */
    for (grp = 0; grp < 4; grp++) {
        const int base = 4 * grp;

        for (idx = 0; idx < 2; idx++) {
            const __m128i even = bufA[base + 2 * idx];
            const __m128i odd = bufA[base + 2 * idx + 1];

            bufB[base + idx] = _mm_unpacklo_epi32(even, odd);
            bufB[base + 2 + idx] = _mm_unpackhi_epi32(even, odd);
        }
    }

    /*
     * Stage 4 (64-bit units): each register pair yields two complete
     * output rows (source columns 2*grp and 2*grp+1).
     */
    for (grp = 0; grp < 8; grp++) {
        const __m128i even = bufB[2 * grp];
        const __m128i odd = bufB[2 * grp + 1];

        bufA[2 * grp] = _mm_unpacklo_epi64(even, odd);
        bufA[2 * grp + 1] = _mm_unpackhi_epi64(even, odd);
    }

    for (idx = 0; idx < 16; idx++) {
        _mm_storeu_si128(pDst_tmp[idx], bufA[idx]);
    }
}

0 0
原创粉丝点击