Assembly x64 Intro - SSE2 IDCT I16X16 DC
来源:互联网 发布:杨君优化人生全本 编辑:程序博客网 时间:2024/05/29 10:41
/*
 * WelsIDctRecI16x16Dc_c: luma reconstruction of one 16x16 macroblock in
 * I16x16 mode for the case where only the DC values are non-zero.
 *
 * pRec        - output pixels; one row is written per iteration, then the
 *               pointer moves forward by iStride
 * iStride     - distance between consecutive output rows
 * pPred       - prediction pixels; moves forward by iPredStride per row
 * iPredStride - distance between consecutive prediction rows
 * pDctDc      - 16 DC values, one per 4x4 sub-block, indexed as
 *               4 * (block row) + (block column)
 *
 * Each output pixel is the prediction pixel plus the rounded DC of its
 * 4x4 sub-block, (dc + 32) >> 6, passed through WelsClip1 (defined
 * elsewhere; presumably clamps to the 8-bit pixel range).
 */
void WelsIDctRecI16x16Dc_c (uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDctDc)
{
  int32_t iRow;

  for (iRow = 0; iRow < 16; ++iRow) {
    /* iRow & 0x0C == 4 * (iRow / 4): index of the first DC for this band of 4x4 blocks. */
    const int32_t iDcRowBase = iRow & 0x0C;
    int32_t iCol;

    for (iCol = 0; iCol < 16; ++iCol) {
      /* Every group of four columns shares one DC value. */
      const int32_t iDcIdx = iDcRowBase + (iCol >> 2);
      const int32_t iResidual = (pDctDc[iDcIdx] + 32) >> 6;

      pRec[iCol] = WelsClip1 (pPred[iCol] + iResidual);
    }

    pRec  += iStride;
    pPred += iPredStride;
  }
}
;***********************************************************************
; void WelsIDctRecI16x16Dc_sse2(uint8_t *rec, int32_t stride, uint8_t *pred, int32_t pred_stride, int16_t *dct_dc)
;
; SSE2 version of the DC-only I16x16 luma reconstruction.
;
; All helper macros used below (LOAD_5_PARA, PUSH_XMM, SIGN_EXTENSION,
; WELS_DW32, SSE2_Load8DC, SSE2_StoreDiff4x8p, POP_XMM, LOAD_5_PARA_POP)
; are defined outside this listing; notes about them are assumptions to
; be confirmed against their definitions.
;
; Register use visible in this routine:
;   r0, r2 : pointers advanced by 2*r1 and 2*r3 respectively between stores
;            (presumably rec and pred -- TODO confirm LOAD_5_PARA mapping)
;   r1, r3 : sign-extended from their 32-bit halves (presumably the two
;            int32_t strides)
;   r4     : base address from which the DC values are loaded
;   xmm7   : zeroed with pxor
;   xmm6   : initialised by WELS_DW32 (presumably the +32 rounding term)
;   xmm5   : passed to SSE2_StoreDiff4x8p (presumably scratch)
;***********************************************************************
WELS_EXTERN WelsIDctRecI16x16Dc_sse2
; push_num is reset before the argument-loading macro; presumably it tracks
; how many pushes the prologue macros perform.
%assign push_num 0
LOAD_5_PARA
PUSH_XMM 8
; The strides are int32_t in the prototype; widen them before they are used
; in address arithmetic.
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
pxor xmm7, xmm7
WELS_DW32 xmm6
; First load: DC data at [r4]. The result lands in xmm0..xmm3; xmm0/xmm1 feed
; the next two stores and xmm2/xmm3 the two after that.
SSE2_Load8DC xmm0, xmm1, xmm2, xmm3, xmm6, [r4]
; Each store is followed by advancing both pointers by two strides, so each
; SSE2_StoreDiff4x8p invocation presumably covers two 16-pixel rows.
SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, r0, r2, r1, r3
lea r0, [r0 + 2 * r1]
lea r2, [r2 + 2 * r3]
SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, r0, r2, r1, r3
lea r0, [r0 + 2 * r1]
lea r2, [r2 + 2 * r3]
SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3
lea r0, [r0 + 2 * r1]
lea r2, [r2 + 2 * r3]
SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3
; Second load: DC data 16 bytes further on (the next eight int16_t values),
; issued before the pointers are advanced past the rows just stored.
SSE2_Load8DC xmm0, xmm1, xmm2, xmm3, xmm6, [r4 + 16]
lea r0, [r0 + 2 * r1]
lea r2, [r2 + 2 * r3]
SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, r0, r2, r1, r3
lea r0, [r0 + 2 * r1]
lea r2, [r2 + 2 * r3]
SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, r0, r2, r1, r3
lea r0, [r0 + 2 * r1]
lea r2, [r2 + 2 * r3]
SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3
lea r0, [r0 + 2 * r1]
lea r2, [r2 + 2 * r3]
; Last store: no pointer advance follows it.
SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3
; Epilogue: undo PUSH_XMM and LOAD_5_PARA in reverse order.
POP_XMM
LOAD_5_PARA_POP
ret
- Assembly x64 Intro - SSE2 IDCT I16X16 DC
- Assembly x64 Intro - SSE2 IDCT
- Assembly x64 Intro - SSE2 8DC Load
- Assembly x64 Intro - SSE2 Hadamard 4 DC
- Assembly x64 Intro - SSE2 Copy16Times
- Assembly x64 Intro - SSE2 Copy8Times
- Assembly x64 Intro - SSE2 DCT
- Assembly x64 Intro - SSE2 4x4D Transpose
- Assembly x64 Intro - SSE2 2x4x4W Transpose
- Assembly x64 Intro - SSE2 4x8 Load
- Assembly x64 Intro - SSE2 4x8 Store
- Assembly x64 Intro - SSE2 Diff 8 Load
- Assembly x64 Intro - SSE2 Diff 4x8 Store
- Assembly x64 Intro - Arrays
- Assembly x64 Intro - Nasm Example
- Assembly x64 Intro - Nasm Syntax
- Assembly x64 Intro - Arith Operate
- Assembly x64 Intro - Loop Example
- linux 关机重启 shutdown和reboot的区别
- HDOJ 1563 Find your present!
- ucosIII 系统任务
- Python包安装——mayavi安装
- 第10周 【项目2 - 二叉树遍历的递归算法】
- Assembly x64 Intro - SSE2 IDCT I16X16 DC
- 创业者融资攻略:什么时候可以“视金钱如粪土”?
- [转]在SpringMVC中获取request对象的几种方式
- 【代码笔记】iOS-传身份证号码可返回生日字符串
- 第16周项目1-选择排序之堆排序(6)
- 第14周项目1-(2)验证分块查找算法
- LeetCode-7-Reverse Integer(Nmber-Strng)-Easy
- Joomla获取某项目录的地址
- 乔布斯:人生的秘密