[x265] [PATCH] fix 16-bit cumulative sum overflow in SATD 32x32 (x86 version only)
Min Chen
chenm003 at 163.com
Fri Jan 24 11:10:21 CET 2014
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1390558213 -28800
# Node ID f5692a4f0af668f35f502402c7e6995de2d623d2
# Parent 23c65133c5553c07fe5e53f9ae87dcb07fab548f
fix 16-bit cumulative sum overflow in SATD 32x32 (x86 version only)
diff -r 23c65133c555 -r f5692a4f0af6 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Thu Jan 23 20:41:05 2014 -0600
+++ b/source/common/x86/pixel-a.asm Fri Jan 24 18:10:13 2014 +0800
@@ -1361,43 +1361,41 @@
SATD_END_SSE2 m6, m7
%else
cglobal pixel_satd_32x32, 4,7,8,0-gprsize ;if !WIN64
-
SATD_START_SSE2 m6, m7
mov r6, r0
mov [rsp], r2
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
-%if HIGH_BIT_DEPTH
- pxor m7, m7
-%endif
- SATD_ACCUM m6, m0, m7
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
lea r0, [r6 + 8*SIZEOF_PIXEL]
mov r2, [rsp]
add r2, 8*SIZEOF_PIXEL
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- SATD_ACCUM m6, m0, m7
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
lea r0, [r6 + 16*SIZEOF_PIXEL]
mov r2, [rsp]
add r2, 16*SIZEOF_PIXEL
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- SATD_ACCUM m6, m0, m7
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
lea r0, [r6 + 24*SIZEOF_PIXEL]
mov r2, [rsp]
add r2, 24*SIZEOF_PIXEL
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- call pixel_satd_8x8_internal
- SATD_END_SSE2 m6, m7
-
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ call pixel_satd_8x8_internal2
+ pxor m7, m7
+ movhlps m7, m6
+ paddd m6, m7
+ pshufd m7, m6, 1
+ paddd m6, m7
+ movd eax, m6
+ RET
%endif
%if WIN64
More information about the x265-devel
mailing list