[x265] [PATCH] fix SATD32x32 16-bit cumulative sum overflow (x86 version only)

Min Chen chenm003 at 163.com
Fri Jan 24 11:10:21 CET 2014


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1390558213 -28800
# Node ID f5692a4f0af668f35f502402c7e6995de2d623d2
# Parent  23c65133c5553c07fe5e53f9ae87dcb07fab548f
fix SATD32x32 16bits cumulate sum overflow (x86 version only)

diff -r 23c65133c555 -r f5692a4f0af6 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Thu Jan 23 20:41:05 2014 -0600
+++ b/source/common/x86/pixel-a.asm	Fri Jan 24 18:10:13 2014 +0800
@@ -1361,43 +1361,41 @@
     SATD_END_SSE2 m6, m7
 %else
 cglobal pixel_satd_32x32, 4,7,8,0-gprsize   ;if !WIN64
-
     SATD_START_SSE2 m6, m7
     mov r6, r0
     mov [rsp], r2
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-%if HIGH_BIT_DEPTH
-    pxor       m7, m7
-%endif
-    SATD_ACCUM m6, m0, m7
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
     lea r0, [r6 + 8*SIZEOF_PIXEL]
     mov r2, [rsp]
     add r2, 8*SIZEOF_PIXEL
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    SATD_ACCUM m6, m0, m7
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
     lea r0, [r6 + 16*SIZEOF_PIXEL]
     mov r2, [rsp]
     add r2, 16*SIZEOF_PIXEL
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    SATD_ACCUM m6, m0, m7
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
     lea r0, [r6 + 24*SIZEOF_PIXEL]
     mov r2, [rsp]
     add r2, 24*SIZEOF_PIXEL
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    SATD_END_SSE2 m6, m7
-
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    pxor    m7, m7
+    movhlps m7, m6
+    paddd   m6, m7
+    pshufd  m7, m6, 1
+    paddd   m6, m7
+    movd   eax, m6
+    RET
 %endif
 
 %if WIN64



More information about the x265-devel mailing list