[x265] [PATCH] pixel_add_ps_4x4, fixed reading uninitialized pixels

Praveen Tiwari <praveen at multicorewareinc.com>
Thu Jan 9 12:21:24 CET 2014


# HG changeset patch
# User Praveen Tiwari
# Date 1389266361 -19800
# Node ID 77d4d14242fa43a3ec33c5e6fbbed327425cb226
# Parent  c4edab8dab65b393ab9d48f7533df554f41ca4fe
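pixel_add_ps_4x4: fix reading of uninitialized pixels

The previous code loaded each 4-pixel src0 row with "pmovzxbw m0, [r2]",
whose memory form reads 8 bytes and therefore touched 4 bytes past the end
of the row. Load each row with a 4-byte movd instead, merge two rows with
punpckldq, and widen them with the register form of pmovzxbw.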

diff -r c4edab8dab65 -r 77d4d14242fa source/common/x86/pixeladd8.asm
--- a/source/common/x86/pixeladd8.asm	Tue Jan 07 18:36:17 2014 +0530
+++ b/source/common/x86/pixeladd8.asm	Thu Jan 09 16:49:21 2014 +0530
@@ -302,57 +302,51 @@
     movh     [r0 + r1],      m2
 %else
 INIT_XMM sse4
-cglobal pixel_add_ps_%1x%2, 6, 7, 2, dest, destride, src0, scr1, srcStride0, srcStride1
+cglobal pixel_add_ps_%1x%2, 6, 7, 3, dest, destride, src0, scr1, srcStride0, srcStride1
 
-add         r5,            r5
-
-mov         r6d,           %2/4
+    add         r5,            r5
+    mov         r6d,           %2/4
 
 .loop
+     movd        m0, [r2]
+     movd        m1, [r2 + r4]
+     punpckldq   m0, m1
+     pmovzxbw    m1, m0
 
-    pmovzxbw    m0,            [r2]
-    movh        m1,            [r3]
+     movh        m0, [r3]
+     movh        m2, [r3 + r5]
+     punpcklqdq  m0, m2
 
-    paddw       m0,            m1
-    packuswb    m0,            m0
+     paddw       m1, m0
+     packuswb    m1, m1
 
-    movd        [r0],          m0
+     movd        [r0],      m1
+     pextrd      [r0 + r1], m1, 1
 
-    pmovzxbw    m0,            [r2 + r4]
-    movh        m1,            [r3 + r5]
+     movd        m0, [r2 + 2 * r4]
+     lea         r2, [r2 + 2 * r4]
+     movd        m1, [r2 + r4]
+     punpckldq   m0, m1
+     pmovzxbw    m1, m0
 
-    paddw       m0,            m1
-    packuswb    m0,            m0
+     movh        m0, [r3 + 2 * r5]
+     lea         r3, [r3 + 2 * r5]
+     movh        m2, [r3 + r5]
+     punpcklqdq  m0, m2
 
-    movd        [r0 + r1],     m0
+     paddw       m1, m0
+     packuswb    m1, m1
 
-    pmovzxbw    m0,            [r2 + 2 * r4]
-    movh        m1,            [r3 + 2 * r5]
-
-    paddw       m0,            m1
-    packuswb    m0,            m0
-
-    movd        [r0 + 2 * r1], m0
-
-    lea         r0,            [r0 + 2 * r1]
-    lea         r2,            [r2 + 2 * r4]
-    lea         r3,            [r3 + 2 * r5]
-
-    pmovzxbw    m0,            [r2 + r4]
-    movh        m1,            [r3 + r5]
-
-    paddw       m0,            m1
-    packuswb    m0,            m0
-
-    movd        [r0 + r1],     m0
+     movd        [r0 + 2 * r1], m1
+     lea         r0,            [r0 + 2 * r1]
+     pextrd      [r0 + r1],     m1, 1
 %endif
     dec         r6d
     lea         r0,            [r0 + 2 * r1]
     lea         r2,            [r2 + 2 * r4]
     lea         r3,            [r3 + 2 * r5]
     jnz         .loop
-
-RET
+    RET
 %endmacro
 
 PIXEL_ADD_PS_W4_H4   4,  4


More information about the x265-devel mailing list