[x265] [PATCH] eliminated register copy from BLOCKCOPY_PS_W16_H4 macro

Praveen Tiwari <praveen at multicorewareinc.com>
Mon Nov 11 12:14:54 CET 2013


# HG changeset patch
# User Praveen Tiwari
# Date 1384168485 -19800
# Node ID bce503510c7262f2e12583b57b2a03370cecd33a
# Parent  a4198e18262ff5ce0655fa322e1ccdb69a44f933
eliminated register copy from BLOCKCOPY_PS_W16_H4 macro by replacing three-operand punpcklbw with pmovzxbw

diff -r a4198e18262f -r bce503510c72 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm	Mon Nov 11 16:37:51 2013 +0530
+++ b/source/common/x86/blockcopy8.asm	Mon Nov 11 16:44:45 2013 +0530
@@ -1838,19 +1838,19 @@
 
 .loop
       movu       m1,                 [r2]
-      punpcklbw  m2,                 m1,            m0
+      pmovzxbw   m2,                 m1
       movu       [r0],               m2
       punpckhbw  m1,                 m0
       movu       [r0 + 16],          m1
 
       movu       m1,                 [r2 + r3]
-      punpcklbw  m2,                 m1,            m0
+      pmovzxbw   m2,                 m1
       movu       [r0 + r1],          m2
       punpckhbw  m1,                 m0
       movu       [r0 + r1 + 16],     m1
 
       movu       m1,                 [r2 + 2 * r3]
-      punpcklbw  m2,                 m1,            m0
+      pmovzxbw   m2,                 m1
       movu       [r0 + 2 * r1],      m2
       punpckhbw  m1,                 m0
       movu       [r0 + 2 * r1 + 16], m1
@@ -1859,7 +1859,7 @@
       lea        r2,                 [r2 + 2 * r3]
 
       movu       m1,                 [r2 + r3]
-      punpcklbw  m2,                 m1,            m0
+      pmovzxbw   m2,                 m1
       movu       [r0 + r1],          m2
       punpckhbw  m1,                 m0
       movu       [r0 + r1 + 16],     m1


More information about the x265-devel mailing list