[x265] [PATCH] eliminated register copy from BLOCKCOPY_PS_W16_H4 macro
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Mon Nov 11 12:14:54 CET 2013
# HG changeset patch
# User Praveen Tiwari
# Date 1384168485 -19800
# Node ID bce503510c7262f2e12583b57b2a03370cecd33a
# Parent a4198e18262ff5ce0655fa322e1ccdb69a44f933
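eliminated register copy from BLOCKCOPY_PS_W16_H4 macro by replacing the three-operand "punpcklbw m2, m1, m0" with "pmovzxbw m2, m1" for the low-half byte-to-word zero extension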
diff -r a4198e18262f -r bce503510c72 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Mon Nov 11 16:37:51 2013 +0530
+++ b/source/common/x86/blockcopy8.asm Mon Nov 11 16:44:45 2013 +0530
@@ -1838,19 +1838,19 @@
.loop
movu m1, [r2]
- punpcklbw m2, m1, m0
+ pmovzxbw m2, m1
movu [r0], m2
punpckhbw m1, m0
movu [r0 + 16], m1
movu m1, [r2 + r3]
- punpcklbw m2, m1, m0
+ pmovzxbw m2, m1
movu [r0 + r1], m2
punpckhbw m1, m0
movu [r0 + r1 + 16], m1
movu m1, [r2 + 2 * r3]
- punpcklbw m2, m1, m0
+ pmovzxbw m2, m1
movu [r0 + 2 * r1], m2
punpckhbw m1, m0
movu [r0 + 2 * r1 + 16], m1
@@ -1859,7 +1859,7 @@
lea r2, [r2 + 2 * r3]
movu m1, [r2 + r3]
- punpcklbw m2, m1, m0
+ pmovzxbw m2, m1
movu [r0 + r1], m2
punpckhbw m1, m0
movu [r0 + r1 + 16], m1
More information about the x265-devel mailing list