[x265] [PATCH] blockcopy_pp_6x16 asm code optimization: eliminate branch instructions and optimize register usage

praveen at multicorewareinc.com praveen at multicorewareinc.com
Thu Feb 5 12:18:43 CET 2015


# HG changeset patch
# User Praveen Tiwari
# Date 1423135116 -19800
# Node ID 499eddf5c1e4dfcb8447d65cb0b48d633b3660a5
# Parent  d6c384f3625984a57335c679dbc4fcf37fd4f072
blockcopy_pp_6x16 asm code optimization: eliminate branch instructions and optimize register usage

Performance improved: 411.14c -> 396.59c

diff -r d6c384f36259 -r 499eddf5c1e4 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm	Thu Feb 05 16:27:13 2015 +0530
+++ b/source/common/x86/blockcopy8.asm	Thu Feb 05 16:48:36 2015 +0530
@@ -305,9 +305,9 @@
 ; void blockcopy_pp_6x16(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride)
 ;-----------------------------------------------------------------------------
 INIT_XMM sse2
-cglobal blockcopy_pp_6x16, 4, 7, 2
-    mov     r6d,    16/2
-.loop:
+cglobal blockcopy_pp_6x16, 4, 6, 2
+
+%rep 8
     movd    m0,     [r2]
     mov     r4w,    [r2 + 4]
     movd    m1,     [r2 + r3]
@@ -318,8 +318,7 @@
     movd    [r0 + r1],      m1
     mov     [r0 + r1 + 4],  r5w
     lea     r0,     [r0 + r1 * 2]
-    dec     r6d
-    jnz     .loop
+%endrep
     RET
 
 


More information about the x265-devel mailing list