[x265] [PATCH] blockcopy_pp_6x16 asm code optimization: eliminated branch instructions and optimized register usage
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Thu Feb 5 12:18:43 CET 2015
# HG changeset patch
# User Praveen Tiwari
# Date 1423135116 -19800
# Node ID 499eddf5c1e4dfcb8447d65cb0b48d633b3660a5
# Parent d6c384f3625984a57335c679dbc4fcf37fd4f072
blockcopy_pp_6x16 asm code optimization: eliminated branch instructions and optimized register usage
Performance improved: 411.14c -> 396.59c
diff -r d6c384f36259 -r 499eddf5c1e4 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Thu Feb 05 16:27:13 2015 +0530
+++ b/source/common/x86/blockcopy8.asm Thu Feb 05 16:48:36 2015 +0530
@@ -305,9 +305,9 @@
; void blockcopy_pp_6x16(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride)
;-----------------------------------------------------------------------------
INIT_XMM sse2
-cglobal blockcopy_pp_6x16, 4, 7, 2
- mov r6d, 16/2
-.loop:
+cglobal blockcopy_pp_6x16, 4, 6, 2
+
+%rep 8
movd m0, [r2]
mov r4w, [r2 + 4]
movd m1, [r2 + r3]
@@ -318,8 +318,7 @@
movd [r0 + r1], m1
mov [r0 + r1 + 4], r5w
lea r0, [r0 + r1 * 2]
- dec r6d
- jnz .loop
+%endrep
RET
More information about the x265-devel
mailing list