[x265] [PATCH] blockcopy_pp_4x32 asm code: eliminated branch instructions and optimized register usage
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Thu Feb 5 11:57:21 CET 2015
# HG changeset patch
# User Praveen Tiwari
# Date 1423133833 -19800
# Node ID d6c384f3625984a57335c679dbc4fcf37fd4f072
# Parent 7105ee079176c18464982fdc15ed0e0e05f5d015
blockcopy_pp_4x32 asm code: eliminated branch instructions and optimized register usage
diff -r 7105ee079176 -r d6c384f36259 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Thu Feb 05 16:21:10 2015 +0530
+++ b/source/common/x86/blockcopy8.asm Thu Feb 05 16:27:13 2015 +0530
@@ -217,47 +217,38 @@
RET
;-----------------------------------------------------------------------------
-; void blockcopy_pp_%1x%2(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride)
+; void blockcopy_pp_4x32(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride)
;-----------------------------------------------------------------------------
-%macro BLOCKCOPY_PP_W4_H8 2
INIT_XMM sse2
-cglobal blockcopy_pp_%1x%2, 4, 7, 4
- mov r4d, %2/8
- lea r5, [3 * r1]
- lea r6, [3 * r3]
-
-.loop:
+cglobal blockcopy_pp_4x32, 4, 6, 4
+
+ lea r4, [3 * r1]
+ lea r5, [3 * r3]
+
movd m0, [r2]
movd m1, [r2 + r3]
movd m2, [r2 + 2 * r3]
- movd m3, [r2 + r6]
+ movd m3, [r2 + r5]
movd [r0], m0
movd [r0 + r1], m1
movd [r0 + 2 * r1], m2
- movd [r0 + r5], m3
-
+ movd [r0 + r4], m3
+
+%rep 7
lea r2, [r2 + 4 * r3]
movd m0, [r2]
movd m1, [r2 + r3]
movd m2, [r2 + 2 * r3]
- movd m3, [r2 + r6]
+ movd m3, [r2 + r5]
lea r0, [r0 + 4 * r1]
movd [r0], m0
movd [r0 + r1], m1
movd [r0 + 2 * r1], m2
- movd [r0 + r5], m3
-
- lea r0, [r0 + 4 * r1]
- lea r2, [r2 + 4 * r3]
-
- dec r4d
- jnz .loop
+ movd [r0 + r4], m3
+%endrep
RET
-%endmacro
-
-BLOCKCOPY_PP_W4_H8 4, 32
;-----------------------------------------------------------------------------
; void blockcopy_pp_6x8(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride)
More information about the x265-devel mailing list