[x265] [PATCH Review Only] asm code for blockcopy_sp, 8x4 partition
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Tue Nov 5 15:32:18 CET 2013
# HG changeset patch
# User Praveen Tiwari
# Date 1383661923 -19800
# Node ID aac83f1770f0551b34b14f77dc43a68108f8de09
# Parent 47e2da8747e7e622d56b9f3acfecfc0676a09a2c
asm code for blockcopy_sp, 8x4 partition
diff -r 47e2da8747e7 -r aac83f1770f0 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Tue Nov 05 16:17:48 2013 +0530
+++ b/source/common/x86/blockcopy8.asm Tue Nov 05 20:02:03 2013 +0530
@@ -27,6 +27,8 @@
SECTION_RODATA 32
+tab_Vm: db 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0 ; pshufb control: even bytes (low byte of each word) -> lanes 0-7; lanes 8-15 pick byte 0
+
SECTION .text
;-----------------------------------------------------------------------------
@@ -796,3 +798,32 @@
BLOCKCOPY_PP_W64_H2 64, 32
BLOCKCOPY_PP_W64_H2 64, 48
BLOCKCOPY_PP_W64_H2 64, 64
+
+;-----------------------------------------------------------------------------
+; void blockcopy_sp_8x4(pixel *dest, intptr_t destStride, int16_t *src, intptr_t srcStride)
+; Stores the low byte of each int16_t of an 8x4 block (8 bytes per dest row).
+; srcStride is counted in int16_t elements, destStride in bytes. SSE2 only.
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal blockcopy_sp_8x4, 4, 4, 5, dest, destStride, src, srcStride
+
+    add         r3, r3                  ; srcStride: int16_t elements -> bytes
+    pcmpeqw     m4, m4                  ; m4 = all ones
+    psrlw       m4, 8                   ; m4 = 0x00FF in every word
+    movu        m0, [r2]                ; src row 0
+    movu        m1, [r2 + r3]           ; src row 1
+    movu        m2, [r2 + 2 * r3]       ; src row 2
+    lea         r2, [r2 + 2 * r3]       ; r2 -> src row 2
+    movu        m3, [r2 + r3]           ; src row 3
+    pand        m0, m4                  ; keep low byte of each word so that
+    pand        m1, m4                  ; packuswb below never saturates and
+    pand        m2, m4                  ; the result is a plain truncation
+    pand        m3, m4
+    packuswb    m0, m1                  ; m0 = row 0 (low 8 bytes) | row 1 (high 8 bytes)
+    packuswb    m2, m3                  ; m2 = row 2 (low 8 bytes) | row 3 (high 8 bytes)
+    movh        [r0], m0                ; dest row 0
+    movhps      [r0 + r1], m0           ; dest row 1
+    movh        [r0 + 2 * r1], m2       ; dest row 2
+    lea         r0, [r0 + 2 * r1]       ; r0 -> dest row 2
+    movhps      [r0 + r1], m2           ; dest row 3
+    RET
More information about the x265-devel
mailing list