[x265] [PATCH Review Only] asm code for blockcopy_sp, 8x4 partition

praveen at multicorewareinc.com praveen at multicorewareinc.com
Tue Nov 5 15:32:18 CET 2013


# HG changeset patch
# User Praveen Tiwari
# Date 1383661923 -19800
# Node ID aac83f1770f0551b34b14f77dc43a68108f8de09
# Parent  47e2da8747e7e622d56b9f3acfecfc0676a09a2c
asm code for blockcopy_sp, 8x4 partition

diff -r 47e2da8747e7 -r aac83f1770f0 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm	Tue Nov 05 16:17:48 2013 +0530
+++ b/source/common/x86/blockcopy8.asm	Tue Nov 05 20:02:03 2013 +0530
@@ -27,6 +27,8 @@
 
 SECTION_RODATA 32
 
+tab_Vm:    db 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0   ; byte-shuffle indices: even source bytes 0..14 go to the low 8 lanes; the high 8 lanes all use index 0
+
 SECTION .text
 
 ;-----------------------------------------------------------------------------
@@ -796,3 +798,32 @@
 BLOCKCOPY_PP_W64_H2 64, 32
 BLOCKCOPY_PP_W64_H2 64, 48
 BLOCKCOPY_PP_W64_H2 64, 64
+
+;-----------------------------------------------------------------------------
+; void blockcopy_sp_8x4(pixel *dest, intptr_t destStride, int16_t *src, intptr_t srcStride)
+; Writes the low byte of each 16-bit src sample to dest (8 columns x 4 rows); SSE2 only.
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal blockcopy_sp_8x4, 4, 5, 5, dest, destStride, src, srcStride
+    add         r3,     r3                  ; double srcStride; src rows are addressed with this byte offset below
+    pcmpeqw     m4,     m4                  ; m4 = all ones
+    psrlw       m4,     8                   ; m4 = 0x00FF in every word: keeps each sample's low byte
+
+    movu        m0,     [r2]                ; row 0: 8 x int16
+    movu        m1,     [r2 + r3]           ; row 1
+    movu        m2,     [r2 + 2 * r3]       ; row 2
+    lea         r4,     [r2 + 2 * r3]
+    movu        m3,     [r4 + r3]           ; row 3
+    pand        m0,     m4                  ; clear the high bytes so packuswb below cannot saturate;
+    pand        m1,     m4                  ; the packed result is then the plain low byte of each sample
+    pand        m2,     m4
+    pand        m3,     m4
+    packuswb    m0,     m1                  ; m0 = row 0 bytes (low half) | row 1 bytes (high half)
+    packuswb    m2,     m3                  ; m2 = row 2 bytes (low half) | row 3 bytes (high half)
+
+    movh        [r0],           m0          ; dest row 0
+    movhps      [r0 + r1],      m0          ; dest row 1
+    movh        [r0 + 2 * r1],  m2          ; dest row 2
+    lea         r4,             [r0 + 2 * r1]
+    movhps      [r4 + r1],      m2          ; dest row 3
+    RET


More information about the x265-devel mailing list