[x265] [PATCH] arm: Implement scale1D_128to64_neon ARM NEON
radhakrishnan at multicorewareinc.com
radhakrishnan at multicorewareinc.com
Thu Mar 10 06:59:52 CET 2016
# HG changeset patch
# User Radhakrishnan VR <radhakrishnan at multicorewareinc.com>
# Date 1457504186 -19800
# Wed Mar 09 11:46:26 2016 +0530
# Node ID 7f3b515b345b8bc462b1f2c9af1409ac727336a0
# Parent 076b1abaa434c9c396aa6b8835ed212067e9e352
arm: Implement scale1D_128to64_neon ARM NEON
diff -r 076b1abaa434 -r 7f3b515b345b source/common/arm/asm-primitives.cpp
--- a/source/common/arm/asm-primitives.cpp Tue Mar 08 11:14:22 2016 +0530
+++ b/source/common/arm/asm-primitives.cpp Wed Mar 09 11:46:26 2016 +0530
@@ -43,6 +43,9 @@
{
if (cpuMask & X265_CPU_NEON)
{
+ // scale1D_128to64
+ p.scale1D_128to64 = PFX(scale1D_128to64_neon);
+
// copy_count
p.cu[BLOCK_4x4].copy_cnt = PFX(copy_cnt_4_neon);
p.cu[BLOCK_8x8].copy_cnt = PFX(copy_cnt_8_neon);
diff -r 076b1abaa434 -r 7f3b515b345b source/common/arm/pixel-util.S
--- a/source/common/arm/pixel-util.S Tue Mar 08 11:14:22 2016 +0530
+++ b/source/common/arm/pixel-util.S Wed Mar 09 11:46:26 2016 +0530
@@ -626,3 +626,23 @@
pop {r4, r5}
bx lr
endfunc
+
+// void scale1D_128to64(pixel *dst, const pixel *src): r0 = dst, r1 = src
+// Reads 256 bytes, writes 128: dst[i] = (src[2*i] + src[2*i+1] + 1) >> 1
+function x265_scale1D_128to64_neon
+.rept 2                                    // each pass: 128 bytes in, 64 out
+    vld2.u8         {q8, q9}, [r1]!        // q8 = even bytes, q9 = odd bytes
+    vld2.u8         {q10, q11}, [r1]!
+    vld2.u8         {q12, q13}, [r1]!
+    vld2.u8         {q14, q15}, [r1]!      // writeback steps r1 by 32 bytes
+
+    vrhadd.u8       q0, q8, q9             // rounding average of each pair
+    vrhadd.u8       q1, q10, q11
+    vrhadd.u8       q2, q12, q13
+    vrhadd.u8       q3, q14, q15
+
+    vst1.u8         {q0, q1}, [r0]!        // writeback steps r0 by 32 bytes
+    vst1.u8         {q2, q3}, [r0]!
+.endr
+    bx              lr
+endfunc
diff -r 076b1abaa434 -r 7f3b515b345b source/common/arm/pixel-util.h
--- a/source/common/arm/pixel-util.h Tue Mar 08 11:14:22 2016 +0530
+++ b/source/common/arm/pixel-util.h Wed Mar 09 11:46:26 2016 +0530
@@ -34,4 +34,6 @@
void x265_getResidual8_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
void x265_getResidual16_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
void x265_getResidual32_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
+
+void x265_scale1D_128to64_neon(pixel *dst, const pixel *src);
#endif // ifndef X265_PIXEL_UTIL_ARM_H
More information about the x265-devel
mailing list