[x265] [PATCH] arm: Implement scale2D_64to32_neon ARM NEON
radhakrishnan at multicorewareinc.com
radhakrishnan at multicorewareinc.com
Thu Mar 10 07:00:20 CET 2016
# HG changeset patch
# User Radhakrishnan VR <radhakrishnan at multicorewareinc.com>
# Date 1457514246 -19800
# Wed Mar 09 14:34:06 2016 +0530
# Node ID d96f05d083a9c75a5fdf8e5ede6607bc3c091175
# Parent 7f3b515b345b8bc462b1f2c9af1409ac727336a0
arm: Implement scale2D_64to32_neon ARM NEON
diff -r 7f3b515b345b -r d96f05d083a9 source/common/arm/asm-primitives.cpp
--- a/source/common/arm/asm-primitives.cpp Wed Mar 09 11:46:26 2016 +0530
+++ b/source/common/arm/asm-primitives.cpp Wed Mar 09 14:34:06 2016 +0530
@@ -43,6 +43,9 @@
{
if (cpuMask & X265_CPU_NEON)
{
+ // scale2D_64to32
+ p.scale2D_64to32 = PFX(scale2D_64to32_neon);
+
// scale1D_128to64
p.scale1D_128to64 = PFX(scale1D_128to64_neon);
diff -r 7f3b515b345b -r d96f05d083a9 source/common/arm/pixel-util.S
--- a/source/common/arm/pixel-util.S Wed Mar 09 11:46:26 2016 +0530
+++ b/source/common/arm/pixel-util.S Wed Mar 09 14:34:06 2016 +0530
@@ -646,3 +646,40 @@
.endr
bx lr
endfunc
+
+// void scale2D_64to32(pixel* dst, const pixel* src, intptr_t stride) -- halves a 64x64 byte block to 32x32: each output byte is the rounded mean of a 2x2 source group (r0 = dst, r1 = src, r2 = stride)
+function x265_scale2D_64to32_neon
+ sub r2, #32 // r2 = stride - 32: the first load of each row already advances r1 by 32 bytes
+ mov r3, #16 // r3 = loop count; with the .rept 2 below this yields 32 output rows
+loop_scale2D:
+ subs r3, #1 // decrement early; these flags are tested by the bne at the bottom (nothing in between writes the condition flags)
+.rept 2 // two output rows (four source rows) per loop iteration
+ vld2.8 {q8, q9}, [r1]! // row 0, bytes 0-31: q8 = even-indexed bytes, q9 = odd-indexed bytes
+ vld2.8 {q10, q11}, [r1], r2 // row 0, bytes 32-63 (same even/odd split), then step r1 to the start of row 1
+ vld2.8 {q12, q13}, [r1]! // row 1, bytes 0-31
+ vld2.8 {q14, q15}, [r1], r2 // row 1, bytes 32-63, then step r1 to the next pair of source rows
+
+ vaddl.u8 q0, d16, d18 // row 0: widen to 16 bits and add each even/odd neighbour pair (outputs 0-7)
+ vaddl.u8 q1, d17, d19 // row 0 pair sums for outputs 8-15
+ vaddl.u8 q2, d20, d22 // row 0 pair sums for outputs 16-23
+ vaddl.u8 q3, d21, d23 // row 0 pair sums for outputs 24-31
+
+ vaddl.u8 q8, d24, d26 // row 1 pair sums; q8-q11 are reused now that the row 0 bytes have been consumed
+ vaddl.u8 q9, d25, d27 // row 1 pair sums for outputs 8-15
+ vaddl.u8 q10, d28, d30 // row 1 pair sums for outputs 16-23
+ vaddl.u8 q11, d29, d31 // row 1 pair sums for outputs 24-31
+
+ vadd.u16 q0, q8 // add the two rows: q0-q3 now hold the sum of each 2x2 group
+ vadd.u16 q1, q9
+ vadd.u16 q2, q10
+ vadd.u16 q3, q11
+
+ vrshrn.u16 d16, q0, #2 // rounding shift and narrow: (sum + 2) >> 2 as 8-bit results
+ vrshrn.u16 d17, q1, #2
+ vrshrn.u16 d18, q2, #2
+ vrshrn.u16 d19, q3, #2
+ vst1.8 {q8, q9}, [r0]! // write 32 output bytes and advance dst by 32 (output rows are stored back to back)
+.endr
+ bne loop_scale2D // repeat until r3 reaches zero
+ bx lr
+endfunc
diff -r 7f3b515b345b -r d96f05d083a9 source/common/arm/pixel-util.h
--- a/source/common/arm/pixel-util.h Wed Mar 09 11:46:26 2016 +0530
+++ b/source/common/arm/pixel-util.h Wed Mar 09 14:34:06 2016 +0530
@@ -36,4 +36,5 @@
void x265_getResidual32_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
void x265_scale1D_128to64_neon(pixel *dst, const pixel *src);
+void x265_scale2D_64to32_neon(pixel* dst, const pixel* src, intptr_t stride);
#endif // ifndef X265_PIXEL_UTIL_ARM_H
More information about the x265-devel
mailing list