[x265] [PATCH] arm: Implement scale1D_128to64_neon ARM NEON

radhakrishnan at multicorewareinc.com radhakrishnan at multicorewareinc.com
Thu Mar 10 06:59:52 CET 2016


# HG changeset patch
# User Radhakrishnan VR <radhakrishnan at multicorewareinc.com>
# Date 1457504186 -19800
#      Wed Mar 09 11:46:26 2016 +0530
# Node ID 7f3b515b345b8bc462b1f2c9af1409ac727336a0
# Parent  076b1abaa434c9c396aa6b8835ed212067e9e352
arm: Implement scale1D_128to64_neon ARM NEON

diff -r 076b1abaa434 -r 7f3b515b345b source/common/arm/asm-primitives.cpp
--- a/source/common/arm/asm-primitives.cpp	Tue Mar 08 11:14:22 2016 +0530
+++ b/source/common/arm/asm-primitives.cpp	Wed Mar 09 11:46:26 2016 +0530
@@ -43,6 +43,9 @@
 {
     if (cpuMask & X265_CPU_NEON)
     {
+        // scale1D_128to64
+        p.scale1D_128to64 = PFX(scale1D_128to64_neon);
+
         // copy_count
         p.cu[BLOCK_4x4].copy_cnt     = PFX(copy_cnt_4_neon);
         p.cu[BLOCK_8x8].copy_cnt     = PFX(copy_cnt_8_neon);
diff -r 076b1abaa434 -r 7f3b515b345b source/common/arm/pixel-util.S
--- a/source/common/arm/pixel-util.S	Tue Mar 08 11:14:22 2016 +0530
+++ b/source/common/arm/pixel-util.S	Wed Mar 09 11:46:26 2016 +0530
@@ -626,3 +626,23 @@
     pop             {r4, r5}
     bx              lr
 endfunc
+
+// void scale1D_128to64(pixel *dst, const pixel *src) -- r0 = dst, r1 = src; dst[i] = (src[2*i] + src[2*i+1] + 1) >> 1 on 8-bit elements; clobbers r12, q0-q3, q8-q15
+function x265_scale1D_128to64_neon 
+    mov             r12, #32                // post-increment step for the last load/store of each pass (same 32 bytes that `!` would add there)
+.rept 2                                     // two unrolled passes of 128 source bytes -> 64 dest bytes (256 read, 128 written in total)
+    vld2.u8         {q8, q9}, [r1]!         // de-interleave 32 bytes: even-indexed bytes -> q8, odd-indexed bytes -> q9
+    vld2.u8         {q10, q11}, [r1]!       // next 32 source bytes, split even/odd the same way
+    vld2.u8         {q12, q13}, [r1]!
+    vld2.u8         {q14, q15}, [r1], r12   // last load of the pass; r1 += r12 (32)
+
+    vrhadd.u8       q0, q8, q9              // per byte lane: (even + odd + 1) >> 1, i.e. rounded average of each adjacent pair
+    vrhadd.u8       q1, q10, q11
+    vrhadd.u8       q2, q12, q13
+    vrhadd.u8       q3, q14, q15
+
+    vst1.u8         {q0, q1}, [r0]!         // write 32 averaged bytes, r0 += 32
+    vst1.u8         {q2, q3}, [r0], r12     // write the remaining 32 bytes of the pass; r0 += r12 (32)
+.endr
+    bx              lr                      // leaf routine: no stack use, return straight to the caller
+endfunc
diff -r 076b1abaa434 -r 7f3b515b345b source/common/arm/pixel-util.h
--- a/source/common/arm/pixel-util.h	Tue Mar 08 11:14:22 2016 +0530
+++ b/source/common/arm/pixel-util.h	Wed Mar 09 11:46:26 2016 +0530
@@ -34,4 +34,6 @@
 void x265_getResidual8_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
 void x265_getResidual16_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
 void x265_getResidual32_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
+
+void x265_scale1D_128to64_neon(pixel *dst, const pixel *src); // NEON routine from pixel-util.S: writes the rounded average of each adjacent src pair to dst
 #endif // ifndef X265_PIXEL_UTIL_ARM_H


More information about the x265-devel mailing list