[x265] [PATCH] arm: Implement scale2D_64to32_neon ARM NEON

radhakrishnan at multicorewareinc.com radhakrishnan at multicorewareinc.com
Thu Mar 10 07:00:20 CET 2016


# HG changeset patch
# User Radhakrishnan VR <radhakrishnan at multicorewareinc.com>
# Date 1457514246 -19800
#      Wed Mar 09 14:34:06 2016 +0530
# Node ID d96f05d083a9c75a5fdf8e5ede6607bc3c091175
# Parent  7f3b515b345b8bc462b1f2c9af1409ac727336a0
arm: Implement scale2D_64to32_neon ARM NEON

diff -r 7f3b515b345b -r d96f05d083a9 source/common/arm/asm-primitives.cpp
--- a/source/common/arm/asm-primitives.cpp	Wed Mar 09 11:46:26 2016 +0530
+++ b/source/common/arm/asm-primitives.cpp	Wed Mar 09 14:34:06 2016 +0530
@@ -43,6 +43,9 @@
 {
     if (cpuMask & X265_CPU_NEON)
     {
+        // scale2D_64to32
+        p.scale2D_64to32  = PFX(scale2D_64to32_neon);
+
         // scale1D_128to64
         p.scale1D_128to64 = PFX(scale1D_128to64_neon);
 
diff -r 7f3b515b345b -r d96f05d083a9 source/common/arm/pixel-util.S
--- a/source/common/arm/pixel-util.S	Wed Mar 09 11:46:26 2016 +0530
+++ b/source/common/arm/pixel-util.S	Wed Mar 09 14:34:06 2016 +0530
@@ -646,3 +646,40 @@
 .endr
     bx              lr
 endfunc
+
+// void scale2D_64to32(pixel* dst, const pixel* src, intptr_t stride)
+// In: r0 = dst, r1 = src, r2 = stride. Each dst byte = (2x2 src block sum + 2) >> 2.
+function x265_scale2D_64to32_neon
+    mov             r3, #16                 // 16 passes, each emitting 2 dst rows (.rept 2)
+    sub             r2, r2, #32             // added after the 32-byte writeback: net +stride per src row
+scale2D_64to32_rows:
+.rept 2
+    // Upper src row: vld2 de-interleaves, so q8/q10 get even columns and q9/q11 odd columns.
+    vld2.8          {q8, q9}, [r1]!
+    vld2.8          {q10, q11}, [r1], r2
+    vaddl.u8        q0, d16, d18            // even + odd neighbour, widened to u16
+    vaddl.u8        q1, d17, d19
+    vaddl.u8        q2, d20, d22
+    vaddl.u8        q3, d21, d23
+    // Lower src row: same loads into the same registers, pair sums kept in q12-q15.
+    vld2.8          {q8, q9}, [r1]!
+    vld2.8          {q10, q11}, [r1], r2
+    vaddl.u8        q12, d16, d18
+    vaddl.u8        q13, d17, d19
+    vaddl.u8        q14, d20, d22
+    vaddl.u8        q15, d21, d23
+    // Combine both rows, then round, shift right by 2 and narrow back to bytes.
+    vadd.u16        q0, q0, q12
+    vadd.u16        q1, q1, q13
+    vadd.u16        q2, q2, q14
+    vadd.u16        q3, q3, q15
+    vrshrn.u16      d16, q0, #2
+    vrshrn.u16      d17, q1, #2
+    vrshrn.u16      d18, q2, #2
+    vrshrn.u16      d19, q3, #2
+    vst1.8          {q8, q9}, [r0]!         // 32 output bytes
+.endr
+    subs            r3, r3, #1
+    bne             scale2D_64to32_rows
+    bx              lr
+endfunc
diff -r 7f3b515b345b -r d96f05d083a9 source/common/arm/pixel-util.h
--- a/source/common/arm/pixel-util.h	Wed Mar 09 11:46:26 2016 +0530
+++ b/source/common/arm/pixel-util.h	Wed Mar 09 14:34:06 2016 +0530
@@ -36,4 +36,5 @@
 void x265_getResidual32_neon(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
 
 void x265_scale1D_128to64_neon(pixel *dst, const pixel *src);
+void x265_scale2D_64to32_neon(pixel* dst, const pixel* src, intptr_t stride);
 #endif // ifndef X265_PIXEL_UTIL_ARM_H


More information about the x265-devel mailing list