[x265] [PATCH] arm: Implement dequant_normal ARM NEON

Wed Apr 20 13:14:39 CEST 2016

# HG changeset patch
# User Radhakrishnan VR <radhakrishnan at multicorewareinc.com>
# Date 1460718872 -19800
#      Fri Apr 15 16:44:32 2016 +0530
# Node ID 534b8e2845b8156010b3c79bfa88c81c7b0b9295
# Parent  c1bee15b165dd29e524501ba969973f24ea29007
arm: Implement dequant_normal ARM NEON

diff -r c1bee15b165d -r 534b8e2845b8 source/common/arm/asm-primitives.cpp

--- a/source/common/arm/asm-primitives.cpp	Fri Apr 15 11:39:39 2016 +0530
+++ b/source/common/arm/asm-primitives.cpp	Fri Apr 15 16:44:32 2016 +0530
@@ -45,6 +45,8 @@
     {
         // dequant_scaling
          p.dequant_scaling = PFX(dequant_scaling_neon);
+         p.dequant_normal  = PFX(dequant_normal_neon);
+
         // luma satd
          p.pu[LUMA_4x4].satd   = PFX(pixel_satd_4x4_neon);
          p.pu[LUMA_4x8].satd   = PFX(pixel_satd_4x8_neon);
diff -r c1bee15b165d -r 534b8e2845b8 source/common/arm/pixel-util.S
--- a/source/common/arm/pixel-util.S	Fri Apr 15 11:39:39 2016 +0530
+++ b/source/common/arm/pixel-util.S	Fri Apr 15 16:44:32 2016 +0530
@@ -2027,3 +2027,56 @@
     pop             {r4, r5, r6, r7}
     bx              lr
 endfunc
+
+// void dequant_normal_c(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift)
+//
+// NEON dequantization; for every i in [0, num):
+//     coef[i] = saturate_s16((quantCoef[i] * scale + (1 << (shift - 1))) >> shift)
+//
+// In:    r0 = quantCoef, r1 = coef, r2 = num, r3 = scale, [sp] = shift
+// Note:  eight coefficients are processed per iteration and the counter is only
+//        tested after the first pass, so num must be a non-zero multiple of 8.
+// Clobb: r0-r3, r12, q0-q3, q8-q9, flags (r4-r6 are saved and restored)
+function x265_dequant_normal_neon
+    push            {r4, r5, r6}
+    ldr             r4, [sp, #12]       // shift (5th arg; 12 bytes were just pushed)
+#if HIGH_BIT_DEPTH
+    // scale <= 32767 needs no adjustment. 32767 is not an encodable immediate,
+    // so compare against 32768 and use a signed less-than instead.
+    cmp             r3, #32768
+    blt             dqn_skip
+    lsr             r3, #(BIT_DEPTH - 8)
+    sub             r4, #(BIT_DEPTH - 8)
+dqn_skip:
+#endif
+    mov             r12, #1
+    sub             r5, r4, #1
+    lsr             r2, #3              // num / 8
+    lsl             r5, r12, r5         // 1 << (shift - 1), the rounding offset
+
+    neg             r6, r4              // -shift: vshl by a negative count shifts right
+    vdup.32         q0, r3              // q0 = scale
+    vdup.32         q1, r6              // q1 = -shift
+    vdup.32         q2, r5              // q2 = rounding offset
+
+dqn_loop1:
+    vld1.16         {q3}, [r0]!         // load 8 x int16 quantCoef
+    vmovl.s16       q8, d6              // widen to 2 x (4 x int32)
+    vmovl.s16       q9, d7
+
+    vmul.s32        q8, q0
+    vmul.s32        q9, q0
+    vadd.s32        q8, q2
+    vadd.s32        q9, q2
+
+    vshl.s32        q8, q1
+    vshl.s32        q9, q1
+    vqmovn.s32      d16, q8             // saturating narrow back to int16
+    vqmovn.s32      d17, q9
+
+    subs            r2, #1
+    vst1.16         {q8}, [r1]!         // store 8 x int16 coef
+    bne             dqn_loop1           // vst1 does not touch the flags set by subs
+    pop             {r4, r5, r6}
+    bx              lr
+endfunc
diff -r c1bee15b165d -r 534b8e2845b8 source/common/arm/pixel-util.h
--- a/source/common/arm/pixel-util.h	Fri Apr 15 11:39:39 2016 +0530
+++ b/source/common/arm/pixel-util.h	Fri Apr 15 16:44:32 2016 +0530
@@ -80,4 +80,5 @@
 int x265_pixel_sa8d_64x64_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2);
 
 void x265_dequant_scaling_neon(const int16_t* quantCoef, const int32_t* deQuantCoef, int16_t* coef, int num, int per, int shift);
+void x265_dequant_normal_neon(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift);
 #endif // ifndef X265_PIXEL_UTIL_ARM_H