[x265] [PATCH] arm: Implement nquant
ramya at multicorewareinc.com
ramya at multicorewareinc.com
Fri Apr 22 12:51:43 CEST 2016
# HG changeset patch
# User Ramya Sriraman<ramya at multicorewareinc.com>
# Date 1461321845 -19800
# Fri Apr 22 16:14:05 2016 +0530
# Node ID 750a20cdf7dcb381f9008f51473fbd74d45b7f5e
# Parent e21b86fb24567ad92be78eaadd4278fd34a7d161
arm: Implement nquant
diff -r e21b86fb2456 -r 750a20cdf7dc source/common/arm/asm-primitives.cpp
--- a/source/common/arm/asm-primitives.cpp Wed Apr 20 18:44:13 2016 +0530
+++ b/source/common/arm/asm-primitives.cpp Fri Apr 22 16:14:05 2016 +0530
@@ -822,6 +822,7 @@
// quant
p.quant = PFX(quant_neon);
+ p.nquant = PFX(nquant_neon);
}
if (cpuMask & X265_CPU_ARMV6)
{
diff -r e21b86fb2456 -r 750a20cdf7dc source/common/arm/pixel-util.S
--- a/source/common/arm/pixel-util.S Wed Apr 20 18:44:13 2016 +0530
+++ b/source/common/arm/pixel-util.S Fri Apr 22 16:14:05 2016 +0530
@@ -2022,3 +2022,54 @@
bx lr
endfunc
+function x265_nquant_neon
+// uint32_t nquant(const int16_t* coef, const int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff)
+// In:  r0 = coef, r1 = quantCoeff, r2 = qCoef, r3 = qBits; add and numCoeff are read from the stack.
+// Out: r0 = count of non-zero quantized levels. Each loop iteration handles four coefficients.
+// NEON temporaries stay in q0-q3/q8-q10: q4-q7 (d8-d15) are callee-saved under the AAPCS.
+    push            {r4-r7}
+    vdup.s32        q8, r3                  // qbits
+    vneg.s32        q8, q8                  // -qbits: vshl by a negative count shifts right
+    ldr             r4, [sp, #4 * 4]        // add (stack arg, located above the 4 pushed registers)
+    vdup.s32        q9, r4
+    ldr             r5, [sp, #4 * 4 + 4]    // numcoeff
+    mov             r4, #1
+    vdup.s32        q10, r4                 // q10 = 1 in every lane
+    lsr             r5, r5, #2              // r5 = numcoeff / 4 = number of loop iterations
+    eor             r6, r6                  // r6 = numsig = 0
+
+.loop_nquant:
+    vld1.s16        d0, [r0]!
+    vmovl.s16       q1, d0                  // q1 = coef[blockpos], widened to 32 bits
+
+    vclt.s32        q3, q1, #0              // all-ones in lanes where coef < 0, zero elsewhere
+    vorr.s32        q3, q3, q10             // q3 = sign (-1 or +1)
+
+    vabs.s32        q1, q1                  // q1 = abs(coef[blockpos])
+    vld1.s32        {q0}, [r1]!             // q0 = quantCoeff[blockpos]
+    vmul.s32        q0, q0, q1              // q0 = tmplevel = abs(level) * quantCoeff[blockpos]
+    vadd.s32        q1, q0, q9              // q1 = tmplevel + add
+    vshl.s32        q1, q1, q8              // q1 = level = (tmplevel + add) >> qbits
+
+    // numsig: clz == 32 only for a zero lane, so (clz >> 5) flags zero levels with 1
+    vclz.s32        q2, q1
+    vshr.u32        q2, #5
+    vadd.u32        d4, d5
+    vpadd.u32       d4, d4                  // d4[0] = number of zero levels among the four lanes
+    vmov.32         r12, d4[0]
+    mov             r7, #4
+    sub             r7, r7, r12             // r7 = 4 - zeros = non-zero levels in this group
+    add             r6, r7
+
+    vmul.s32        q2, q1, q3              // level *= sign
+    vqmovn.s32      d0, q2                  // saturate to the int16 range
+    vabs.s16        d1, d0
+    vst1.s16        d1, [r2]!               // qCoef[blockpos] = abs(clipped level)
+
+    subs            r5, #1
+    bne             .loop_nquant
+
+    mov             r0, r6                  // return numsig
+    pop             {r4-r7}
+    bx              lr
+endfunc
diff -r e21b86fb2456 -r 750a20cdf7dc source/common/arm/pixel-util.h
--- a/source/common/arm/pixel-util.h Wed Apr 20 18:44:13 2016 +0530
+++ b/source/common/arm/pixel-util.h Fri Apr 22 16:14:05 2016 +0530
@@ -80,4 +80,5 @@
int x265_pixel_sa8d_64x64_neon(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2);
uint32_t x265_quant_neon(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);
+uint32_t x265_nquant_neon(const int16_t* coef, const int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff);
#endif // ifndef X265_PIXEL_UTIL_ARM_H
More information about the x265-devel
mailing list