[x265] [PATCH 3 of 5] asm: modify nquant() output to reduce abs operator in rdoQuant()

Min Chen chenm003 at 163.com
Fri Sep 25 02:15:35 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1443138813 18000
# Node ID 89bb8d801d1a8dbb5461f5b4d5376d5d0b75bd90
# Parent  bcd926000cd8fecf2c2c52af49e94d4405b66280
asm: modify nquant() output to reduce abs operator in rdoQuant()
---
 source/common/dct.cpp             |    5 ++++-
 source/common/quant.cpp           |    2 +-
 source/common/x86/pixel-util8.asm |    7 +++++--
 3 files changed, 10 insertions(+), 4 deletions(-)

diff -r bcd926000cd8 -r 89bb8d801d1a source/common/dct.cpp
--- a/source/common/dct.cpp	Thu Sep 24 18:53:31 2015 -0500
+++ b/source/common/dct.cpp	Thu Sep 24 18:53:33 2015 -0500
@@ -703,7 +703,10 @@
         if (level)
             ++numSig;
         level *= sign;
-        qCoef[blockpos] = (int16_t)x265_clip3(-32768, 32767, level);
+
+        // TODO: if we limit the range to [-32767, 32767], we can gain more performance by changing the output.
+        //       But nquant is only a small percentage of rdoQuant, so I keep the old dynamic range for compatibility.
+        qCoef[blockpos] = (int16_t)abs(x265_clip3(-32768, 32767, level));
     }
 
     return numSig;
diff -r bcd926000cd8 -r 89bb8d801d1a source/common/quant.cpp
--- a/source/common/quant.cpp	Thu Sep 24 18:53:31 2015 -0500
+++ b/source/common/quant.cpp	Thu Sep 24 18:53:33 2015 -0500
@@ -841,7 +841,7 @@
         {
             scanPos              = (cgScanPos << MLS_CG_SIZE) + scanPosinCG;
             uint32_t blkPos      = codeParams.scan[scanPos];
-            uint32_t maxAbsLevel = abs(dstCoeff[blkPos]);             /* abs(quantized coeff) */
+            uint32_t maxAbsLevel = dstCoeff[blkPos];                  /* abs(quantized coeff) */
             int signCoef         = m_resiDctCoeff[blkPos];            /* pre-quantization DCT coeff */
             int predictedCoef    = m_fencDctCoeff[blkPos] - signCoef; /* predicted DCT = source DCT - residual DCT*/
 
diff -r bcd926000cd8 -r 89bb8d801d1a source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Thu Sep 24 18:53:31 2015 -0500
+++ b/source/common/x86/pixel-util8.asm	Thu Sep 24 18:53:33 2015 -0500
@@ -792,6 +792,7 @@
     pshufd      m6, m6, 0       ; m6 = add
     mov         r3d, r4d        ; r3 = numCoeff
     shr         r4d, 3
+    pxor        m4, m4
 
 .loop:
     pmovsxwd    m0, [r0]        ; m0 = level
@@ -810,13 +811,13 @@
     psignd      m3, m1
 
     packssdw    m2, m3
+    pabsw       m2, m2
 
     movu        [r2], m2
     add         r0, 16
     add         r1, 32
     add         r2, 16
 
-    pxor        m4, m4
     pcmpeqw     m2, m4
     psubw       m7, m2
 
@@ -862,9 +863,11 @@
     psignd      m2, m0
 
     packssdw    m1, m2
+    pabsw       m1, m1
+
     vpermq      m2, m1, q3120
-
     movu        [r2], m2
+
     add         r0, mmsize
     add         r1, mmsize * 2
     add         r2, mmsize



More information about the x265-devel mailing list