[x265] [PATCH] dequant_normal optimization, downscaling quantCoef from int32_t* to int16_t*

Tue Sep 2 16:08:33 CEST 2014

# HG changeset patch
# User Praveen Tiwari
# Date 1409637112 -19800
# Node ID 32abebf1dd44d8328a32e7441382e459733233b7
# Parent  380a796052afc62cac7e480fde70e3766a940246
dequant_normal optimization, downscaling quantCoef from int32_t* to int16_t*

diff -r 380a796052af -r 32abebf1dd44 source/common/dct.cpp

--- a/source/common/dct.cpp	Mon Aug 25 12:49:37 2014 +0530
+++ b/source/common/dct.cpp	Tue Sep 02 11:21:52 2014 +0530
@@ -718,7 +718,7 @@
     }
 }
 
-void dequant_normal_c(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift)
+void dequant_normal_c(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift)
 {
 #if HIGH_BIT_DEPTH
     X265_CHECK(scale < 32768 || ((scale & 3) == 0 && shift > 2), "dequant invalid scale %d\n", scale);
@@ -732,14 +732,11 @@
 
     int add, coeffQ;
 
-    int clipQCoef;
-
     add = 1 << (shift - 1);
 
     for (int n = 0; n < num; n++)
     {
-        clipQCoef = Clip3(-32768, 32767, quantCoef[n]);
-        coeffQ = (clipQCoef * scale + add) >> shift;
+        coeffQ = (quantCoef[n] * scale + add) >> shift;
         coef[n] = Clip3(-32768, 32767, coeffQ);
     }
 }
diff -r 380a796052af -r 32abebf1dd44 source/common/primitives.h
--- a/source/common/primitives.h	Mon Aug 25 12:49:37 2014 +0530
+++ b/source/common/primitives.h	Tue Sep 02 11:21:52 2014 +0530
@@ -165,7 +165,7 @@
 typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
 typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
 typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
-typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
+typedef void (*dequant_normal_t)(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
 typedef int  (*count_nonzero_t)(const int16_t *quantCoeff, int numCoeff);
 
 typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
diff -r 380a796052af -r 32abebf1dd44 source/common/quant.cpp
--- a/source/common/quant.cpp	Mon Aug 25 12:49:37 2014 +0530
+++ b/source/common/quant.cpp	Tue Sep 02 11:21:52 2014 +0530
@@ -410,6 +410,15 @@
     int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
     int numCoeff = 1 << log2TrSize * 2;
 
+    /* This section of code safely converts the int32_t coefficients to int16_t. Once the caller function is
+     * optimized to take coefficients as int16_t*, this conversion will be removed. */
+    assert(numCoeff <= 1024);
+    ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);
+    for (int i = 0; i < numCoeff; i++)
+    {
+        qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
+    }
+
     if (m_scalingList->m_bEnabled)
     {
         int scalingListType = (bIntra ? 0 : 3) + ttype;
@@ -419,7 +428,7 @@
     else
     {
         int scale = m_scalingList->s_invQuantScales[rem] << per;
-        primitives.dequant_normal(coeff, m_resiDctCoeff, numCoeff, scale, shift);
+        primitives.dequant_normal(qCoeff, m_resiDctCoeff, numCoeff, scale, shift);
     }
 
     if (useTransformSkip)
diff -r 380a796052af -r 32abebf1dd44 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h	Mon Aug 25 12:49:37 2014 +0530
+++ b/source/common/x86/pixel-util.h	Tue Sep 02 11:21:52 2014 +0530
@@ -46,7 +46,7 @@
 
 uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
 uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
-void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
+void x265_dequant_normal_sse4(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
 int x265_count_nonzero_ssse3(const int16_t *quantCoeff, int numCoeff);
 
 void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
diff -r 380a796052af -r 32abebf1dd44 source/test/mbdstharness.cpp
--- a/source/test/mbdstharness.cpp	Mon Aug 25 12:49:37 2014 +0530
+++ b/source/test/mbdstharness.cpp	Tue Sep 02 11:21:52 2014 +0530
@@ -154,10 +154,10 @@
         int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
         int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
 
-        ref(int_test_buff[index] + j, mintbuf1, width * height, scale, shift);
-        checked(opt, int_test_buff[index] + j, mintbuf2, width * height, scale, shift);
+        ref(short_test_buff[index] + j, mintbuf3, width * height, scale, shift);
+        checked(opt, short_test_buff[index] + j, mintbuf4, width * height, scale, shift);
 
-        if (memcmp(mintbuf1, mintbuf2, sizeof(int) * height * width))
+        if (memcmp(mintbuf3, mintbuf4, sizeof(int) * height * width))
             return false;
 
         reportfail();
@@ -410,7 +410,7 @@
     if (opt.dequant_normal)
     {
         printf("dequant_normal\t");
-        REPORT_SPEEDUP(opt.dequant_normal, ref.dequant_normal, int_test_buff[0], mintbuf3, 32 * 32, 70, 1);
+        REPORT_SPEEDUP(opt.dequant_normal, ref.dequant_normal, short_test_buff[0], mintbuf3, 32 * 32, 70, 1);
     }
 
     if (opt.dequant_scaling)