[x265] [PATCH] split dequant to normal and scaling path

Min Chen chenm003 at 163.com
Fri Nov 22 12:40:25 CET 2013


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1385120067 -28800
# Node ID d6ebc2bf8497b0c24750f7b566e923ce7cfc7893
# Parent  ca8b260da44fff16e29a65653618ffc94123c8cc
split dequant to normal and scaling path

diff -r ca8b260da44f -r d6ebc2bf8497 source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp	Fri Nov 22 19:34:10 2013 +0800
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp	Fri Nov 22 19:34:27 2013 +0800
@@ -409,8 +409,21 @@
     int rem = m_qpParam.m_rem;
     bool useScalingList = getUseScalingList();
     uint32_t log2TrSize = g_convertToBit[width] + 2;
+    int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
+    int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
     int32_t *dequantCoef = getDequantCoeff(scalingListType, m_qpParam.m_rem, log2TrSize - 2);
-    primitives.dequant(coeff, m_tmpCoeff, width, height, per, rem, useScalingList, log2TrSize, dequantCoef);
+
+    if (!useScalingList)
+    {
+        static const int invQuantScales[6] = { 40, 45, 51, 57, 64, 72 };
+        int scale = invQuantScales[rem] << per;
+        primitives.dequant_normal(coeff, m_tmpCoeff, width * height, scale, shift);
+    }
+    else
+    {
+        // CHECK_ME: this code has not been verified because it is currently a DEAD path
+        primitives.dequant_scaling(coeff, dequantCoef, m_tmpCoeff, width * height, per, shift);
+    }
 
     if (useTransformSkip == true)
     {
diff -r ca8b260da44f -r d6ebc2bf8497 source/common/dct.cpp
--- a/source/common/dct.cpp	Fri Nov 22 19:34:10 2013 +0800
+++ b/source/common/dct.cpp	Fri Nov 22 19:34:27 2013 +0800
@@ -718,57 +718,52 @@
     }
 }
 
-void dequant_c(const int32_t* quantCoef, int32_t* coef, int width, int height, int per, int rem, bool useScalingList, unsigned int log2TrSize, int32_t *dequantCoef)
+void dequant_normal_c(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift)
 {
-    int invQuantScales[6] = { 40, 45, 51, 57, 64, 72 };
-
-    if (width > 32)
-    {
-        width  = 32;
-        height = 32;
-    }
+    // 'scale' arrives precomputed (callers pass invQuantScales[rem] << per), so no lookup table is needed here
+    assert(num <= 32 * 32);
 
     int add, coeffQ;
-    int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
-    int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
 
     int clipQCoef;
 
-    if (useScalingList)
+    add = 1 << (shift - 1); // rounding offset added before the right shift; needs shift >= 1
+
+    for (int n = 0; n < num; n++)
     {
-        shift += 4;
+        clipQCoef = Clip3(-32768, 32767, quantCoef[n]); // clamp the input level to 16-bit range
+        coeffQ = (clipQCoef * scale + add) >> shift;
+        coef[n] = Clip3(-32768, 32767, coeffQ); // clamp the dequantized value to 16-bit range
+    }
+}
 
-        if (shift > per)
+void dequant_scaling_c(const int32_t* quantCoef, const int32_t *deQuantCoef, int32_t* coef, int num, int per, int shift)
+{
+    assert(num <= 32 * 32);
+
+    int add, coeffQ;
+    int clipQCoef;
+
+    shift += 4;
+
+    if (shift > per)
+    {
+        add = 1 << (shift - per - 1);
+
+        for (int n = 0; n < num; n++)
         {
-            add = 1 << (shift - per - 1);
-
-            for (int n = 0; n < width * height; n++)
-            {
-                clipQCoef = Clip3(-32768, 32767, quantCoef[n]);
-                coeffQ = ((clipQCoef * dequantCoef[n]) + add) >> (shift - per);
-                coef[n] = Clip3(-32768, 32767, coeffQ);
-            }
-        }
-        else
-        {
-            for (int n = 0; n < width * height; n++)
-            {
-                clipQCoef = Clip3(-32768, 32767, quantCoef[n]);
-                coeffQ   = Clip3(-32768, 32767, clipQCoef * dequantCoef[n]);
-                coef[n] = Clip3(-32768, 32767, coeffQ << (per - shift));
-            }
+            clipQCoef = Clip3(-32768, 32767, quantCoef[n]);
+            coeffQ = ((clipQCoef * deQuantCoef[n]) + add) >> (shift - per);
+            coef[n] = Clip3(-32768, 32767, coeffQ);
         }
     }
     else
     {
-        add = 1 << (shift - 1);
-        int scale = invQuantScales[rem] << per;
-
-        for (int n = 0; n < width * height; n++)
+        for (int n = 0; n < num; n++)
         {
             clipQCoef = Clip3(-32768, 32767, quantCoef[n]);
-            coeffQ = (clipQCoef * scale + add) >> shift;
-            coef[n] = Clip3(-32768, 32767, coeffQ);
+            coeffQ   = Clip3(-32768, 32767, clipQCoef * deQuantCoef[n]);
+            coef[n] = Clip3(-32768, 32767, coeffQ << (per - shift));
         }
     }
 }
@@ -804,7 +799,8 @@
 
 void Setup_C_DCTPrimitives(EncoderPrimitives& p)
 {
-    p.dequant = dequant_c;
+    p.dequant_scaling = dequant_scaling_c;
+    p.dequant_normal = dequant_normal_c;
     p.quant = quant_c;
     p.dct[DST_4x4] = dst4_c;
     p.dct[DCT_4x4] = dct4_c;
diff -r ca8b260da44f -r d6ebc2bf8497 source/common/primitives.h
--- a/source/common/primitives.h	Fri Nov 22 19:34:10 2013 +0800
+++ b/source/common/primitives.h	Fri Nov 22 19:34:27 2013 +0800
@@ -178,8 +178,8 @@
 typedef void (*calcrecon_t)(pixel* pred, int16_t* residual, pixel* recon, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
 typedef void (*transpose_t)(pixel* dst, pixel* src, intptr_t stride);
 typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);
-typedef void (*dequant_t)(const int32_t* src, int32_t* dst, int width, int height, int mcqp_miper, int mcqp_mirem, bool useScalingList,
-                          unsigned int trSizeLog2, int32_t *dequantCoef);
+typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
+typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
 
 typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
 typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
@@ -261,7 +261,8 @@
     dct_t           dct[NUM_DCTS];
     idct_t          idct[NUM_IDCTS];
     quant_t         quant;
-    dequant_t       dequant;
+    dequant_scaling_t dequant_scaling;
+    dequant_normal_t dequant_normal;
 
     calcresidual_t  calcresidual[NUM_SQUARE_BLOCKS];
     calcrecon_t     calcrecon[NUM_SQUARE_BLOCKS];
diff -r ca8b260da44f -r d6ebc2bf8497 source/common/vec/dct-sse41.cpp
--- a/source/common/vec/dct-sse41.cpp	Fri Nov 22 19:34:10 2013 +0800
+++ b/source/common/vec/dct-sse41.cpp	Fri Nov 22 19:34:27 2013 +0800
@@ -40,114 +40,103 @@
 using namespace x265;
 
 namespace {
-void dequant(const int32_t* quantCoef, int32_t* coef, int width, int height, int per, int rem, bool useScalingList, unsigned int log2TrSize, int32_t *deQuantCoef)
+// TODO: normal and 8bpp dequant have only 16-bit dynamic range, so the 32-bit multiplications can be reduced later
+void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift)
 {
-    int invQuantScales[6] = { 40, 45, 51, 57, 64, 72 };
+    int valueToAdd = 1 << (shift - 1);
+    __m128i vScale = _mm_set1_epi32(scale);
+    __m128i vAdd = _mm_set1_epi32(valueToAdd);
 
-    if (width > 32)
+    for (int n = 0; n < num; n = n + 8)
     {
-        width  = 32;
-        height = 32;
+        __m128i quantCoef1, quantCoef2, quantCoef12, sign;
+
+        quantCoef1 = _mm_loadu_si128((__m128i*)(quantCoef + n));
+        quantCoef2 = _mm_loadu_si128((__m128i*)(quantCoef + n + 4));
+
+        quantCoef12 = _mm_packs_epi32(quantCoef1, quantCoef2);
+        sign = _mm_srai_epi16(quantCoef12, 15);
+        quantCoef1 = _mm_unpacklo_epi16(quantCoef12, sign);
+        quantCoef2 = _mm_unpackhi_epi16(quantCoef12, sign);
+
+        quantCoef1 = _mm_sra_epi32(_mm_add_epi32(_mm_mullo_epi32(quantCoef1, vScale), vAdd), _mm_cvtsi32_si128(shift));
+        quantCoef2 = _mm_sra_epi32(_mm_add_epi32(_mm_mullo_epi32(quantCoef2, vScale), vAdd), _mm_cvtsi32_si128(shift));
+
+        quantCoef12 = _mm_packs_epi32(quantCoef1, quantCoef2);
+        sign = _mm_srai_epi16(quantCoef12, 15);
+        quantCoef1 = _mm_unpacklo_epi16(quantCoef12, sign);
+        _mm_storeu_si128((__m128i*)(coef + n), quantCoef1);
+        quantCoef2 = _mm_unpackhi_epi16(quantCoef12, sign);
+        _mm_storeu_si128((__m128i*)(coef + n + 4), quantCoef2);
     }
+}
+
+void dequant_scaling(const int32_t* quantCoef, const int32_t *deQuantCoef, int32_t* coef, int num, int per, int shift)
+{
+    assert(num <= 32 * 32);
 
     int valueToAdd;
-    int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
-    int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
 
-    if (useScalingList)
+    shift += 4;
+
+    if (shift > per)
     {
-        shift += 4;
+        valueToAdd = 1 << (shift - per - 1);
+        __m128i IAdd = _mm_set1_epi32(valueToAdd);
 
-        if (shift > per)
+        for (int n = 0; n < num; n = n + 8)
         {
-            valueToAdd = 1 << (shift - per - 1);
-            __m128i IAdd = _mm_set1_epi32(valueToAdd);
-
-            for (int n = 0; n < width * height; n = n + 8)
-            {
-                __m128i quantCoef1, quantCoef2, deQuantCoef1, deQuantCoef2, quantCoef12, sign;
-
-                quantCoef1 = _mm_loadu_si128((__m128i*)(quantCoef + n));
-                quantCoef2 = _mm_loadu_si128((__m128i*)(quantCoef + n + 4));
-
-                deQuantCoef1 = _mm_loadu_si128((__m128i*)(deQuantCoef + n));
-                deQuantCoef2 = _mm_loadu_si128((__m128i*)(deQuantCoef + n + 4));
-
-                quantCoef12 = _mm_packs_epi32(quantCoef1, quantCoef2);
-                sign = _mm_srai_epi16(quantCoef12, 15);
-                quantCoef1 = _mm_unpacklo_epi16(quantCoef12, sign);
-                quantCoef2 = _mm_unpackhi_epi16(quantCoef12, sign);
-
-                quantCoef1 = _mm_sra_epi32(_mm_add_epi32(_mm_mullo_epi32(quantCoef1, deQuantCoef1), IAdd), _mm_cvtsi32_si128(shift - per));
-                quantCoef2 = _mm_sra_epi32(_mm_add_epi32(_mm_mullo_epi32(quantCoef2, deQuantCoef2), IAdd), _mm_cvtsi32_si128(shift - per));
-
-                quantCoef12 = _mm_packs_epi32(quantCoef1, quantCoef2);
-                sign = _mm_srai_epi16(quantCoef12, 15);
-                quantCoef1 = _mm_unpacklo_epi16(quantCoef12, sign);
-                _mm_storeu_si128((__m128i*)(coef + n), quantCoef1);
-                quantCoef2 = _mm_unpackhi_epi16(quantCoef12, sign);
-                _mm_storeu_si128((__m128i*)(coef + n + 4), quantCoef2);
-            }
-        }
-        else
-        {
-            for (int n = 0; n < width * height; n = n + 8)
-            {
-                __m128i quantCoef1, quantCoef2, deQuantCoef1, deQuantCoef2, quantCoef12, sign;
-
-                quantCoef1 = _mm_loadu_si128((__m128i*)(quantCoef + n));
-                quantCoef2 = _mm_loadu_si128((__m128i*)(quantCoef + n + 4));
-
-                deQuantCoef1 = _mm_loadu_si128((__m128i*)(deQuantCoef + n));
-                deQuantCoef2 = _mm_loadu_si128((__m128i*)(deQuantCoef + n + 4));
-
-                quantCoef12 = _mm_packs_epi32(quantCoef1, quantCoef2);
-                sign = _mm_srai_epi16(quantCoef12, 15);
-                quantCoef1 = _mm_unpacklo_epi16(quantCoef12, sign);
-                quantCoef2 = _mm_unpackhi_epi16(quantCoef12, sign);
-
-                quantCoef1 = _mm_mullo_epi32(quantCoef1, deQuantCoef1);
-                quantCoef2 = _mm_mullo_epi32(quantCoef2, deQuantCoef2);
-
-                quantCoef12 = _mm_packs_epi32(quantCoef1, quantCoef2);
-                sign = _mm_srai_epi16(quantCoef12, 15);
-                quantCoef1 = _mm_unpacklo_epi16(quantCoef12, sign);
-                quantCoef2 = _mm_unpackhi_epi16(quantCoef12, sign);
-
-                quantCoef1 = _mm_sll_epi32(quantCoef1, _mm_cvtsi32_si128(per - shift));
-                quantCoef2 = _mm_sll_epi32(quantCoef2, _mm_cvtsi32_si128(per - shift));
-
-                quantCoef12 = _mm_packs_epi32(quantCoef1, quantCoef2);
-                sign = _mm_srai_epi16(quantCoef12, 15);
-                quantCoef1 = _mm_unpacklo_epi16(quantCoef12, sign);
-                _mm_storeu_si128((__m128i*)(coef + n), quantCoef1);
-                quantCoef2 = _mm_unpackhi_epi16(quantCoef12, sign);
-                _mm_storeu_si128((__m128i*)(coef + n + 4), quantCoef2);
-            }
-        }
-    }
-    else
-    {
-        valueToAdd = 1 << (shift - 1);
-        int scale = invQuantScales[rem] << per;
-
-        __m128i vScale = _mm_set1_epi32(scale);
-        __m128i vAdd = _mm_set1_epi32(valueToAdd);
-
-        for (int n = 0; n < width * height; n = n + 8)
-        {
-            __m128i quantCoef1, quantCoef2, quantCoef12, sign;
+            __m128i quantCoef1, quantCoef2, deQuantCoef1, deQuantCoef2, quantCoef12, sign;
 
             quantCoef1 = _mm_loadu_si128((__m128i*)(quantCoef + n));
             quantCoef2 = _mm_loadu_si128((__m128i*)(quantCoef + n + 4));
 
+            deQuantCoef1 = _mm_loadu_si128((__m128i*)(deQuantCoef + n));
+            deQuantCoef2 = _mm_loadu_si128((__m128i*)(deQuantCoef + n + 4));
+
             quantCoef12 = _mm_packs_epi32(quantCoef1, quantCoef2);
             sign = _mm_srai_epi16(quantCoef12, 15);
             quantCoef1 = _mm_unpacklo_epi16(quantCoef12, sign);
             quantCoef2 = _mm_unpackhi_epi16(quantCoef12, sign);
 
-            quantCoef1 = _mm_sra_epi32(_mm_add_epi32(_mm_mullo_epi32(quantCoef1, vScale), vAdd), _mm_cvtsi32_si128(shift));
-            quantCoef2 = _mm_sra_epi32(_mm_add_epi32(_mm_mullo_epi32(quantCoef2, vScale), vAdd), _mm_cvtsi32_si128(shift));
+            quantCoef1 = _mm_sra_epi32(_mm_add_epi32(_mm_mullo_epi32(quantCoef1, deQuantCoef1), IAdd), _mm_cvtsi32_si128(shift - per));
+            quantCoef2 = _mm_sra_epi32(_mm_add_epi32(_mm_mullo_epi32(quantCoef2, deQuantCoef2), IAdd), _mm_cvtsi32_si128(shift - per));
+
+            quantCoef12 = _mm_packs_epi32(quantCoef1, quantCoef2);
+            sign = _mm_srai_epi16(quantCoef12, 15);
+            quantCoef1 = _mm_unpacklo_epi16(quantCoef12, sign);
+            _mm_storeu_si128((__m128i*)(coef + n), quantCoef1);
+            quantCoef2 = _mm_unpackhi_epi16(quantCoef12, sign);
+            _mm_storeu_si128((__m128i*)(coef + n + 4), quantCoef2);
+        }
+    }
+    else
+    {
+        for (int n = 0; n < num; n = n + 8)
+        {
+            __m128i quantCoef1, quantCoef2, deQuantCoef1, deQuantCoef2, quantCoef12, sign;
+
+            quantCoef1 = _mm_loadu_si128((__m128i*)(quantCoef + n));
+            quantCoef2 = _mm_loadu_si128((__m128i*)(quantCoef + n + 4));
+
+            deQuantCoef1 = _mm_loadu_si128((__m128i*)(deQuantCoef + n));
+            deQuantCoef2 = _mm_loadu_si128((__m128i*)(deQuantCoef + n + 4));
+
+            quantCoef12 = _mm_packs_epi32(quantCoef1, quantCoef2);
+            sign = _mm_srai_epi16(quantCoef12, 15);
+            quantCoef1 = _mm_unpacklo_epi16(quantCoef12, sign);
+            quantCoef2 = _mm_unpackhi_epi16(quantCoef12, sign);
+
+            quantCoef1 = _mm_mullo_epi32(quantCoef1, deQuantCoef1);
+            quantCoef2 = _mm_mullo_epi32(quantCoef2, deQuantCoef2);
+
+            quantCoef12 = _mm_packs_epi32(quantCoef1, quantCoef2);
+            sign = _mm_srai_epi16(quantCoef12, 15);
+            quantCoef1 = _mm_unpacklo_epi16(quantCoef12, sign);
+            quantCoef2 = _mm_unpackhi_epi16(quantCoef12, sign);
+
+            quantCoef1 = _mm_sll_epi32(quantCoef1, _mm_cvtsi32_si128(per - shift));
+            quantCoef2 = _mm_sll_epi32(quantCoef2, _mm_cvtsi32_si128(per - shift));
 
             quantCoef12 = _mm_packs_epi32(quantCoef1, quantCoef2);
             sign = _mm_srai_epi16(quantCoef12, 15);
@@ -273,7 +262,8 @@
 namespace x265 {
 void Setup_Vec_DCTPrimitives_sse41(EncoderPrimitives &p)
 {
-    p.dequant = dequant;
+    p.dequant_scaling = dequant_scaling;
+    p.dequant_normal = dequant_normal;
 #if !HIGH_BIT_DEPTH
     p.idct[IDST_4x4] = idst4; // fails with 10bit inputs
 #endif
diff -r ca8b260da44f -r d6ebc2bf8497 source/test/mbdstharness.cpp
--- a/source/test/mbdstharness.cpp	Fri Nov 22 19:34:10 2013 +0800
+++ b/source/test/mbdstharness.cpp	Fri Nov 22 19:34:27 2013 +0800
@@ -25,6 +25,7 @@
  *****************************************************************************/
 
 #include "mbdstharness.h"
+#include "TLibCommon/TComRom.h"
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
@@ -195,40 +196,71 @@
     return true;
 }
 
-bool MBDstHarness::check_dequant_primitive(dequant_t ref, dequant_t opt)
+bool MBDstHarness::check_dequant_primitive(dequant_normal_t ref, dequant_normal_t opt)
 {
     int j = 0;
 
     for (int i = 0; i <= 5; i++)
     {
-        int width = (rand() % 4 + 1) * 4;
+        int log2TrSize = (rand() % 4) + 2;
 
-        if (width == 12)
-        {
-            width = 32;
-        }
+        int width = (1 << log2TrSize);
         int height = width;
 
-        int scale = rand() % 58;
-        int per = scale / 6;
-        int rem = scale % 6;
-
-        bool useScalingList = (scale % 2 == 0) ? false : true;
-
-        uint32_t log2TrSize = (rand() % 4) + 2;
+        int qp = rand() % 52;
+        int per = qp / 6;
+        int rem = qp % 6;
+        static const int invQuantScales[6] = { 40, 45, 51, 57, 64, 72 };
+        int scale = invQuantScales[rem] << per;
+        int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
+        int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
 
         int cmp_size = sizeof(int) * height * width;
 
-        opt(mintbuf1 + j, mintbuf3, width, height, per, rem, useScalingList, log2TrSize, mintbuf2 + j);
-        ref(mintbuf1 + j, mintbuf4, width, height, per, rem, useScalingList, log2TrSize, mintbuf2 + j);
+        ref(mintbuf1 + j, mintbuf3, width * height, scale, shift);
+        opt(mintbuf1 + j, mintbuf4, width * height, scale, shift);
 
         if (memcmp(mintbuf3, mintbuf4, cmp_size))
             return false;
 
         j += 16;
 #if _DEBUG
-        memset(mintbuf3, 0, mem_cmp_size);
-        memset(mintbuf4, 0, mem_cmp_size);
+        memset(mintbuf3, 0xCD, mem_cmp_size);
+        memset(mintbuf4, 0xCD, mem_cmp_size);
+#endif
+    }
+
+    return true;
+}
+
+bool MBDstHarness::check_dequant_primitive(dequant_scaling_t ref, dequant_scaling_t opt)
+{
+    int j = 0; // element offset into the input buffers, advanced by 16 after every round
+    // Runs ref and opt on six randomly sized square blocks and fails on the first output mismatch.
+    for (int i = 0; i <= 5; i++)
+    {
+        int log2TrSize = (rand() % 4) + 2; // 2..5, i.e. block widths 4, 8, 16 or 32
+
+        int width = (1 << log2TrSize);
+        int height = width;
+
+        int qp = rand() % 52;
+        int per = qp / 6;
+        int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
+        int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
+
+        int cmp_size = sizeof(int) * height * width;
+        // dequant_scaling_t takes (src, dequantCoef, dst, ...): the output buffer is the THIRD argument
+        ref(mintbuf1 + j, mintbuf2 + j, mintbuf3, width * height, per, shift);
+        opt(mintbuf1 + j, mintbuf2 + j, mintbuf4, width * height, per, shift);
+
+        if (memcmp(mintbuf3, mintbuf4, cmp_size)) // compare the two freshly written outputs
+            return false;
+
+        j += 16;
+#if _DEBUG
+        memset(mintbuf3, 0xCD, mem_cmp_size);
+        memset(mintbuf4, 0xCD, mem_cmp_size);
 #endif
     }
 
@@ -319,9 +351,9 @@
         }
     }
 
-    if (opt.dequant)
+    if (opt.dequant_normal)
     {
-        if (!check_dequant_primitive(ref.dequant, opt.dequant))
+        if (!check_dequant_primitive(ref.dequant_normal, opt.dequant_normal))
         {
             printf("dequant: Failed!\n");
             return false;
@@ -360,10 +392,16 @@
         }
     }
 
-    if (opt.dequant)
+    if (opt.dequant_normal)
     {
-        printf("dequant\t\t");
-        REPORT_SPEEDUP(opt.dequant, ref.dequant, mintbuf1, mintbuf3, 32, 32, 5, 2, false, 5, mintbuf2);
+        printf("dequant_normal\t");
+        REPORT_SPEEDUP(opt.dequant_normal, ref.dequant_normal, mintbuf1, mintbuf3, 32 * 32, 70, 1);
+    }
+
+    if (opt.dequant_scaling)
+    {
+        printf("dequant_scaling\t"); // arguments below follow dequant_scaling_t: (src, dequantCoef, dst, num, per, shift)
+        REPORT_SPEEDUP(opt.dequant_scaling, ref.dequant_scaling, mintbuf1, mintbuf2, mintbuf3, 32 * 32, 5, 1);
     }
 
     if (opt.quant)
diff -r ca8b260da44f -r d6ebc2bf8497 source/test/mbdstharness.h
--- a/source/test/mbdstharness.h	Fri Nov 22 19:34:10 2013 +0800
+++ b/source/test/mbdstharness.h	Fri Nov 22 19:34:27 2013 +0800
@@ -40,7 +40,8 @@
     static const int mb_t_size = 6400;
     static const int mem_cmp_size = 32 * 32;
 
-    bool check_dequant_primitive(dequant_t ref, dequant_t opt);
+    bool check_dequant_primitive(dequant_scaling_t ref, dequant_scaling_t opt);
+    bool check_dequant_primitive(dequant_normal_t ref, dequant_normal_t opt);
     bool check_quant_primitive(quant_t ref, quant_t opt);
     bool check_dct_primitive(dct_t ref, dct_t opt, int width);
     bool check_idct_primitive(idct_t ref, idct_t opt, int width);



More information about the x265-devel mailing list