[x265] [PATCH] dequant_normal optimization, downscaling quantCoef from int32_t* to int16_t*
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Tue Sep 2 16:08:33 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1409637112 -19800
# Node ID 32abebf1dd44d8328a32e7441382e459733233b7
# Parent 380a796052afc62cac7e480fde70e3766a940246
dequant_normal optimization, downscaling quantCoef from int32_t* to int16_t*
diff -r 380a796052af -r 32abebf1dd44 source/common/dct.cpp
--- a/source/common/dct.cpp Mon Aug 25 12:49:37 2014 +0530
+++ b/source/common/dct.cpp Tue Sep 02 11:21:52 2014 +0530
@@ -718,7 +718,7 @@
}
}
-void dequant_normal_c(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift)
+void dequant_normal_c(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift)
{
#if HIGH_BIT_DEPTH
X265_CHECK(scale < 32768 || ((scale & 3) == 0 && shift > 2), "dequant invalid scale %d\n", scale);
@@ -732,14 +732,11 @@
int add, coeffQ;
- int clipQCoef;
-
add = 1 << (shift - 1);
for (int n = 0; n < num; n++)
{
- clipQCoef = Clip3(-32768, 32767, quantCoef[n]);
- coeffQ = (clipQCoef * scale + add) >> shift;
+ coeffQ = (quantCoef[n] * scale + add) >> shift;
coef[n] = Clip3(-32768, 32767, coeffQ);
}
}
diff -r 380a796052af -r 32abebf1dd44 source/common/primitives.h
--- a/source/common/primitives.h Mon Aug 25 12:49:37 2014 +0530
+++ b/source/common/primitives.h Tue Sep 02 11:21:52 2014 +0530
@@ -165,7 +165,7 @@
typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
-typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
+typedef void (*dequant_normal_t)(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
typedef int (*count_nonzero_t)(const int16_t *quantCoeff, int numCoeff);
typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
diff -r 380a796052af -r 32abebf1dd44 source/common/quant.cpp
--- a/source/common/quant.cpp Mon Aug 25 12:49:37 2014 +0530
+++ b/source/common/quant.cpp Tue Sep 02 11:21:52 2014 +0530
@@ -410,6 +410,15 @@
int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
int numCoeff = 1 << log2TrSize * 2;
+ /* This section of code safely converts the int32_t coefficients to int16_t. Once the caller function is
+ * optimized to take coefficients as int16_t*, this conversion will be removed. */
+ assert(numCoeff <= 1024);
+ ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);
+ for (int i = 0; i < numCoeff; i++)
+ {
+ qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
+ }
+
if (m_scalingList->m_bEnabled)
{
int scalingListType = (bIntra ? 0 : 3) + ttype;
@@ -419,7 +428,7 @@
else
{
int scale = m_scalingList->s_invQuantScales[rem] << per;
- primitives.dequant_normal(coeff, m_resiDctCoeff, numCoeff, scale, shift);
+ primitives.dequant_normal(qCoeff, m_resiDctCoeff, numCoeff, scale, shift);
}
if (useTransformSkip)
diff -r 380a796052af -r 32abebf1dd44 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h Mon Aug 25 12:49:37 2014 +0530
+++ b/source/common/x86/pixel-util.h Tue Sep 02 11:21:52 2014 +0530
@@ -46,7 +46,7 @@
uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
-void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
+void x265_dequant_normal_sse4(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
int x265_count_nonzero_ssse3(const int16_t *quantCoeff, int numCoeff);
void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
diff -r 380a796052af -r 32abebf1dd44 source/test/mbdstharness.cpp
--- a/source/test/mbdstharness.cpp Mon Aug 25 12:49:37 2014 +0530
+++ b/source/test/mbdstharness.cpp Tue Sep 02 11:21:52 2014 +0530
@@ -154,10 +154,10 @@
int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
- ref(int_test_buff[index] + j, mintbuf1, width * height, scale, shift);
- checked(opt, int_test_buff[index] + j, mintbuf2, width * height, scale, shift);
+ ref(short_test_buff[index] + j, mintbuf3, width * height, scale, shift);
+ checked(opt, short_test_buff[index] + j, mintbuf4, width * height, scale, shift);
- if (memcmp(mintbuf1, mintbuf2, sizeof(int) * height * width))
+ if (memcmp(mintbuf3, mintbuf4, sizeof(int) * height * width))
return false;
reportfail();
@@ -410,7 +410,7 @@
if (opt.dequant_normal)
{
printf("dequant_normal\t");
- REPORT_SPEEDUP(opt.dequant_normal, ref.dequant_normal, int_test_buff[0], mintbuf3, 32 * 32, 70, 1);
+ REPORT_SPEEDUP(opt.dequant_normal, ref.dequant_normal, short_test_buff[0], mintbuf3, 32 * 32, 70, 1);
}
if (opt.dequant_scaling)
More information about the x265-devel
mailing list