[x265] [PATCH] dequant_normal optimization, downscaling quantCoef from int32_t* to int16_t*
Steve Borho
steve at borho.org
Thu Sep 4 15:00:08 CEST 2014
On 09/02, praveen at multicorewareinc.com wrote:
> # HG changeset patch
> # User Praveen Tiwari
> # Date 1409637112 -19800
> # Node ID 32abebf1dd44d8328a32e7441382e459733233b7
> # Parent 380a796052afc62cac7e480fde70e3766a940246
> dequant_normal optimization, downscaling quantCoef from int32_t* to int16_t*
Sending these one at a time has made it difficult to apply them all in
the correct order.
> diff -r 380a796052af -r 32abebf1dd44 source/common/dct.cpp
> --- a/source/common/dct.cpp Mon Aug 25 12:49:37 2014 +0530
> +++ b/source/common/dct.cpp Tue Sep 02 11:21:52 2014 +0530
> @@ -718,7 +718,7 @@
> }
> }
>
> -void dequant_normal_c(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift)
> +void dequant_normal_c(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift)
> {
> #if HIGH_BIT_DEPTH
> X265_CHECK(scale < 32768 || ((scale & 3) == 0 && shift > 2), "dequant invalid scale %d\n", scale);
> @@ -732,14 +732,11 @@
>
> int add, coeffQ;
>
> - int clipQCoef;
> -
> add = 1 << (shift - 1);
>
> for (int n = 0; n < num; n++)
> {
> - clipQCoef = Clip3(-32768, 32767, quantCoef[n]);
> - coeffQ = (clipQCoef * scale + add) >> shift;
> + coeffQ = (quantCoef[n] * scale + add) >> shift;
> coef[n] = Clip3(-32768, 32767, coeffQ);
> }
> }
> diff -r 380a796052af -r 32abebf1dd44 source/common/primitives.h
> --- a/source/common/primitives.h Mon Aug 25 12:49:37 2014 +0530
> +++ b/source/common/primitives.h Tue Sep 02 11:21:52 2014 +0530
> @@ -165,7 +165,7 @@
> typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
> typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
> typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
> -typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
> +typedef void (*dequant_normal_t)(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
> typedef int (*count_nonzero_t)(const int16_t *quantCoeff, int numCoeff);
>
> typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
> diff -r 380a796052af -r 32abebf1dd44 source/common/quant.cpp
> --- a/source/common/quant.cpp Mon Aug 25 12:49:37 2014 +0530
> +++ b/source/common/quant.cpp Tue Sep 02 11:21:52 2014 +0530
> @@ -410,6 +410,15 @@
> int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
> int numCoeff = 1 << log2TrSize * 2;
>
> + /* This section of code is to safely convert int32_t coefficients to int16_t, once the caller function is
> + * optimize to take coefficients as int16_t*, it will be cleanse.*/
> + assert(numCoeff <= 1024);
> + ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);
> + for (int i = 0; i < numCoeff; i++)
> + {
> + qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
> + }
> +
> if (m_scalingList->m_bEnabled)
> {
> int scalingListType = (bIntra ? 0 : 3) + ttype;
> @@ -419,7 +428,7 @@
> else
> {
> int scale = m_scalingList->s_invQuantScales[rem] << per;
> - primitives.dequant_normal(coeff, m_resiDctCoeff, numCoeff, scale, shift);
> + primitives.dequant_normal(qCoeff, m_resiDctCoeff, numCoeff, scale, shift);
> }
>
> if (useTransformSkip)
> diff -r 380a796052af -r 32abebf1dd44 source/common/x86/pixel-util.h
> --- a/source/common/x86/pixel-util.h Mon Aug 25 12:49:37 2014 +0530
> +++ b/source/common/x86/pixel-util.h Tue Sep 02 11:21:52 2014 +0530
> @@ -46,7 +46,7 @@
>
> uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
> uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
> -void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
> +void x265_dequant_normal_sse4(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
> int x265_count_nonzero_ssse3(const int16_t *quantCoeff, int numCoeff);
>
> void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
> diff -r 380a796052af -r 32abebf1dd44 source/test/mbdstharness.cpp
> --- a/source/test/mbdstharness.cpp Mon Aug 25 12:49:37 2014 +0530
> +++ b/source/test/mbdstharness.cpp Tue Sep 02 11:21:52 2014 +0530
> @@ -154,10 +154,10 @@
> int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
> int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
>
> - ref(int_test_buff[index] + j, mintbuf1, width * height, scale, shift);
> - checked(opt, int_test_buff[index] + j, mintbuf2, width * height, scale, shift);
> + ref(short_test_buff[index] + j, mintbuf3, width * height, scale, shift);
> + checked(opt, short_test_buff[index] + j, mintbuf4, width * height, scale, shift);
>
> - if (memcmp(mintbuf1, mintbuf2, sizeof(int) * height * width))
> + if (memcmp(mintbuf3, mintbuf4, sizeof(int) * height * width))
> return false;
>
> reportfail();
> @@ -410,7 +410,7 @@
> if (opt.dequant_normal)
> {
> printf("dequant_normal\t");
> - REPORT_SPEEDUP(opt.dequant_normal, ref.dequant_normal, int_test_buff[0], mintbuf3, 32 * 32, 70, 1);
> + REPORT_SPEEDUP(opt.dequant_normal, ref.dequant_normal, short_test_buff[0], mintbuf3, 32 * 32, 70, 1);
> }
>
> if (opt.dequant_scaling)
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list