[x265] [PATCH] dequant_normal optimization, downscaling quantCoef from int32_t* to int16_t*

Thu Sep 4 15:00:08 CEST 2014

On 09/02, praveen at multicorewareinc.com wrote:
> # HG changeset patch
> # User Praveen Tiwari
> # Date 1409637112 -19800
> # Node ID 32abebf1dd44d8328a32e7441382e459733233b7
> # Parent  380a796052afc62cac7e480fde70e3766a940246
> dequant_normal optimization, downscaling quantCoef from int32_t* to int16_t*

Sending these one at a time has made it difficult to apply them all in
the correct order.

> diff -r 380a796052af -r 32abebf1dd44 source/common/dct.cpp
> --- a/source/common/dct.cpp	Mon Aug 25 12:49:37 2014 +0530
> +++ b/source/common/dct.cpp	Tue Sep 02 11:21:52 2014 +0530
> @@ -718,7 +718,7 @@
>      }
>  }
>  
> -void dequant_normal_c(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift)
> +void dequant_normal_c(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift)
>  {
>  #if HIGH_BIT_DEPTH
>      X265_CHECK(scale < 32768 || ((scale & 3) == 0 && shift > 2), "dequant invalid scale %d\n", scale);
> @@ -732,14 +732,11 @@
>  
>      int add, coeffQ;
>  
> -    int clipQCoef;
> -
>      add = 1 << (shift - 1);
>  
>      for (int n = 0; n < num; n++)
>      {
> -        clipQCoef = Clip3(-32768, 32767, quantCoef[n]);
> -        coeffQ = (clipQCoef * scale + add) >> shift;
> +        coeffQ = (quantCoef[n] * scale + add) >> shift;
>          coef[n] = Clip3(-32768, 32767, coeffQ);
>      }
>  }
> diff -r 380a796052af -r 32abebf1dd44 source/common/primitives.h
> --- a/source/common/primitives.h	Mon Aug 25 12:49:37 2014 +0530
> +++ b/source/common/primitives.h	Tue Sep 02 11:21:52 2014 +0530
> @@ -165,7 +165,7 @@
>  typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
>  typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
>  typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
> -typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
> +typedef void (*dequant_normal_t)(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
>  typedef int  (*count_nonzero_t)(const int16_t *quantCoeff, int numCoeff);
>  
>  typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
> diff -r 380a796052af -r 32abebf1dd44 source/common/quant.cpp
> --- a/source/common/quant.cpp	Mon Aug 25 12:49:37 2014 +0530
> +++ b/source/common/quant.cpp	Tue Sep 02 11:21:52 2014 +0530
> @@ -410,6 +410,15 @@
>      int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
>      int numCoeff = 1 << log2TrSize * 2;
>  
> +    /* This section of code safely converts the int32_t coefficients to int16_t. Once the caller function is
> +     * optimized to take coefficients as int16_t*, it will be cleaned up. */
> +    assert(numCoeff <= 1024);
> +    ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);
> +    for (int i = 0; i < numCoeff; i++)
> +    {
> +        qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
> +    }
> +
>      if (m_scalingList->m_bEnabled)
>      {
>          int scalingListType = (bIntra ? 0 : 3) + ttype;
> @@ -419,7 +428,7 @@
>      else
>      {
>          int scale = m_scalingList->s_invQuantScales[rem] << per;
> -        primitives.dequant_normal(coeff, m_resiDctCoeff, numCoeff, scale, shift);
> +        primitives.dequant_normal(qCoeff, m_resiDctCoeff, numCoeff, scale, shift);
>      }
>  
>      if (useTransformSkip)
> diff -r 380a796052af -r 32abebf1dd44 source/common/x86/pixel-util.h
> --- a/source/common/x86/pixel-util.h	Mon Aug 25 12:49:37 2014 +0530
> +++ b/source/common/x86/pixel-util.h	Tue Sep 02 11:21:52 2014 +0530
> @@ -46,7 +46,7 @@
>  
>  uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
>  uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
> -void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
> +void x265_dequant_normal_sse4(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
>  int x265_count_nonzero_ssse3(const int16_t *quantCoeff, int numCoeff);
>  
>  void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
> diff -r 380a796052af -r 32abebf1dd44 source/test/mbdstharness.cpp
> --- a/source/test/mbdstharness.cpp	Mon Aug 25 12:49:37 2014 +0530
> +++ b/source/test/mbdstharness.cpp	Tue Sep 02 11:21:52 2014 +0530
> @@ -154,10 +154,10 @@
>          int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
>          int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
>  
> -        ref(int_test_buff[index] + j, mintbuf1, width * height, scale, shift);
> -        checked(opt, int_test_buff[index] + j, mintbuf2, width * height, scale, shift);
> +        ref(short_test_buff[index] + j, mintbuf3, width * height, scale, shift);
> +        checked(opt, short_test_buff[index] + j, mintbuf4, width * height, scale, shift);
>  
> -        if (memcmp(mintbuf1, mintbuf2, sizeof(int) * height * width))
> +        if (memcmp(mintbuf3, mintbuf4, sizeof(int) * height * width))
>              return false;
>  
>          reportfail();
> @@ -410,7 +410,7 @@
>      if (opt.dequant_normal)
>      {
>          printf("dequant_normal\t");
> -        REPORT_SPEEDUP(opt.dequant_normal, ref.dequant_normal, int_test_buff[0], mintbuf3, 32 * 32, 70, 1);
> +        REPORT_SPEEDUP(opt.dequant_normal, ref.dequant_normal, short_test_buff[0], mintbuf3, 32 * 32, 70, 1);
>      }
>  
>      if (opt.dequant_scaling)
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho