<div dir="ltr"><div>Praveen,<br><br>Can you build a Mercurial queue for these quant patches, so that they can be reviewed and pushed once quant is 16-bit everywhere?<br><br></div>Thanks,<br>Deepthi<br></div><div class="gmail_extra">


<br><br><div class="gmail_quote">On Thu, Aug 14, 2014 at 2:01 AM, Steve Borho <span dir="ltr">&lt;<a href="mailto:steve@borho.org" target="_blank">steve@borho.org</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">


<div class="">On 08/12, <a href="mailto:praveen@multicorewareinc.com">praveen@multicorewareinc.com</a> wrote:<br>

> # HG changeset patch<br>

> # User Praveen Tiwari<br>

> # Date 1407834530 -19800<br>

> # Node ID bb4d44663964237e4b66af6d92b2f13dbcf4f9b9<br>

> # Parent  8a7f4bb1d1be32fe668d410450c2e320ccae6098<br>

> count_nonzero primitive, downscaling quantCoeff from int32_t* to int16_t*<br>

<br>

</div>There's not much point in applying these patches until all of the quant<br>

primitives are using short ints for coefficients. As-is this will just<br>

be a slow-down.<br>

<div><div class="h5"><br>

> diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/dct.cpp<br>

> --- a/source/common/dct.cpp   Tue Aug 12 01:11:39 2014 -0500<br>

> +++ b/source/common/dct.cpp   Tue Aug 12 14:38:50 2014 +0530<br>

> @@ -815,7 +815,7 @@<br>

>      return numSig;<br>

>  }<br>

><br>

> -int  count_nonzero_c(const int32_t *quantCoeff, int numCoeff)<br>

> +int  count_nonzero_c(const int16_t *quantCoeff, int numCoeff)<br>

>  {<br>

>      X265_CHECK(((intptr_t)quantCoeff & 15) == 0, "quant buffer not aligned\n");<br>

>      X265_CHECK(numCoeff > 0 && (numCoeff & 15) == 0, "numCoeff invalid %d\n", numCoeff);<br>

> diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/primitives.h<br>

> --- a/source/common/primitives.h      Tue Aug 12 01:11:39 2014 -0500<br>

> +++ b/source/common/primitives.h      Tue Aug 12 14:38:50 2014 +0530<br>

> @@ -163,7 +163,7 @@<br>

>  typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);<br>

>  typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);<br>

>  typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);<br>

> -typedef int  (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff);<br>

> +typedef int  (*count_nonzero_t)(const int16_t *quantCoeff, int numCoeff);<br>

><br>

>  typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br>

>  typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br>

> diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/quant.cpp<br>

> --- a/source/common/quant.cpp Tue Aug 12 01:11:39 2014 -0500<br>

> +++ b/source/common/quant.cpp Tue Aug 12 14:38:50 2014 +0530<br>

> @@ -2,6 +2,7 @@<br>

>   * Copyright (C) 2014 x265 project<br>

>   *<br>

>   * Authors: Steve Borho <<a href="mailto:steve@borho.org">steve@borho.org</a>><br>

> + *          Praveen Kumar Tiwari <<a href="mailto:praveen@multicorewareinc.com">praveen@multicorewareinc.com</a>><br>

>   *<br>

>   * This program is free software; you can redistribute it and/or modify<br>

>   * it under the terms of the GNU General Public License as published by<br>

> @@ -463,7 +464,17 @@<br>

>          const uint32_t sizeIdx = log2TrSize - 2;<br>

>          int useDST = !sizeIdx && ttype == TEXT_LUMA && bIntra;<br>

><br>

> -        X265_CHECK((int)numSig == primitives.count_nonzero(coeff, 1 << log2TrSize * 2), "numSig differ\n");<br>

> +        /* This section of code is to safely convert int32_t coefficients to int16_t, once the caller function is<br>

> +         * optimize to take coefficients as int16_t*, it will be cleanse.*/<br>

> +        int numCoeff = (1 << (log2TrSize * 2));<br>

> +        assert(numCoeff <= 1024);<br>

> +        ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);<br>

> +        for (int i = 0; i < numCoeff; i++)<br>

> +        {<br>

> +            qCoeff[i] = (coeff[i] & 0xFFFF);<br>

> +        }<br>

> +<br>

> +        X265_CHECK((int)numSig == primitives.count_nonzero(qCoeff, 1 << log2TrSize * 2), "numSig differ\n");<br>

><br>

>          // DC only<br>

>          if (numSig == 1 && coeff[0] != 0 && !useDST)<br>

> @@ -501,7 +512,16 @@<br>

>      int numCoeff = 1 << log2TrSize * 2;<br>

>      uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, dstCoeff, qbits, add, numCoeff);<br>

<br>

</div></div>The two int32_t-to-int16_t conversion loops in quant.cpp are only here for an<br>

X265_CHECK statement that is usually compiled out.  All of this code should<br>

have been wrapped within an #if CHECKED_BUILD || _DEBUG block.<br>

<div class="HOEnZb"><div class="h5"><br>

> -    X265_CHECK((int)numSig == primitives.count_nonzero(dstCoeff, numCoeff), "numSig differ\n");<br>

> +    /* This section of code is to safely convert int32_t coefficients to int16_t, once the caller function is<br>

> +     * optimize to take coefficients as int16_t*, it will be cleanse.*/<br>

> +    assert(numCoeff <= 1024);<br>

> +    ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);<br>

> +    for (int i = 0; i < numCoeff; i++)<br>

> +    {<br>

> +        qCoeff[i] = (dstCoeff[i] & 0xFFFF);<br>

> +    }<br>

> +<br>

> +    X265_CHECK((int)numSig == primitives.count_nonzero(qCoeff, numCoeff), "numSig differ\n");<br>

>      if (!numSig)<br>

>          return 0;<br>

><br>

> diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/x86/pixel-util.h<br>

> --- a/source/common/x86/pixel-util.h  Tue Aug 12 01:11:39 2014 -0500<br>

> +++ b/source/common/x86/pixel-util.h  Tue Aug 12 14:38:50 2014 +0530<br>

> @@ -2,6 +2,7 @@<br>

>   * Copyright (C) 2013 x265 project<br>

>   *<br>

>   * Authors: Steve Borho <<a href="mailto:steve@borho.org">steve@borho.org</a>><br>

> + *          Praveen Kumar Tiwari <<a href="mailto:praveen@multicorewareinc.com">praveen@multicorewareinc.com</a>><br>

>   *<br>

>   * This program is free software; you can redistribute it and/or modify<br>

>   * it under the terms of the GNU General Public License as published by<br>

> @@ -47,7 +48,7 @@<br>

>  uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);<br>

>  uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);<br>

>  void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);<br>

> -int x265_count_nonzero_ssse3(const int32_t *quantCoeff, int numCoeff);<br>

> +int x265_count_nonzero_ssse3(const int16_t *quantCoeff, int numCoeff);<br>

><br>

>  void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br>

>  void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);<br>

> diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/x86/pixel-util8.asm<br>

> --- a/source/common/x86/pixel-util8.asm       Tue Aug 12 01:11:39 2014 -0500<br>

> +++ b/source/common/x86/pixel-util8.asm       Tue Aug 12 14:38:50 2014 +0530<br>

> @@ -3,6 +3,7 @@<br>

>  ;*<br>

>  ;* Authors: Min Chen <<a href="mailto:chenm003@163.com">chenm003@163.com</a>> <<a href="mailto:min.chen@multicorewareinc.com">min.chen@multicorewareinc.com</a>><br>

>  ;*          Nabajit Deka <<a href="mailto:nabajit@multicorewareinc.com">nabajit@multicorewareinc.com</a>><br>

> +;*          Praveen Kumar Tiwari <<a href="mailto:praveen@multicorewareinc.com">praveen@multicorewareinc.com</a>><br>

>  ;*<br>

>  ;* This program is free software; you can redistribute it and/or modify<br>

>  ;* it under the terms of the GNU General Public License as published by<br>

> @@ -1091,10 +1092,10 @@<br>

><br>

><br>

>  ;-----------------------------------------------------------------------------<br>

> -; int count_nonzero(const int32_t *quantCoeff, int numCoeff);<br>

> +; int count_nonzero(const int16_t *quantCoeff, int numCoeff);<br>

>  ;-----------------------------------------------------------------------------<br>

>  INIT_XMM ssse3<br>

> -cglobal count_nonzero, 2,2,5<br>

> +cglobal count_nonzero, 2,2,4<br>

>      pxor        m0, m0<br>

>      shr         r1d, 4<br>

>      movd        m1, r1d<br>

> @@ -1103,12 +1104,8 @@<br>

>  .loop:<br>

>      mova        m2, [r0 +  0]<br>

>      mova        m3, [r0 + 16]<br>

> -    packssdw    m2, m3<br>

> -    mova        m3, [r0 + 32]<br>

> -    mova        m4, [r0 + 48]<br>

> -    add         r0, 64<br>

> -    packssdw    m3, m4<br>

>      packsswb    m2, m3<br>

> +    add         r0, 32<br>

>      pcmpeqb     m2, m0<br>

>      paddb       m1, m2<br>

>      dec         r1d<br>

> diff -r 8a7f4bb1d1be -r bb4d44663964 source/encoder/entropy.cpp<br>

> --- a/source/encoder/entropy.cpp      Tue Aug 12 01:11:39 2014 -0500<br>

> +++ b/source/encoder/entropy.cpp      Tue Aug 12 14:38:50 2014 +0530<br>

> @@ -2,6 +2,7 @@<br>

>  * Copyright (C) 2013 x265 project<br>

>  *<br>

>  * Authors: Steve Borho <<a href="mailto:steve@borho.org">steve@borho.org</a>><br>

> +*          Praveen Kumar Tiwari <<a href="mailto:praveen@multicorewareinc.com">praveen@multicorewareinc.com</a>><br>

>  *<br>

>  * This program is free software; you can redistribute it and/or modify<br>

>  * it under the terms of the GNU General Public License as published by<br>

> @@ -1488,8 +1489,18 @@<br>

>  {<br>

>      uint32_t trSize = 1 << log2TrSize;<br>

><br>

> +    /* This section of code is to safely convert int32_t coefficients to int16_t, once the caller function is<br>

> +     * optimize to take coefficients as int16_t*, it will be cleanse.*/<br>

> +    int numCoeff = (1 << (log2TrSize << 1));<br>

> +    assert(numCoeff <= 1024);<br>

> +    ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);<br>

> +    for (int i = 0; i < numCoeff; i++)<br>

> +    {<br>

> +        qCoeff[i] = (coeff[i] & 0xFFFF);<br>

> +    }<br>

> +<br>

>      // compute number of significant coefficients<br>

> -    uint32_t numSig = primitives.count_nonzero(coeff, (1 << (log2TrSize << 1)));<br>

> +    uint32_t numSig = primitives.count_nonzero(qCoeff, (1 << (log2TrSize << 1)));<br>

><br>

>      X265_CHECK(numSig > 0, "cbf check fail\n");<br>

><br>

> diff -r 8a7f4bb1d1be -r bb4d44663964 source/test/mbdstharness.cpp<br>

> --- a/source/test/mbdstharness.cpp    Tue Aug 12 01:11:39 2014 -0500<br>

> +++ b/source/test/mbdstharness.cpp    Tue Aug 12 14:38:50 2014 +0530<br>

> @@ -366,7 +366,7 @@<br>

><br>

>  bool MBDstHarness::check_count_nonzero_primitive(count_nonzero_t ref, count_nonzero_t opt)<br>

>  {<br>

> -    ALIGN_VAR_32(int32_t, qcoeff[32 * 32]);<br>

> +    ALIGN_VAR_32(int16_t, qcoeff[32 * 32]);<br>

><br>

>      for (int i = 0; i < 4; i++)<br>

>      {<br>

> @@ -376,7 +376,7 @@<br>

><br>

>          for (int n = 0; n <= num; n++)<br>

>          {<br>

> -            memset(qcoeff, 0, num * sizeof(int32_t));<br>

> +            memset(qcoeff, 0, num * sizeof(int16_t));<br>

><br>

>              for (int j = 0; j < n; j++)<br>

>              {<br>

> @@ -386,7 +386,7 @@<br>

>                      k = (k + 11) & mask;<br>

>                  }<br>

><br>

> -                qcoeff[k] = rand() - RAND_MAX / 2;<br>

> +                qcoeff[k] = (int16_t)rand() - RAND_MAX / 2;<br>

>              }<br>

><br>

>              int refval = ref(qcoeff, num);<br>

> @@ -516,7 +516,7 @@<br>

>          for (int i = 4; i <= 32; i <<= 1)<br>

>          {<br>

>              printf("count_nonzero[%dx%d]", i, i);<br>

> -            REPORT_SPEEDUP(opt.count_nonzero, ref.count_nonzero, mbufidct, i * i)<br>

> +            REPORT_SPEEDUP(opt.count_nonzero, ref.count_nonzero, mbuf1, i * i)<br>

>          }<br>

>      }<br>

>  }<br>

> _______________________________________________<br>

> x265-devel mailing list<br>

> <a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>

> <a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>

<br>

</div></div><span class="HOEnZb"><font color="#888888">--<br>

Steve Borho<br>

</font></span><div class="HOEnZb"><div class="h5">_______________________________________________<br>

x265-devel mailing list<br>

<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>

<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>

</div></div></blockquote></div><br></div>