[x265] [PATCH] Implementation of low-pass subband dct approximation

Mon Nov 6 05:54:28 CET 2017

On Fri, Nov 3, 2017 at 9:05 PM, <mont3z.claro5 at gmail.com> wrote:

> # HG changeset patch
> # User hribeiro
> # Date 1507997943 25200
> #      Sat Oct 14 09:19:03 2017 -0700
> # Node ID 893b36b82133a2bc4d3cfd6aa3a18c544ce0bf94
> # Parent  6a310b24c6a2d831ef08bbda1bdcf9d929daa308
> Implementation of low-pass subband dct approximation.
>

Thanks for the contribution. I had to make one small fix to where the new
CLI option is added in x265cli.h, to avoid an "if block too deeply nested"
compilation error in MSVC; otherwise, it was good to go. I've pushed
this to the default branch. Thanks again.
I have one comment below that I think is worth addressing in a
subsequent patch.

>
> diff -r 6a310b24c6a2 -r 893b36b82133 doc/reST/cli.rst
> --- a/doc/reST/cli.rst  Thu Nov 02 12:17:29 2017 +0530
> +++ b/doc/reST/cli.rst  Sat Oct 14 09:19:03 2017 -0700
> @@ -2142,6 +2142,18 @@
>
>         Only effective at RD levels 5 and 6
>
> +DCT Approximations
> +=================
> +
> +.. option:: --lowpass-dct
> +
> +    If enabled, x265 will use low-pass truncated dct approximation
> instead of the
> +    standard dct. This approximation is less computational intesive but
> it generates
> +    truncated coefficient matrixes for the transformed block. Empirical
> analysis shows
> +    this approximation gives good PSNR results for QP>=23.
> +
> +    This approximation should be considered for platforms with
> performance and time
> +    constrains.
>
>  Debugging options
>  =================
> diff -r 6a310b24c6a2 -r 893b36b82133 source/CMakeLists.txt
> --- a/source/CMakeLists.txt     Thu Nov 02 12:17:29 2017 +0530
> +++ b/source/CMakeLists.txt     Sat Oct 14 09:19:03 2017 -0700
> @@ -29,7 +29,7 @@
>  option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
>  mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
>  # X265_BUILD must be incremented each time the public API is changed
> -set(X265_BUILD 136)
> +set(X265_BUILD 137)
>  configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
>                 "${PROJECT_BINARY_DIR}/x265.def")
>  configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
> diff -r 6a310b24c6a2 -r 893b36b82133 source/common/CMakeLists.txt
> --- a/source/common/CMakeLists.txt      Thu Nov 02 12:17:29 2017 +0530
> +++ b/source/common/CMakeLists.txt      Sat Oct 14 09:19:03 2017 -0700
> @@ -131,7 +131,7 @@
>  add_library(common OBJECT
>      ${ASM_PRIMITIVES} ${VEC_PRIMITIVES} ${ALTIVEC_PRIMITIVES} ${WINXP}
>      primitives.cpp primitives.h
> -    pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp loopfilter.cpp
> +    pixel.cpp dct.cpp lowpassdct.cpp ipfilter.cpp intrapred.cpp
> loopfilter.cpp
>      constants.cpp constants.h
>      cpu.cpp cpu.h version.cpp
>      threading.cpp threading.h
> diff -r 6a310b24c6a2 -r 893b36b82133 source/common/lowpassdct.cpp
> --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/common/lowpassdct.cpp      Sat Oct 14 09:19:03 2017 -0700
> @@ -0,0 +1,127 @@
> +/**********************************************************
> *******************
> + * Copyright (C) 2017
> + *
> + * Authors: Humberto Ribeiro Filho <mont3z.claro5 at gmail.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111,
> USA.
> + *
> + * This program is also available under a commercial proprietary license.
> + * For more information, contact us at license @ x265.com.
> + ************************************************************
> *****************/
> +
> +#include "common.h"
> +#include "primitives.h"
> +
> +using namespace X265_NS;
> +
> +/* standard dct transformations */
> +static dct_t* s_dct4x4;
> +static dct_t* s_dct8x8;
> +static dct_t* s_dct16x16;
> +
> +static void lowPassDct8_c(const int16_t* src, int16_t* dst, intptr_t
> srcStride)
> +{
> +    ALIGN_VAR_32(int16_t, coef[4 * 4]);
> +    ALIGN_VAR_32(int16_t, avgBlock[4 * 4]);
> +    int16_t totalSum = 0;
> +    int16_t sum = 0;
> +
> +    for (int i = 0; i < 4; i++)
> +        for (int j =0; j < 4; j++)
> +        {
> +            // Calculate average of 2x2 cells
> +            sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1]
> +                    + src[(2*i+1)*srcStride + 2*j] +
> src[(2*i+1)*srcStride + 2*j + 1];
> +            avgBlock[i*4 + j] = sum >> 2;
> +
> +            totalSum += sum; // use to calculate total block average
> +        }
> +
> +    //dct4
> +    (*s_dct4x4)(avgBlock, coef, 4);
> +    memset(dst, 0, 64 * sizeof(int16_t));
> +    for (int i = 0; i < 4; i++)
> +    {
> +        memcpy(&dst[i * 8], &coef[i * 4], 4 * sizeof(int16_t));
> +    }
> +
> +    // replace first coef with total block average
> +    dst[0] = totalSum << 1;
> +}
> +
> +static void lowPassDct16_c(const int16_t* src, int16_t* dst, intptr_t
> srcStride)
> +{
> +    ALIGN_VAR_32(int16_t, coef[8 * 8]);
> +    ALIGN_VAR_32(int16_t, avgBlock[8 * 8]);
> +    int32_t totalSum = 0;
> +    int16_t sum = 0;
> +    for (int i = 0; i < 8; i++)
> +        for (int j =0; j < 8; j++)
> +        {
> +            sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1]
> +                    + src[(2*i+1)*srcStride + 2*j] +
> src[(2*i+1)*srcStride + 2*j + 1];
> +            avgBlock[i*8 + j] = sum >> 2;
> +
> +            totalSum += sum;
> +        }
> +
> +    (*s_dct8x8)(avgBlock, coef, 8);
> +    memset(dst, 0, 256 * sizeof(int16_t));
> +    for (int i = 0; i < 8; i++)
> +    {
> +        memcpy(&dst[i * 16], &coef[i * 8], 8 * sizeof(int16_t));
> +    }
> +    dst[0] = static_cast<int16_t>(totalSum >> 1);
> +}
> +
> +static void lowPassDct32_c(const int16_t* src, int16_t* dst, intptr_t
> srcStride)
> +{
> +    ALIGN_VAR_32(int16_t, coef[16 * 16]);
> +    ALIGN_VAR_32(int16_t, avgBlock[16 * 16]);
> +    int32_t totalSum = 0;
> +    int16_t sum = 0;
> +    for (int i = 0; i < 16; i++)
> +        for (int j =0; j < 16; j++)
> +        {
> +            sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1]
> +                    + src[(2*i+1)*srcStride + 2*j] +
> src[(2*i+1)*srcStride + 2*j + 1];
> +            avgBlock[i*16 + j] = sum >> 2;
> +
> +            totalSum += sum;
> +        }
> +
> +    (*s_dct16x16)(avgBlock, coef, 16);
> +    memset(dst, 0, 1024 * sizeof(int16_t));
> +    for (int i = 0; i < 16; i++)
> +    {
> +        memcpy(&dst[i * 32], &coef[i * 16], 16 * sizeof(int16_t));
> +    }
> +    dst[0] = static_cast<int16_t>(totalSum >> 3);
> +}
> +
> +namespace X265_NS {
> +// x265 private namespace
> +
> +void setupLowPassPrimitives_c(EncoderPrimitives& p)
> +{
> +    s_dct4x4 = &(p.cu[BLOCK_4x4].standard_dct);
> +    s_dct8x8 = &(p.cu[BLOCK_8x8].standard_dct);
> +    s_dct16x16 = &(p.cu[BLOCK_16x16].standard_dct);
> +
> +    p.cu[BLOCK_8x8].lowpass_dct = lowPassDct8_c;
> +    p.cu[BLOCK_16x16].lowpass_dct = lowPassDct16_c;
> +    p.cu[BLOCK_32x32].lowpass_dct = lowPassDct32_c;
> +}
> +}
> diff -r 6a310b24c6a2 -r 893b36b82133 source/common/param.cpp
> --- a/source/common/param.cpp   Thu Nov 02 12:17:29 2017 +0530
> +++ b/source/common/param.cpp   Sat Oct 14 09:19:03 2017 -0700
> @@ -288,6 +288,9 @@
>      param->csvfpt = NULL;
>      param->forceFlush = 0;
>      param->bDisableLookahead = 0;
> +
> +    /* DCT Approximations */
> +    param->bLowPassDct = 0;
>  }
>
>  int x265_param_default_preset(x265_param* param, const char* preset,
> const char* tune)
> @@ -927,6 +930,7 @@
>      OPT("max-cll") bError |= sscanf(value, "%hu,%hu", &p->maxCLL,
> &p->maxFALL) != 2;
>      OPT("min-luma") p->minLuma = (uint16_t)atoi(value);
>      OPT("max-luma") p->maxLuma = (uint16_t)atoi(value);
> +    OPT("lowpass-dct") p->bLowPassDct = atobool(value);
>      OPT("uhd-bd") p->uhdBluray = atobool(value);
>      else
>          bExtraParams = true;
> @@ -1676,6 +1680,7 @@
>      s += sprintf(s, " refine-mv=%d", p->mvRefine);
>      BOOL(p->bLimitSAO, "limit-sao");
>      s += sprintf(s, " ctu-info=%d", p->bCTUInfo);
> +    BOOL(p->bLowPassDct, "lowpass-dct");
>  #undef BOOL
>      return buf;
>  }
> diff -r 6a310b24c6a2 -r 893b36b82133 source/common/primitives.cpp
> --- a/source/common/primitives.cpp      Thu Nov 02 12:17:29 2017 +0530
> +++ b/source/common/primitives.cpp      Sat Oct 14 09:19:03 2017 -0700
> @@ -58,11 +58,13 @@
>  void setupLoopFilterPrimitives_c(EncoderPrimitives &p);
>  void setupSaoPrimitives_c(EncoderPrimitives &p);
>  void setupSeaIntegralPrimitives_c(EncoderPrimitives &p);
> +void setupLowPassPrimitives_c(EncoderPrimitives& p);
>
>  void setupCPrimitives(EncoderPrimitives &p)
>  {
>      setupPixelPrimitives_c(p);      // pixel.cpp
>      setupDCTPrimitives_c(p);        // dct.cpp
> +    setupLowPassPrimitives_c(p);    // lowpassdct.cpp
>      setupFilterPrimitives_c(p);     // ipfilter.cpp
>      setupIntraPrimitives_c(p);      // intrapred.cpp
>      setupLoopFilterPrimitives_c(p); // loopfilter.cpp
> @@ -70,6 +72,19 @@
>      setupSeaIntegralPrimitives_c(p);  // framefilter.cpp
>  }
>
> +void enableLowpassDCTPrimitives(EncoderPrimitives &p)
> +{
> +    // update copies of the standard dct transform
> +    p.cu[BLOCK_4x4].standard_dct = p.cu[BLOCK_4x4].dct;
> +    p.cu[BLOCK_8x8].standard_dct = p.cu[BLOCK_8x8].dct;
> +    p.cu[BLOCK_16x16].standard_dct = p.cu[BLOCK_16x16].dct;
> +    p.cu[BLOCK_32x32].standard_dct = p.cu[BLOCK_32x32].dct;
> +
> +    // replace active dct by lowpass dct for high dct transforms
> +    p.cu[BLOCK_16x16].dct = p.cu[BLOCK_16x16].lowpass_dct;
> +    p.cu[BLOCK_32x32].dct = p.cu[BLOCK_32x32].lowpass_dct;
> +}
> +
>  void setupAliasPrimitives(EncoderPrimitives &p)
>  {
>  #if HIGH_BIT_DEPTH
> @@ -256,6 +271,11 @@
>  #endif
>
>          setupAliasPrimitives(primitives);
> +
> +        if (param->bLowPassDct && param->rc.qp > 20)
> +        {
> +            enableLowpassDCTPrimitives(primitives);
> +        }
>

Essentially, this means that lowpass-dct is enabled only for constant-QP
encodes. You could consider relaxing this so the option also works in other
rate-control modes (ABR/CRF), and give some guidance in your docs on
when the feature is best used. Hard-coding the limits in the code
isn't a great idea, IMO.

>      }
>
>      x265_report_simd(param);
> diff -r 6a310b24c6a2 -r 893b36b82133 source/common/primitives.h
> --- a/source/common/primitives.h        Thu Nov 02 12:17:29 2017 +0530
> +++ b/source/common/primitives.h        Sat Oct 14 09:19:03 2017 -0700
> @@ -259,8 +259,12 @@
>       * primitives will leave 64x64 pointers NULL.  Indexed by LumaCU */
>      struct CU
>      {
> -        dct_t           dct;
> -        idct_t          idct;
> +        dct_t           dct;    // active dct transformation
> +        idct_t          idct;   // active idct transformation
> +
> +        dct_t           standard_dct;   // original dct function, used by
> lowpass_dct
> +        dct_t           lowpass_dct;    // lowpass dct approximation
> +
>          calcresidual_t  calcresidual;
>          pixel_sub_ps_t  sub_ps;
>          pixel_add_ps_t  add_ps;
> diff -r 6a310b24c6a2 -r 893b36b82133 source/x265.h
> --- a/source/x265.h     Thu Nov 02 12:17:29 2017 +0530
> +++ b/source/x265.h     Sat Oct 14 09:19:03 2017 -0700
> @@ -1505,6 +1505,11 @@
>
>      /* Disable lookahead */
>      int       bDisableLookahead;
> +
> +    /* Use low-pass truncated dct approximation
> +    *  This DCT approximation is less computational intensive and gives
> results close to
> +    *  standard DCT for QP >= 23 */
> +    int       bLowPassDct;
>  } x265_param;
>
>  /* x265_param_alloc:
> diff -r 6a310b24c6a2 -r 893b36b82133 source/x265cli.h
> --- a/source/x265cli.h  Thu Nov 02 12:17:29 2017 +0530
> +++ b/source/x265cli.h  Sat Oct 14 09:19:03 2017 -0700
> @@ -282,6 +282,7 @@
>      { "force-flush",    required_argument, NULL, 0 },
>      { "splitrd-skip",         no_argument, NULL, 0 },
>      { "no-splitrd-skip",      no_argument, NULL, 0 },
> +    { "lowpass-dct",          no_argument, NULL, 0 },
>      { 0, 0, 0, 0 },
>      { 0, 0, 0, 0 },
>      { 0, 0, 0, 0 },
> @@ -543,6 +544,7 @@
>      H1("-r/--recon <filename>            Reconstructed raw image YUV or
> Y4M output file name\n");
>      H1("   --recon-depth <integer>       Bit-depth of reconstructed raw
> image file. Defaults to input bit depth, or 8 if Y4M\n");
>      H1("   --recon-y4m-exec <string>     pipe reconstructed frames to Y4M
> viewer, ex:\"ffplay -i pipe:0 -autoexit\"\n");
> +    H0("   --lowpass-dct                 Use low-pass subband dct
> approximation. Default %s\n", OPT(param->bLowPassDct));
>      H1("\nExecutable return codes:\n");
>      H1("    0 - encode successful\n");
>      H1("    1 - unable to parse command line\n");
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20171106/3f97ce6f/attachment-0001.html>