[x265] [PATCH] Implementation of low-pass subband dct approximation
Pradeep Ramachandran
pradeep at multicorewareinc.com
Mon Nov 6 05:54:28 CET 2017
On Fri, Nov 3, 2017 at 9:05 PM, <mont3z.claro5 at gmail.com> wrote:
> # HG changeset patch
> # User hribeiro
> # Date 1507997943 25200
> # Sat Oct 14 09:19:03 2017 -0700
> # Node ID 893b36b82133a2bc4d3cfd6aa3a18c544ce0bf94
> # Parent 6a310b24c6a2d831ef08bbda1bdcf9d929daa308
> Implementation of low-pass subband dct approximation.
>
Thanks for the contribution. I had to make one small fix to where the new
CLI option is added in x265cli.h to avoid the MSVC compilation error "if block
too deeply nested", but otherwise it was good to go. I've pushed
this to the default branch. Thanks again for the contribution.
I have one comment below which I think is worth addressing in a
subsequent patch.
>
> diff -r 6a310b24c6a2 -r 893b36b82133 doc/reST/cli.rst
> --- a/doc/reST/cli.rst Thu Nov 02 12:17:29 2017 +0530
> +++ b/doc/reST/cli.rst Sat Oct 14 09:19:03 2017 -0700
> @@ -2142,6 +2142,18 @@
>
> Only effective at RD levels 5 and 6
>
> +DCT Approximations
> +==================
> +
> +.. option:: --lowpass-dct
> +
> + If enabled, x265 will use low-pass truncated dct approximation
> instead of the
> + standard dct. This approximation is less computationally intensive but
> it generates
> + truncated coefficient matrices for the transformed block. Empirical
> analysis shows
> + this approximation gives good PSNR results for QP>=23.
> +
> + This approximation should be considered for platforms with
> performance and time
> + constraints.
>
> Debugging options
> =================
> diff -r 6a310b24c6a2 -r 893b36b82133 source/CMakeLists.txt
> --- a/source/CMakeLists.txt Thu Nov 02 12:17:29 2017 +0530
> +++ b/source/CMakeLists.txt Sat Oct 14 09:19:03 2017 -0700
> @@ -29,7 +29,7 @@
> option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
> mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
> # X265_BUILD must be incremented each time the public API is changed
> -set(X265_BUILD 136)
> +set(X265_BUILD 137)
> configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
> "${PROJECT_BINARY_DIR}/x265.def")
> configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
> diff -r 6a310b24c6a2 -r 893b36b82133 source/common/CMakeLists.txt
> --- a/source/common/CMakeLists.txt Thu Nov 02 12:17:29 2017 +0530
> +++ b/source/common/CMakeLists.txt Sat Oct 14 09:19:03 2017 -0700
> @@ -131,7 +131,7 @@
> add_library(common OBJECT
> ${ASM_PRIMITIVES} ${VEC_PRIMITIVES} ${ALTIVEC_PRIMITIVES} ${WINXP}
> primitives.cpp primitives.h
> - pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp loopfilter.cpp
> + pixel.cpp dct.cpp lowpassdct.cpp ipfilter.cpp intrapred.cpp
> loopfilter.cpp
> constants.cpp constants.h
> cpu.cpp cpu.h version.cpp
> threading.cpp threading.h
> diff -r 6a310b24c6a2 -r 893b36b82133 source/common/lowpassdct.cpp
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/common/lowpassdct.cpp Sat Oct 14 09:19:03 2017 -0700
> @@ -0,0 +1,127 @@
> +/**********************************************************
> *******************
> + * Copyright (C) 2017
> + *
> + * Authors: Humberto Ribeiro Filho <mont3z.claro5 at gmail.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111,
> USA.
> + *
> + * This program is also available under a commercial proprietary license.
> + * For more information, contact us at license @ x265.com.
> + ************************************************************
> *****************/
> +
> +#include "common.h"
> +#include "primitives.h"
> +
> +using namespace X265_NS;
> +
> +/* standard dct transformations */
> +static dct_t* s_dct4x4;
> +static dct_t* s_dct8x8;
> +static dct_t* s_dct16x16;
> +
> +static void lowPassDct8_c(const int16_t* src, int16_t* dst, intptr_t
> srcStride)
> +{
> + ALIGN_VAR_32(int16_t, coef[4 * 4]);
> + ALIGN_VAR_32(int16_t, avgBlock[4 * 4]);
> + int16_t totalSum = 0;
> + int16_t sum = 0;
> +
> + for (int i = 0; i < 4; i++)
> + for (int j =0; j < 4; j++)
> + {
> + // Calculate average of 2x2 cells
> + sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1]
> + + src[(2*i+1)*srcStride + 2*j] +
> src[(2*i+1)*srcStride + 2*j + 1];
> + avgBlock[i*4 + j] = sum >> 2;
> +
> + totalSum += sum; // use to calculate total block average
> + }
> +
> + //dct4
> + (*s_dct4x4)(avgBlock, coef, 4);
> + memset(dst, 0, 64 * sizeof(int16_t));
> + for (int i = 0; i < 4; i++)
> + {
> + memcpy(&dst[i * 8], &coef[i * 4], 4 * sizeof(int16_t));
> + }
> +
> + // replace first coef with total block average
> + dst[0] = totalSum << 1;
> +}
> +
> +static void lowPassDct16_c(const int16_t* src, int16_t* dst, intptr_t
> srcStride)
> +{
> + ALIGN_VAR_32(int16_t, coef[8 * 8]);
> + ALIGN_VAR_32(int16_t, avgBlock[8 * 8]);
> + int32_t totalSum = 0;
> + int16_t sum = 0;
> + for (int i = 0; i < 8; i++)
> + for (int j =0; j < 8; j++)
> + {
> + sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1]
> + + src[(2*i+1)*srcStride + 2*j] +
> src[(2*i+1)*srcStride + 2*j + 1];
> + avgBlock[i*8 + j] = sum >> 2;
> +
> + totalSum += sum;
> + }
> +
> + (*s_dct8x8)(avgBlock, coef, 8);
> + memset(dst, 0, 256 * sizeof(int16_t));
> + for (int i = 0; i < 8; i++)
> + {
> + memcpy(&dst[i * 16], &coef[i * 8], 8 * sizeof(int16_t));
> + }
> + dst[0] = static_cast<int16_t>(totalSum >> 1);
> +}
> +
> +static void lowPassDct32_c(const int16_t* src, int16_t* dst, intptr_t
> srcStride)
> +{
> + ALIGN_VAR_32(int16_t, coef[16 * 16]);
> + ALIGN_VAR_32(int16_t, avgBlock[16 * 16]);
> + int32_t totalSum = 0;
> + int16_t sum = 0;
> + for (int i = 0; i < 16; i++)
> + for (int j =0; j < 16; j++)
> + {
> + sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1]
> + + src[(2*i+1)*srcStride + 2*j] +
> src[(2*i+1)*srcStride + 2*j + 1];
> + avgBlock[i*16 + j] = sum >> 2;
> +
> + totalSum += sum;
> + }
> +
> + (*s_dct16x16)(avgBlock, coef, 16);
> + memset(dst, 0, 1024 * sizeof(int16_t));
> + for (int i = 0; i < 16; i++)
> + {
> + memcpy(&dst[i * 32], &coef[i * 16], 16 * sizeof(int16_t));
> + }
> + dst[0] = static_cast<int16_t>(totalSum >> 3);
> +}
> +
> +namespace X265_NS {
> +// x265 private namespace
> +
> +void setupLowPassPrimitives_c(EncoderPrimitives& p)
> +{
> + s_dct4x4 = &(p.cu[BLOCK_4x4].standard_dct);
> + s_dct8x8 = &(p.cu[BLOCK_8x8].standard_dct);
> + s_dct16x16 = &(p.cu[BLOCK_16x16].standard_dct);
> +
> + p.cu[BLOCK_8x8].lowpass_dct = lowPassDct8_c;
> + p.cu[BLOCK_16x16].lowpass_dct = lowPassDct16_c;
> + p.cu[BLOCK_32x32].lowpass_dct = lowPassDct32_c;
> +}
> +}
> diff -r 6a310b24c6a2 -r 893b36b82133 source/common/param.cpp
> --- a/source/common/param.cpp Thu Nov 02 12:17:29 2017 +0530
> +++ b/source/common/param.cpp Sat Oct 14 09:19:03 2017 -0700
> @@ -288,6 +288,9 @@
> param->csvfpt = NULL;
> param->forceFlush = 0;
> param->bDisableLookahead = 0;
> +
> + /* DCT Approximations */
> + param->bLowPassDct = 0;
> }
>
> int x265_param_default_preset(x265_param* param, const char* preset,
> const char* tune)
> @@ -927,6 +930,7 @@
> OPT("max-cll") bError |= sscanf(value, "%hu,%hu", &p->maxCLL,
> &p->maxFALL) != 2;
> OPT("min-luma") p->minLuma = (uint16_t)atoi(value);
> OPT("max-luma") p->maxLuma = (uint16_t)atoi(value);
> + OPT("lowpass-dct") p->bLowPassDct = atobool(value);
> OPT("uhd-bd") p->uhdBluray = atobool(value);
> else
> bExtraParams = true;
> @@ -1676,6 +1680,7 @@
> s += sprintf(s, " refine-mv=%d", p->mvRefine);
> BOOL(p->bLimitSAO, "limit-sao");
> s += sprintf(s, " ctu-info=%d", p->bCTUInfo);
> + BOOL(p->bLowPassDct, "lowpass-dct");
> #undef BOOL
> return buf;
> }
> diff -r 6a310b24c6a2 -r 893b36b82133 source/common/primitives.cpp
> --- a/source/common/primitives.cpp Thu Nov 02 12:17:29 2017 +0530
> +++ b/source/common/primitives.cpp Sat Oct 14 09:19:03 2017 -0700
> @@ -58,11 +58,13 @@
> void setupLoopFilterPrimitives_c(EncoderPrimitives &p);
> void setupSaoPrimitives_c(EncoderPrimitives &p);
> void setupSeaIntegralPrimitives_c(EncoderPrimitives &p);
> +void setupLowPassPrimitives_c(EncoderPrimitives& p);
>
> void setupCPrimitives(EncoderPrimitives &p)
> {
> setupPixelPrimitives_c(p); // pixel.cpp
> setupDCTPrimitives_c(p); // dct.cpp
> + setupLowPassPrimitives_c(p); // lowpassdct.cpp
> setupFilterPrimitives_c(p); // ipfilter.cpp
> setupIntraPrimitives_c(p); // intrapred.cpp
> setupLoopFilterPrimitives_c(p); // loopfilter.cpp
> @@ -70,6 +72,19 @@
> setupSeaIntegralPrimitives_c(p); // framefilter.cpp
> }
>
> +void enableLowpassDCTPrimitives(EncoderPrimitives &p)
> +{
> + // update copies of the standard dct transform
> + p.cu[BLOCK_4x4].standard_dct = p.cu[BLOCK_4x4].dct;
> + p.cu[BLOCK_8x8].standard_dct = p.cu[BLOCK_8x8].dct;
> + p.cu[BLOCK_16x16].standard_dct = p.cu[BLOCK_16x16].dct;
> + p.cu[BLOCK_32x32].standard_dct = p.cu[BLOCK_32x32].dct;
> +
> + // replace active dct by lowpass dct for high dct transforms
> + p.cu[BLOCK_16x16].dct = p.cu[BLOCK_16x16].lowpass_dct;
> + p.cu[BLOCK_32x32].dct = p.cu[BLOCK_32x32].lowpass_dct;
> +}
> +
> void setupAliasPrimitives(EncoderPrimitives &p)
> {
> #if HIGH_BIT_DEPTH
> @@ -256,6 +271,11 @@
> #endif
>
> setupAliasPrimitives(primitives);
> +
> + if (param->bLowPassDct && param->rc.qp > 20)
> + {
> + enableLowpassDCTPrimitives(primitives);
> + }
>
Because the check above is on param->rc.qp, this essentially means that you
enable lowpass-dct only when doing constant QP encodes. You could consider
relaxing this to enable the option for other rate-control modes as well
(ABR/CRF) and add some guidance in your docs as to when the feature is better
to use. Hard-coding the limits in the code isn't a great idea, IMO.
> }
>
> x265_report_simd(param);
> diff -r 6a310b24c6a2 -r 893b36b82133 source/common/primitives.h
> --- a/source/common/primitives.h Thu Nov 02 12:17:29 2017 +0530
> +++ b/source/common/primitives.h Sat Oct 14 09:19:03 2017 -0700
> @@ -259,8 +259,12 @@
> * primitives will leave 64x64 pointers NULL. Indexed by LumaCU */
> struct CU
> {
> - dct_t dct;
> - idct_t idct;
> + dct_t dct; // active dct transformation
> + idct_t idct; // active idct transformation
> +
> + dct_t standard_dct; // original dct function, used by
> lowpass_dct
> + dct_t lowpass_dct; // lowpass dct approximation
> +
> calcresidual_t calcresidual;
> pixel_sub_ps_t sub_ps;
> pixel_add_ps_t add_ps;
> diff -r 6a310b24c6a2 -r 893b36b82133 source/x265.h
> --- a/source/x265.h Thu Nov 02 12:17:29 2017 +0530
> +++ b/source/x265.h Sat Oct 14 09:19:03 2017 -0700
> @@ -1505,6 +1505,11 @@
>
> /* Disable lookahead */
> int bDisableLookahead;
> +
> + /* Use low-pass truncated dct approximation
> + * This DCT approximation is less computationally intensive and gives
> results close to
> + * standard DCT for QP >= 23 */
> + int bLowPassDct;
> } x265_param;
>
> /* x265_param_alloc:
> diff -r 6a310b24c6a2 -r 893b36b82133 source/x265cli.h
> --- a/source/x265cli.h Thu Nov 02 12:17:29 2017 +0530
> +++ b/source/x265cli.h Sat Oct 14 09:19:03 2017 -0700
> @@ -282,6 +282,7 @@
> { "force-flush", required_argument, NULL, 0 },
> { "splitrd-skip", no_argument, NULL, 0 },
> { "no-splitrd-skip", no_argument, NULL, 0 },
> + { "lowpass-dct", no_argument, NULL, 0 },
> { 0, 0, 0, 0 },
> { 0, 0, 0, 0 },
> { 0, 0, 0, 0 },
> @@ -543,6 +544,7 @@
> H1("-r/--recon <filename> Reconstructed raw image YUV or
> Y4M output file name\n");
> H1(" --recon-depth <integer> Bit-depth of reconstructed raw
> image file. Defaults to input bit depth, or 8 if Y4M\n");
> H1(" --recon-y4m-exec <string> pipe reconstructed frames to Y4M
> viewer, ex:\"ffplay -i pipe:0 -autoexit\"\n");
> + H0(" --lowpass-dct Use low-pass subband dct
> approximation. Default %s\n", OPT(param->bLowPassDct));
> H1("\nExecutable return codes:\n");
> H1(" 0 - encode successful\n");
> H1(" 1 - unable to parse command line\n");
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20171106/3f97ce6f/attachment-0001.html>
More information about the x265-devel
mailing list