[x265] [PATCH] Implementation of low-pass subband dct approximation
mont3z.claro5 at gmail.com
mont3z.claro5 at gmail.com
Fri Nov 3 16:35:58 CET 2017
# HG changeset patch
# User hribeiro
# Date 1507997943 25200
# Sat Oct 14 09:19:03 2017 -0700
# Node ID 893b36b82133a2bc4d3cfd6aa3a18c544ce0bf94
# Parent 6a310b24c6a2d831ef08bbda1bdcf9d929daa308
Implementation of low-pass subband dct approximation.
diff -r 6a310b24c6a2 -r 893b36b82133 doc/reST/cli.rst
--- a/doc/reST/cli.rst Thu Nov 02 12:17:29 2017 +0530
+++ b/doc/reST/cli.rst Sat Oct 14 09:19:03 2017 -0700
@@ -2142,6 +2142,18 @@
Only effective at RD levels 5 and 6
+DCT Approximations
+==================
+
+.. option:: --lowpass-dct
+
+ If enabled, x265 will use a low-pass truncated DCT approximation instead of the
+ standard DCT. This approximation is less computationally intensive, but it generates
+ truncated coefficient matrices for the transformed block. Empirical analysis shows
+ that this approximation gives good PSNR results for QP>=23.
+
+ This approximation should be considered for platforms with performance and time
+ constraints.
Debugging options
=================
diff -r 6a310b24c6a2 -r 893b36b82133 source/CMakeLists.txt
--- a/source/CMakeLists.txt Thu Nov 02 12:17:29 2017 +0530
+++ b/source/CMakeLists.txt Sat Oct 14 09:19:03 2017 -0700
@@ -29,7 +29,7 @@
option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
# X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 136)
+set(X265_BUILD 137)
configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
"${PROJECT_BINARY_DIR}/x265.def")
configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r 6a310b24c6a2 -r 893b36b82133 source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt Thu Nov 02 12:17:29 2017 +0530
+++ b/source/common/CMakeLists.txt Sat Oct 14 09:19:03 2017 -0700
@@ -131,7 +131,7 @@
add_library(common OBJECT
${ASM_PRIMITIVES} ${VEC_PRIMITIVES} ${ALTIVEC_PRIMITIVES} ${WINXP}
primitives.cpp primitives.h
- pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp loopfilter.cpp
+ pixel.cpp dct.cpp lowpassdct.cpp ipfilter.cpp intrapred.cpp loopfilter.cpp
constants.cpp constants.h
cpu.cpp cpu.h version.cpp
threading.cpp threading.h
diff -r 6a310b24c6a2 -r 893b36b82133 source/common/lowpassdct.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/lowpassdct.cpp Sat Oct 14 09:19:03 2017 -0700
@@ -0,0 +1,127 @@
+/*****************************************************************************
+ * Copyright (C) 2017
+ *
+ * Authors: Humberto Ribeiro Filho <mont3z.claro5 at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#include "common.h"
+#include "primitives.h"
+
+using namespace X265_NS;
+
+/* pointers to the standard_dct slots, assigned in setupLowPassPrimitives_c and called by the lowPassDct*_c functions */
+static dct_t* s_dct4x4;
+static dct_t* s_dct8x8;
+static dct_t* s_dct16x16;
+
+static void lowPassDct8_c(const int16_t* src, int16_t* dst, intptr_t srcStride) // 8x8 low-pass dct: 4x4 dct of the 2x2-averaged block, remaining coefficients zeroed; srcStride is the src row pitch in int16_t elements
+{
+ ALIGN_VAR_32(int16_t, coef[4 * 4]); // output of the 4x4 dct
+ ALIGN_VAR_32(int16_t, avgBlock[4 * 4]); // source block downsampled by 2x2 averaging
+ int32_t totalSum = 0; // 32-bit like the 16x16/32x32 variants, so accumulating never narrows
+ int16_t sum = 0; // sum of the four samples of the current 2x2 cell
+
+ for (int i = 0; i < 4; i++)
+ for (int j =0; j < 4; j++)
+ {
+ // Calculate average of 2x2 cells
+ sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1]
+ + src[(2*i+1)*srcStride + 2*j] + src[(2*i+1)*srcStride + 2*j + 1];
+ avgBlock[i*4 + j] = sum >> 2;
+
+ totalSum += sum; // use to calculate total block average
+ }
+
+ // 4x4 standard dct of the averaged block (s_dct4x4 is assigned in setupLowPassPrimitives_c)
+ (*s_dct4x4)(avgBlock, coef, 4);
+ memset(dst, 0, 64 * sizeof(int16_t)); // coefficients outside the top-left 4x4 stay zero
+ for (int i = 0; i < 4; i++)
+ {
+ memcpy(&dst[i * 8], &coef[i * 4], 4 * sizeof(int16_t)); // coef row i -> first 4 entries of dst row i
+ }
+
+ // replace first coef with 2 * block sum (multiply: left-shifting a negative sum is undefined before C++20)
+ dst[0] = static_cast<int16_t>(totalSum * 2); // NOTE(review): factor 2 presumably matches the standard 8x8 dct DC scale at 8-bit depth - confirm
+}
+
+static void lowPassDct16_c(const int16_t* src, int16_t* dst, intptr_t srcStride) // 16x16 low-pass dct: 8x8 dct of the 2x2-averaged block, remaining coefficients zeroed; srcStride is the src row pitch in int16_t elements
+{
+ ALIGN_VAR_32(int16_t, coef[8 * 8]); // output of the 8x8 dct
+ ALIGN_VAR_32(int16_t, avgBlock[8 * 8]); // source block downsampled by 2x2 averaging
+ int32_t totalSum = 0; // running sum of every source sample in the 16x16 block
+ int16_t sum = 0; // sum of the four samples of the current 2x2 cell
+ for (int i = 0; i < 8; i++)
+ for (int j =0; j < 8; j++)
+ {
+ sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1] // top two samples of the 2x2 cell
+ + src[(2*i+1)*srcStride + 2*j] + src[(2*i+1)*srcStride + 2*j + 1]; // bottom two samples
+ avgBlock[i*8 + j] = sum >> 2; // cell sum divided by 4 via right shift
+
+ totalSum += sum; // accumulated for the DC coefficient written at the end
+ }
+
+ (*s_dct8x8)(avgBlock, coef, 8); // s_dct8x8 is assigned in setupLowPassPrimitives_c
+ memset(dst, 0, 256 * sizeof(int16_t)); // coefficients outside the top-left 8x8 stay zero
+ for (int i = 0; i < 8; i++)
+ {
+ memcpy(&dst[i * 16], &coef[i * 8], 8 * sizeof(int16_t)); // coef row i -> first 8 entries of dst row i
+ }
+ dst[0] = static_cast<int16_t>(totalSum >> 1); // DC overwritten with half the block sum; NOTE(review): presumably matches the standard dct DC scale at 8-bit depth - confirm
+}
+
+static void lowPassDct32_c(const int16_t* src, int16_t* dst, intptr_t srcStride) // 32x32 low-pass dct: 16x16 dct of the 2x2-averaged block, remaining coefficients zeroed; srcStride is the src row pitch in int16_t elements
+{
+ ALIGN_VAR_32(int16_t, coef[16 * 16]); // output of the 16x16 dct
+ ALIGN_VAR_32(int16_t, avgBlock[16 * 16]); // source block downsampled by 2x2 averaging
+ int32_t totalSum = 0; // running sum of every source sample in the 32x32 block
+ int16_t sum = 0; // sum of the four samples of the current 2x2 cell
+ for (int i = 0; i < 16; i++)
+ for (int j =0; j < 16; j++)
+ {
+ sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1] // top two samples of the 2x2 cell
+ + src[(2*i+1)*srcStride + 2*j] + src[(2*i+1)*srcStride + 2*j + 1]; // bottom two samples
+ avgBlock[i*16 + j] = sum >> 2; // cell sum divided by 4 via right shift
+
+ totalSum += sum; // accumulated for the DC coefficient written at the end
+ }
+
+ (*s_dct16x16)(avgBlock, coef, 16); // s_dct16x16 is assigned in setupLowPassPrimitives_c
+ memset(dst, 0, 1024 * sizeof(int16_t)); // coefficients outside the top-left 16x16 stay zero
+ for (int i = 0; i < 16; i++)
+ {
+ memcpy(&dst[i * 32], &coef[i * 16], 16 * sizeof(int16_t)); // coef row i -> first 16 entries of dst row i
+ }
+ dst[0] = static_cast<int16_t>(totalSum >> 3); // DC overwritten with the block sum divided by 8; NOTE(review): presumably matches the standard dct DC scale at 8-bit depth - confirm
+}
+
+namespace X265_NS {
+// x265 private namespace
+
+void setupLowPassPrimitives_c(EncoderPrimitives& p) // registers the C low-pass dct functions in p and records where p keeps its standard dct pointers
+{
+ s_dct4x4 = &(p.cu[BLOCK_4x4].standard_dct); // address of the slot, not its current value: the low-pass functions read it at call time
+ s_dct8x8 = &(p.cu[BLOCK_8x8].standard_dct); // NOTE(review): these pointers refer into p, so p presumably has to outlive every lowpass_dct call - confirm
+ s_dct16x16 = &(p.cu[BLOCK_16x16].standard_dct);
+
+ p.cu[BLOCK_8x8].lowpass_dct = lowPassDct8_c; // each NxN low-pass dct calls the (N/2)x(N/2) standard dct
+ p.cu[BLOCK_16x16].lowpass_dct = lowPassDct16_c;
+ p.cu[BLOCK_32x32].lowpass_dct = lowPassDct32_c;
+}
+}
diff -r 6a310b24c6a2 -r 893b36b82133 source/common/param.cpp
--- a/source/common/param.cpp Thu Nov 02 12:17:29 2017 +0530
+++ b/source/common/param.cpp Sat Oct 14 09:19:03 2017 -0700
@@ -288,6 +288,9 @@
param->csvfpt = NULL;
param->forceFlush = 0;
param->bDisableLookahead = 0;
+
+ /* DCT Approximations */
+ param->bLowPassDct = 0;
}
int x265_param_default_preset(x265_param* param, const char* preset, const char* tune)
@@ -927,6 +930,7 @@
OPT("max-cll") bError |= sscanf(value, "%hu,%hu", &p->maxCLL, &p->maxFALL) != 2;
OPT("min-luma") p->minLuma = (uint16_t)atoi(value);
OPT("max-luma") p->maxLuma = (uint16_t)atoi(value);
+ OPT("lowpass-dct") p->bLowPassDct = atobool(value);
OPT("uhd-bd") p->uhdBluray = atobool(value);
else
bExtraParams = true;
@@ -1676,6 +1680,7 @@
s += sprintf(s, " refine-mv=%d", p->mvRefine);
BOOL(p->bLimitSAO, "limit-sao");
s += sprintf(s, " ctu-info=%d", p->bCTUInfo);
+ BOOL(p->bLowPassDct, "lowpass-dct");
#undef BOOL
return buf;
}
diff -r 6a310b24c6a2 -r 893b36b82133 source/common/primitives.cpp
--- a/source/common/primitives.cpp Thu Nov 02 12:17:29 2017 +0530
+++ b/source/common/primitives.cpp Sat Oct 14 09:19:03 2017 -0700
@@ -58,11 +58,13 @@
void setupLoopFilterPrimitives_c(EncoderPrimitives &p);
void setupSaoPrimitives_c(EncoderPrimitives &p);
void setupSeaIntegralPrimitives_c(EncoderPrimitives &p);
+void setupLowPassPrimitives_c(EncoderPrimitives& p);
void setupCPrimitives(EncoderPrimitives &p)
{
setupPixelPrimitives_c(p); // pixel.cpp
setupDCTPrimitives_c(p); // dct.cpp
+ setupLowPassPrimitives_c(p); // lowpassdct.cpp
setupFilterPrimitives_c(p); // ipfilter.cpp
setupIntraPrimitives_c(p); // intrapred.cpp
setupLoopFilterPrimitives_c(p); // loopfilter.cpp
@@ -70,6 +72,19 @@
setupSeaIntegralPrimitives_c(p); // framefilter.cpp
}
+void enableLowpassDCTPrimitives(EncoderPrimitives &p) // makes the low-pass approximations the active 16x16 and 32x32 dct in p
+{
+ // snapshot the currently active dct pointers into standard_dct before any of them is replaced
+ p.cu[BLOCK_4x4].standard_dct = p.cu[BLOCK_4x4].dct;
+ p.cu[BLOCK_8x8].standard_dct = p.cu[BLOCK_8x8].dct;
+ p.cu[BLOCK_16x16].standard_dct = p.cu[BLOCK_16x16].dct;
+ p.cu[BLOCK_32x32].standard_dct = p.cu[BLOCK_32x32].dct;
+
+ // replace the active dct by the lowpass dct for 16x16 and 32x32 only; 4x4 and 8x8 keep their current dct
+ p.cu[BLOCK_16x16].dct = p.cu[BLOCK_16x16].lowpass_dct;
+ p.cu[BLOCK_32x32].dct = p.cu[BLOCK_32x32].lowpass_dct;
+}
+
void setupAliasPrimitives(EncoderPrimitives &p)
{
#if HIGH_BIT_DEPTH
@@ -256,6 +271,11 @@
#endif
setupAliasPrimitives(primitives);
+
+ if (param->bLowPassDct && param->rc.qp > 20)
+ {
+ enableLowpassDCTPrimitives(primitives);
+ }
}
x265_report_simd(param);
diff -r 6a310b24c6a2 -r 893b36b82133 source/common/primitives.h
--- a/source/common/primitives.h Thu Nov 02 12:17:29 2017 +0530
+++ b/source/common/primitives.h Sat Oct 14 09:19:03 2017 -0700
@@ -259,8 +259,12 @@
* primitives will leave 64x64 pointers NULL. Indexed by LumaCU */
struct CU
{
- dct_t dct;
- idct_t idct;
+ dct_t dct; // active dct transformation
+ idct_t idct; // active idct transformation
+
+ dct_t standard_dct; // original dct function, used by lowpass_dct
+ dct_t lowpass_dct; // lowpass dct approximation
+
calcresidual_t calcresidual;
pixel_sub_ps_t sub_ps;
pixel_add_ps_t add_ps;
diff -r 6a310b24c6a2 -r 893b36b82133 source/x265.h
--- a/source/x265.h Thu Nov 02 12:17:29 2017 +0530
+++ b/source/x265.h Sat Oct 14 09:19:03 2017 -0700
@@ -1505,6 +1505,11 @@
/* Disable lookahead */
int bDisableLookahead;
+
+ /* Use low-pass truncated dct approximation
+ * This DCT approximation is less computationally intensive and gives results close to
+ * the standard DCT for QP >= 23 */
+ int bLowPassDct;
} x265_param;
/* x265_param_alloc:
diff -r 6a310b24c6a2 -r 893b36b82133 source/x265cli.h
--- a/source/x265cli.h Thu Nov 02 12:17:29 2017 +0530
+++ b/source/x265cli.h Sat Oct 14 09:19:03 2017 -0700
@@ -282,6 +282,7 @@
{ "force-flush", required_argument, NULL, 0 },
{ "splitrd-skip", no_argument, NULL, 0 },
{ "no-splitrd-skip", no_argument, NULL, 0 },
+ { "lowpass-dct", no_argument, NULL, 0 },
{ 0, 0, 0, 0 },
{ 0, 0, 0, 0 },
{ 0, 0, 0, 0 },
@@ -543,6 +544,7 @@
H1("-r/--recon <filename> Reconstructed raw image YUV or Y4M output file name\n");
H1(" --recon-depth <integer> Bit-depth of reconstructed raw image file. Defaults to input bit depth, or 8 if Y4M\n");
H1(" --recon-y4m-exec <string> pipe reconstructed frames to Y4M viewer, ex:\"ffplay -i pipe:0 -autoexit\"\n");
+ H0(" --lowpass-dct Use low-pass subband dct approximation. Default %s\n", OPT(param->bLowPassDct));
H1("\nExecutable return codes:\n");
H1(" 0 - encode successful\n");
H1(" 1 - unable to parse command line\n");
-------------- next part --------------
A non-text attachment was scrubbed...
Name: videolan.patch
Type: text/x-patch
Size: 11822 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20171103/d501f5a7/attachment-0001.bin>
More information about the x265-devel
mailing list