<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Fri, Nov 3, 2017 at 9:05 PM, <span dir="ltr"><<a href="mailto:mont3z.claro5@gmail.com" target="_blank">mont3z.claro5@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5"># HG changeset patch<br>
# User hribeiro<br>
# Date 1507997943 25200<br>
# Sat Oct 14 09:19:03 2017 -0700<br>
# Node ID 893b36b82133a2bc4d3cfd6aa3a18c<wbr>544ce0bf94<br>
# Parent 6a310b24c6a2d831ef08bbda1bdcf9<wbr>d929daa308<br>
Implementation of low-pass subband dct approximation.<br></div></div></blockquote><div><br></div><div>Thanks for the contribution. I had to make one small fix in where the new cli option is added in x265cli.h to avoid a compilation error of "if block too deeply nested" in MSVC, but otherwise, it was good to go. I've pushed this to default branch. Thanks for the contribution.</div><div>I have one comment below which I think will be worth addressing in a subsequent patch.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5">
<br>
diff -r 6a310b24c6a2 -r 893b36b82133 doc/reST/cli.rst<br>
--- a/doc/reST/cli.rst Thu Nov 02 12:17:29 2017 +0530<br>
+++ b/doc/reST/cli.rst Sat Oct 14 09:19:03 2017 -0700<br>
@@ -2142,6 +2142,18 @@<br>
<br>
Only effective at RD levels 5 and 6<br>
<br>
+DCT Approximations<br>
+==================<br>
+<br>
+.. option:: --lowpass-dct<br>
+<br>
+ If enabled, x265 will use a low-pass truncated DCT approximation instead of the<br>
+ standard DCT. This approximation is less computationally intensive, but it generates<br>
+ truncated coefficient matrices for the transformed block. Empirical analysis shows<br>
+ this approximation gives good PSNR results for QP>=23.<br>
+<br>
+ This approximation should be considered for platforms with performance and time<br>
+ constraints.<br>
<br>
Debugging options<br>
=================<br>
diff -r 6a310b24c6a2 -r 893b36b82133 source/CMakeLists.txt<br>
--- a/source/CMakeLists.txt Thu Nov 02 12:17:29 2017 +0530<br>
+++ b/source/CMakeLists.txt Sat Oct 14 09:19:03 2017 -0700<br>
@@ -29,7 +29,7 @@<br>
option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)<br>
mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)<br>
# X265_BUILD must be incremented each time the public API is changed<br>
-set(X265_BUILD 136)<br>
+set(X265_BUILD 137)<br>
configure_file("${PROJECT_<wbr>SOURCE_DIR}/<a href="http://x265.def.in" rel="noreferrer" target="_blank">x265.def.in</a>"<br>
"${PROJECT_BINARY_DIR}/x265.<wbr>def")<br>
configure_file("${PROJECT_<wbr>SOURCE_DIR}/<a href="http://x265_config.h.in" rel="noreferrer" target="_blank">x265_config.h.in</a>"<br>
diff -r 6a310b24c6a2 -r 893b36b82133 source/common/CMakeLists.txt<br>
--- a/source/common/CMakeLists.txt Thu Nov 02 12:17:29 2017 +0530<br>
+++ b/source/common/CMakeLists.txt Sat Oct 14 09:19:03 2017 -0700<br>
@@ -131,7 +131,7 @@<br>
add_library(common OBJECT<br>
${ASM_PRIMITIVES} ${VEC_PRIMITIVES} ${ALTIVEC_PRIMITIVES} ${WINXP}<br>
primitives.cpp primitives.h<br>
- pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp loopfilter.cpp<br>
+ pixel.cpp dct.cpp lowpassdct.cpp ipfilter.cpp intrapred.cpp loopfilter.cpp<br>
constants.cpp constants.h<br>
cpu.cpp cpu.h version.cpp<br>
threading.cpp threading.h<br>
diff -r 6a310b24c6a2 -r 893b36b82133 source/common/lowpassdct.cpp<br>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000<br>
+++ b/source/common/lowpassdct.cpp Sat Oct 14 09:19:03 2017 -0700<br>
@@ -0,0 +1,127 @@<br>
+/****************************<wbr>******************************<wbr>*******************<br>
+ * Copyright (C) 2017<br>
+ *<br>
+ * Authors: Humberto Ribeiro Filho <<a href="mailto:mont3z.claro5@gmail.com">mont3z.claro5@gmail.com</a>><br>
+ *<br>
+ * This program is free software; you can redistribute it and/or modify<br>
+ * it under the terms of the GNU General Public License as published by<br>
+ * the Free Software Foundation; either version 2 of the License, or<br>
+ * (at your option) any later version.<br>
+ *<br>
+ * This program is distributed in the hope that it will be useful,<br>
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of<br>
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the<br>
+ * GNU General Public License for more details.<br>
+ *<br>
+ * You should have received a copy of the GNU General Public License<br>
+ * along with this program; if not, write to the Free Software<br>
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.<br>
+ *<br>
+ * This program is also available under a commercial proprietary license.<br>
+ * For more information, contact us at license @ <a href="http://x265.com" rel="noreferrer" target="_blank">x265.com</a>.<br>
+ ******************************<wbr>******************************<wbr>*****************/<br>
+<br>
+#include "common.h"<br>
+#include "primitives.h"<br>
+<br>
+using namespace X265_NS;<br>
+<br>
+/* standard dct transformations */<br>
+static dct_t* s_dct4x4;<br>
+static dct_t* s_dct8x8;<br>
+static dct_t* s_dct16x16;<br>
+<br>
+static void lowPassDct8_c(const int16_t* src, int16_t* dst, intptr_t srcStride)<br>
+{<br>
+ ALIGN_VAR_32(int16_t, coef[4 * 4]);<br>
+ ALIGN_VAR_32(int16_t, avgBlock[4 * 4]);<br>
+ int16_t totalSum = 0;<br>
+ int16_t sum = 0;<br>
+<br>
+ for (int i = 0; i < 4; i++)<br>
+ for (int j =0; j < 4; j++)<br>
+ {<br>
+ // Calculate average of 2x2 cells<br>
+ sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1]<br>
+ + src[(2*i+1)*srcStride + 2*j] + src[(2*i+1)*srcStride + 2*j + 1];<br>
+ avgBlock[i*4 + j] = sum >> 2;<br>
+<br>
+ totalSum += sum; // use to calculate total block average<br>
+ }<br>
+<br>
+ //dct4<br>
+ (*s_dct4x4)(avgBlock, coef, 4);<br>
+ memset(dst, 0, 64 * sizeof(int16_t));<br>
+ for (int i = 0; i < 4; i++)<br>
+ {<br>
+ memcpy(&dst[i * 8], &coef[i * 4], 4 * sizeof(int16_t));<br>
+ }<br>
+<br>
+ // replace first coef with total block average<br>
+ dst[0] = totalSum << 1;<br>
+}<br>
+<br>
+static void lowPassDct16_c(const int16_t* src, int16_t* dst, intptr_t srcStride)<br>
+{<br>
+ ALIGN_VAR_32(int16_t, coef[8 * 8]);<br>
+ ALIGN_VAR_32(int16_t, avgBlock[8 * 8]);<br>
+ int32_t totalSum = 0;<br>
+ int16_t sum = 0;<br>
+ for (int i = 0; i < 8; i++)<br>
+ for (int j =0; j < 8; j++)<br>
+ {<br>
+ sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1]<br>
+ + src[(2*i+1)*srcStride + 2*j] + src[(2*i+1)*srcStride + 2*j + 1];<br>
+ avgBlock[i*8 + j] = sum >> 2;<br>
+<br>
+ totalSum += sum;<br>
+ }<br>
+<br>
+ (*s_dct8x8)(avgBlock, coef, 8);<br>
+ memset(dst, 0, 256 * sizeof(int16_t));<br>
+ for (int i = 0; i < 8; i++)<br>
+ {<br>
+ memcpy(&dst[i * 16], &coef[i * 8], 8 * sizeof(int16_t));<br>
+ }<br>
+ dst[0] = static_cast<int16_t>(totalSum >> 1);<br>
+}<br>
+<br>
+static void lowPassDct32_c(const int16_t* src, int16_t* dst, intptr_t srcStride)<br>
+{<br>
+ ALIGN_VAR_32(int16_t, coef[16 * 16]);<br>
+ ALIGN_VAR_32(int16_t, avgBlock[16 * 16]);<br>
+ int32_t totalSum = 0;<br>
+ int16_t sum = 0;<br>
+ for (int i = 0; i < 16; i++)<br>
+ for (int j =0; j < 16; j++)<br>
+ {<br>
+ sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1]<br>
+ + src[(2*i+1)*srcStride + 2*j] + src[(2*i+1)*srcStride + 2*j + 1];<br>
+ avgBlock[i*16 + j] = sum >> 2;<br>
+<br>
+ totalSum += sum;<br>
+ }<br>
+<br>
+ (*s_dct16x16)(avgBlock, coef, 16);<br>
+ memset(dst, 0, 1024 * sizeof(int16_t));<br>
+ for (int i = 0; i < 16; i++)<br>
+ {<br>
+ memcpy(&dst[i * 32], &coef[i * 16], 16 * sizeof(int16_t));<br>
+ }<br>
+ dst[0] = static_cast<int16_t>(totalSum >> 3);<br>
+}<br>
+<br>
+namespace X265_NS {<br>
+// x265 private namespace<br>
+<br>
+void setupLowPassPrimitives_c(<wbr>EncoderPrimitives& p)<br>
+{<br>
+ s_dct4x4 = &(<a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_4x4].standard_<wbr>dct);<br>
+ s_dct8x8 = &(<a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_8x8].standard_<wbr>dct);<br>
+ s_dct16x16 = &(<a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_16x16].standard_<wbr>dct);<br>
+<br>
+ <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_8x8].lowpass_dct = lowPassDct8_c;<br>
+ <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_16x16].lowpass_dct = lowPassDct16_c;<br>
+ <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_32x32].lowpass_dct = lowPassDct32_c;<br>
+}<br>
+}<br>
diff -r 6a310b24c6a2 -r 893b36b82133 source/common/param.cpp<br>
--- a/source/common/param.cpp Thu Nov 02 12:17:29 2017 +0530<br>
+++ b/source/common/param.cpp Sat Oct 14 09:19:03 2017 -0700<br>
@@ -288,6 +288,9 @@<br>
param->csvfpt = NULL;<br>
param->forceFlush = 0;<br>
param->bDisableLookahead = 0;<br>
+<br>
+ /* DCT Approximations */<br>
+ param->bLowPassDct = 0;<br>
}<br>
<br>
int x265_param_default_preset(<wbr>x265_param* param, const char* preset, const char* tune)<br>
@@ -927,6 +930,7 @@<br>
OPT("max-cll") bError |= sscanf(value, "%hu,%hu", &p->maxCLL, &p->maxFALL) != 2;<br>
OPT("min-luma") p->minLuma = (uint16_t)atoi(value);<br>
OPT("max-luma") p->maxLuma = (uint16_t)atoi(value);<br>
+ OPT("lowpass-dct") p->bLowPassDct = atobool(value);<br>
OPT("uhd-bd") p->uhdBluray = atobool(value);<br>
else<br>
bExtraParams = true;<br>
@@ -1676,6 +1680,7 @@<br>
s += sprintf(s, " refine-mv=%d", p->mvRefine);<br>
BOOL(p->bLimitSAO, "limit-sao");<br>
s += sprintf(s, " ctu-info=%d", p->bCTUInfo);<br>
+ BOOL(p->bLowPassDct, "lowpass-dct");<br>
#undef BOOL<br>
return buf;<br>
}<br>
diff -r 6a310b24c6a2 -r 893b36b82133 source/common/primitives.cpp<br>
--- a/source/common/primitives.cpp Thu Nov 02 12:17:29 2017 +0530<br>
+++ b/source/common/primitives.cpp Sat Oct 14 09:19:03 2017 -0700<br>
@@ -58,11 +58,13 @@<br>
void setupLoopFilterPrimitives_c(<wbr>EncoderPrimitives &p);<br>
void setupSaoPrimitives_c(<wbr>EncoderPrimitives &p);<br>
void setupSeaIntegralPrimitives_c(<wbr>EncoderPrimitives &p);<br>
+void setupLowPassPrimitives_c(<wbr>EncoderPrimitives& p);<br>
<br>
void setupCPrimitives(<wbr>EncoderPrimitives &p)<br>
{<br>
setupPixelPrimitives_c(p); // pixel.cpp<br>
setupDCTPrimitives_c(p); // dct.cpp<br>
+ setupLowPassPrimitives_c(p); // lowpassdct.cpp<br>
setupFilterPrimitives_c(p); // ipfilter.cpp<br>
setupIntraPrimitives_c(p); // intrapred.cpp<br>
setupLoopFilterPrimitives_c(p)<wbr>; // loopfilter.cpp<br>
@@ -70,6 +72,19 @@<br>
setupSeaIntegralPrimitives_c(<wbr>p); // framefilter.cpp<br>
}<br>
<br>
+void enableLowpassDCTPrimitives(<wbr>EncoderPrimitives &p)<br>
+{<br>
+ // update copies of the standard dct transform<br>
+ <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_4x4].standard_dct = <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_4x4].dct;<br>
+ <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_8x8].standard_dct = <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_8x8].dct;<br>
+ <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_16x16].standard_dct = <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_16x16].dct;<br>
+ <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_32x32].standard_dct = <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_32x32].dct;<br>
+<br>
+ // replace active dct by lowpass dct for high dct transforms<br>
+ <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_16x16].dct = <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_16x16].lowpass_dct;<br>
+ <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_32x32].dct = <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_32x32].lowpass_dct;<br>
+}<br>
+<br>
void setupAliasPrimitives(<wbr>EncoderPrimitives &p)<br>
{<br>
#if HIGH_BIT_DEPTH<br>
@@ -256,6 +271,11 @@<br>
#endif<br>
<br>
setupAliasPrimitives(<wbr>primitives);<br>
+<br>
+ if (param->bLowPassDct && param->rc.qp > 20)<br>
+ {<br>
+ enableLowpassDCTPrimitives(<wbr>primitives);<br>
+ }<br></div></div></blockquote><div><br></div><div>Essentially this means that you enable lowpass-dct only when doing constant QP encodes. You could consider relaxing this to enable the option for other rate-control modes as well (ABR/CRF) and have some directives in your docs as to when the feature is better to use. Hard-coding the limits in the code isn't a great idea, IMO.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5">
}<br>
<br>
x265_report_simd(param);<br>
diff -r 6a310b24c6a2 -r 893b36b82133 source/common/primitives.h<br>
--- a/source/common/primitives.h Thu Nov 02 12:17:29 2017 +0530<br>
+++ b/source/common/primitives.h Sat Oct 14 09:19:03 2017 -0700<br>
@@ -259,8 +259,12 @@<br>
* primitives will leave 64x64 pointers NULL. Indexed by LumaCU */<br>
struct CU<br>
{<br>
- dct_t dct;<br>
- idct_t idct;<br>
+ dct_t dct; // active dct transformation<br>
+ idct_t idct; // active idct transformation<br>
+<br>
+ dct_t standard_dct; // original dct function, used by lowpass_dct<br>
+ dct_t lowpass_dct; // lowpass dct approximation<br>
+<br>
calcresidual_t calcresidual;<br>
pixel_sub_ps_t sub_ps;<br>
pixel_add_ps_t add_ps;<br>
diff -r 6a310b24c6a2 -r 893b36b82133 source/x265.h<br>
--- a/source/x265.h Thu Nov 02 12:17:29 2017 +0530<br>
+++ b/source/x265.h Sat Oct 14 09:19:03 2017 -0700<br>
@@ -1505,6 +1505,11 @@<br>
<br>
/* Disable lookahead */<br>
int bDisableLookahead;<br>
+<br>
+ /* Use low-pass truncated dct approximation<br>
+ * This DCT approximation is less computationally intensive and gives results close to<br>
+ * standard DCT for QP >= 23 */<br>
+ int bLowPassDct;<br>
} x265_param;<br>
<br>
/* x265_param_alloc:<br>
diff -r 6a310b24c6a2 -r 893b36b82133 source/x265cli.h<br>
--- a/source/x265cli.h Thu Nov 02 12:17:29 2017 +0530<br>
+++ b/source/x265cli.h Sat Oct 14 09:19:03 2017 -0700<br>
@@ -282,6 +282,7 @@<br>
{ "force-flush", required_argument, NULL, 0 },<br>
{ "splitrd-skip", no_argument, NULL, 0 },<br>
{ "no-splitrd-skip", no_argument, NULL, 0 },<br>
+ { "lowpass-dct", no_argument, NULL, 0 },<br>
{ 0, 0, 0, 0 },<br>
{ 0, 0, 0, 0 },<br>
{ 0, 0, 0, 0 },<br>
@@ -543,6 +544,7 @@<br>
H1("-r/--recon <filename> Reconstructed raw image YUV or Y4M output file name\n");<br>
H1(" --recon-depth <integer> Bit-depth of reconstructed raw image file. Defaults to input bit depth, or 8 if Y4M\n");<br>
H1(" --recon-y4m-exec <string> pipe reconstructed frames to Y4M viewer, ex:\"ffplay -i pipe:0 -autoexit\"\n");<br>
+ H0(" --lowpass-dct Use low-pass subband dct approximation. Default %s\n", OPT(param->bLowPassDct));<br>
H1("\nExecutable return codes:\n");<br>
H1(" 0 - encode successful\n");<br>
H1(" 1 - unable to parse command line\n");<br>
</div></div><br>______________________________<wbr>_________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/<wbr>listinfo/x265-devel</a><br>
<br></blockquote></div><br></div></div>