<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Fri, Nov 3, 2017 at 9:05 PM,  <span dir="ltr"><<a href="mailto:mont3z.claro5@gmail.com" target="_blank">mont3z.claro5@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5"># HG changeset patch<br>

# User hribeiro<br>

# Date 1507997943 25200<br>

#      Sat Oct 14 09:19:03 2017 -0700<br>

# Node ID 893b36b82133a2bc4d3cfd6aa3a18c<wbr>544ce0bf94<br>

# Parent  6a310b24c6a2d831ef08bbda1bdcf9<wbr>d929daa308<br>

Implementation of low-pass subband dct approximation.<br></div></div></blockquote><div><br></div><div>Thanks for the contribution. I had to make one small fix in where the new cli option is added in x265cli.h to avoid a compilation error of "if block too deeply nested" in MSVC, but otherwise, it was good to go. I've pushed this to default branch. Thanks for the contribution.</div><div>I have one comment below which I think will be worth addressing in a subsequent patch.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5">

<br>

diff -r 6a310b24c6a2 -r 893b36b82133 doc/reST/cli.rst<br>

--- a/doc/reST/cli.rst  Thu Nov 02 12:17:29 2017 +0530<br>

+++ b/doc/reST/cli.rst  Sat Oct 14 09:19:03 2017 -0700<br>

@@ -2142,6 +2142,18 @@<br>

<br>

        Only effective at RD levels 5 and 6<br>

<br>

+DCT Approximations<br>

+==================<br>

+<br>

+.. option:: --lowpass-dct<br>

+<br>

+    If enabled, x265 will use low-pass truncated dct approximation instead of the<br>

+    standard dct. This approximation is less computationally intensive but it generates<br>

+    truncated coefficient matrices for the transformed block. Empirical analysis shows<br>

+    this approximation gives good PSNR results for QP>=23.<br>

+<br>

+    This approximation should be considered for platforms with performance and time<br>

+    constraints.<br>

<br>

 Debugging options<br>

 =================<br>

diff -r 6a310b24c6a2 -r 893b36b82133 source/CMakeLists.txt<br>

--- a/source/CMakeLists.txt     Thu Nov 02 12:17:29 2017 +0530<br>

+++ b/source/CMakeLists.txt     Sat Oct 14 09:19:03 2017 -0700<br>

@@ -29,7 +29,7 @@<br>

 option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)<br>

 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)<br>

 # X265_BUILD must be incremented each time the public API is changed<br>

-set(X265_BUILD 136)<br>

+set(X265_BUILD 137)<br>

 configure_file("${PROJECT_<wbr>SOURCE_DIR}/<a href="http://x265.def.in" rel="noreferrer" target="_blank">x265.def.in</a>"<br>

                "${PROJECT_BINARY_DIR}/x265.<wbr>def")<br>

 configure_file("${PROJECT_<wbr>SOURCE_DIR}/<a href="http://x265_config.h.in" rel="noreferrer" target="_blank">x265_config.h.in</a>"<br>

diff -r 6a310b24c6a2 -r 893b36b82133 source/common/CMakeLists.txt<br>

--- a/source/common/CMakeLists.txt      Thu Nov 02 12:17:29 2017 +0530<br>

+++ b/source/common/CMakeLists.txt      Sat Oct 14 09:19:03 2017 -0700<br>

@@ -131,7 +131,7 @@<br>

 add_library(common OBJECT<br>

     ${ASM_PRIMITIVES} ${VEC_PRIMITIVES} ${ALTIVEC_PRIMITIVES} ${WINXP}<br>

     primitives.cpp primitives.h<br>

-    pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp loopfilter.cpp<br>

+    pixel.cpp dct.cpp lowpassdct.cpp ipfilter.cpp intrapred.cpp loopfilter.cpp<br>

     constants.cpp constants.h<br>

     cpu.cpp cpu.h version.cpp<br>

     threading.cpp threading.h<br>

diff -r 6a310b24c6a2 -r 893b36b82133 source/common/lowpassdct.cpp<br>

--- /dev/null   Thu Jan 01 00:00:00 1970 +0000<br>

+++ b/source/common/lowpassdct.cpp      Sat Oct 14 09:19:03 2017 -0700<br>

@@ -0,0 +1,127 @@<br>

+/****************************<wbr>******************************<wbr>*******************<br>

+ * Copyright (C) 2017<br>

+ *<br>

+ * Authors: Humberto Ribeiro Filho <<a href="mailto:mont3z.claro5@gmail.com">mont3z.claro5@gmail.com</a>><br>

+ *<br>

+ * This program is free software; you can redistribute it and/or modify<br>

+ * it under the terms of the GNU General Public License as published by<br>

+ * the Free Software Foundation; either version 2 of the License, or<br>

+ * (at your option) any later version.<br>

+ *<br>

+ * This program is distributed in the hope that it will be useful,<br>

+ * but WITHOUT ANY WARRANTY; without even the implied warranty of<br>

+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the<br>

+ * GNU General Public License for more details.<br>

+ *<br>

+ * You should have received a copy of the GNU General Public License<br>

+ * along with this program; if not, write to the Free Software<br>

+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.<br>

+ *<br>

+ * This program is also available under a commercial proprietary license.<br>

+ * For more information, contact us at license @ <a href="http://x265.com" rel="noreferrer" target="_blank">x265.com</a>.<br>

+ ******************************<wbr>******************************<wbr>*****************/<br>

+<br>

+#include "common.h"<br>

+#include "primitives.h"<br>

+<br>

+using namespace X265_NS;<br>

+<br>

+/* standard dct transformations */<br>

+static dct_t* s_dct4x4;<br>

+static dct_t* s_dct8x8;<br>

+static dct_t* s_dct16x16;<br>

+<br>

+static void lowPassDct8_c(const int16_t* src, int16_t* dst, intptr_t srcStride)<br>

+{<br>

+    ALIGN_VAR_32(int16_t, coef[4 * 4]);<br>

+    ALIGN_VAR_32(int16_t, avgBlock[4 * 4]);<br>

+    int16_t totalSum = 0;<br>

+    int16_t sum = 0;<br>

+<br>

+    for (int i = 0; i < 4; i++)<br>

+        for (int j =0; j < 4; j++)<br>

+        {<br>

+            // Calculate average of 2x2 cells<br>

+            sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1]<br>

+                    + src[(2*i+1)*srcStride + 2*j] + src[(2*i+1)*srcStride + 2*j + 1];<br>

+            avgBlock[i*4 + j] = sum >> 2;<br>

+<br>

+            totalSum += sum; // use to calculate total block average<br>

+        }<br>

+<br>

+    //dct4<br>

+    (*s_dct4x4)(avgBlock, coef, 4);<br>

+    memset(dst, 0, 64 * sizeof(int16_t));<br>

+    for (int i = 0; i < 4; i++)<br>

+    {<br>

+        memcpy(&dst[i * 8], &coef[i * 4], 4 * sizeof(int16_t));<br>

+    }<br>

+<br>

+    // replace first coef with total block average<br>

+    dst[0] = totalSum << 1;<br>

+}<br>

+<br>

+static void lowPassDct16_c(const int16_t* src, int16_t* dst, intptr_t srcStride)<br>

+{<br>

+    ALIGN_VAR_32(int16_t, coef[8 * 8]);<br>

+    ALIGN_VAR_32(int16_t, avgBlock[8 * 8]);<br>

+    int32_t totalSum = 0;<br>

+    int16_t sum = 0;<br>

+    for (int i = 0; i < 8; i++)<br>

+        for (int j =0; j < 8; j++)<br>

+        {<br>

+            sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1]<br>

+                    + src[(2*i+1)*srcStride + 2*j] + src[(2*i+1)*srcStride + 2*j + 1];<br>

+            avgBlock[i*8 + j] = sum >> 2;<br>

+<br>

+            totalSum += sum;<br>

+        }<br>

+<br>

+    (*s_dct8x8)(avgBlock, coef, 8);<br>

+    memset(dst, 0, 256 * sizeof(int16_t));<br>

+    for (int i = 0; i < 8; i++)<br>

+    {<br>

+        memcpy(&dst[i * 16], &coef[i * 8], 8 * sizeof(int16_t));<br>

+    }<br>

+    dst[0] = static_cast<int16_t>(totalSum >> 1);<br>

+}<br>

+<br>

+static void lowPassDct32_c(const int16_t* src, int16_t* dst, intptr_t srcStride)<br>

+{<br>

+    ALIGN_VAR_32(int16_t, coef[16 * 16]);<br>

+    ALIGN_VAR_32(int16_t, avgBlock[16 * 16]);<br>

+    int32_t totalSum = 0;<br>

+    int16_t sum = 0;<br>

+    for (int i = 0; i < 16; i++)<br>

+        for (int j =0; j < 16; j++)<br>

+        {<br>

+            sum = src[2*i*srcStride + 2*j] + src[2*i*srcStride + 2*j + 1]<br>

+                    + src[(2*i+1)*srcStride + 2*j] + src[(2*i+1)*srcStride + 2*j + 1];<br>

+            avgBlock[i*16 + j] = sum >> 2;<br>

+<br>

+            totalSum += sum;<br>

+        }<br>

+<br>

+    (*s_dct16x16)(avgBlock, coef, 16);<br>

+    memset(dst, 0, 1024 * sizeof(int16_t));<br>

+    for (int i = 0; i < 16; i++)<br>

+    {<br>

+        memcpy(&dst[i * 32], &coef[i * 16], 16 * sizeof(int16_t));<br>

+    }<br>

+    dst[0] = static_cast<int16_t>(totalSum >> 3);<br>

+}<br>

+<br>

+namespace X265_NS {<br>

+// x265 private namespace<br>

+<br>

+void setupLowPassPrimitives_c(<wbr>EncoderPrimitives& p)<br>

+{<br>

+    s_dct4x4 = &(<a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_4x4].standard_<wbr>dct);<br>

+    s_dct8x8 = &(<a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_8x8].standard_<wbr>dct);<br>

+    s_dct16x16 = &(<a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_16x16].standard_<wbr>dct);<br>

+<br>

+    <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_8x8].lowpass_dct = lowPassDct8_c;<br>

+    <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_16x16].lowpass_dct = lowPassDct16_c;<br>

+    <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_32x32].lowpass_dct = lowPassDct32_c;<br>

+}<br>

+}<br>

diff -r 6a310b24c6a2 -r 893b36b82133 source/common/param.cpp<br>

--- a/source/common/param.cpp   Thu Nov 02 12:17:29 2017 +0530<br>

+++ b/source/common/param.cpp   Sat Oct 14 09:19:03 2017 -0700<br>

@@ -288,6 +288,9 @@<br>

     param->csvfpt = NULL;<br>

     param->forceFlush = 0;<br>

     param->bDisableLookahead = 0;<br>

+<br>

+    /* DCT Approximations */<br>

+    param->bLowPassDct = 0;<br>

 }<br>

<br>

 int x265_param_default_preset(<wbr>x265_param* param, const char* preset, const char* tune)<br>

@@ -927,6 +930,7 @@<br>

     OPT("max-cll") bError |= sscanf(value, "%hu,%hu", &p->maxCLL, &p->maxFALL) != 2;<br>

     OPT("min-luma") p->minLuma = (uint16_t)atoi(value);<br>

     OPT("max-luma") p->maxLuma = (uint16_t)atoi(value);<br>

+    OPT("lowpass-dct") p->bLowPassDct = atobool(value);<br>

     OPT("uhd-bd") p->uhdBluray = atobool(value);<br>

     else<br>

         bExtraParams = true;<br>

@@ -1676,6 +1680,7 @@<br>

     s += sprintf(s, " refine-mv=%d", p->mvRefine);<br>

     BOOL(p->bLimitSAO, "limit-sao");<br>

     s += sprintf(s, " ctu-info=%d", p->bCTUInfo);<br>

+    BOOL(p->bLowPassDct, "lowpass-dct");<br>

 #undef BOOL<br>

     return buf;<br>

 }<br>

diff -r 6a310b24c6a2 -r 893b36b82133 source/common/primitives.cpp<br>

--- a/source/common/primitives.cpp      Thu Nov 02 12:17:29 2017 +0530<br>

+++ b/source/common/primitives.cpp      Sat Oct 14 09:19:03 2017 -0700<br>

@@ -58,11 +58,13 @@<br>

 void setupLoopFilterPrimitives_c(<wbr>EncoderPrimitives &p);<br>

 void setupSaoPrimitives_c(<wbr>EncoderPrimitives &p);<br>

 void setupSeaIntegralPrimitives_c(<wbr>EncoderPrimitives &p);<br>

+void setupLowPassPrimitives_c(<wbr>EncoderPrimitives& p);<br>

<br>

 void setupCPrimitives(<wbr>EncoderPrimitives &p)<br>

 {<br>

     setupPixelPrimitives_c(p);      // pixel.cpp<br>

     setupDCTPrimitives_c(p);        // dct.cpp<br>

+    setupLowPassPrimitives_c(p);    // lowpassdct.cpp<br>

     setupFilterPrimitives_c(p);     // ipfilter.cpp<br>

     setupIntraPrimitives_c(p);      // intrapred.cpp<br>

     setupLoopFilterPrimitives_c(p)<wbr>; // loopfilter.cpp<br>

@@ -70,6 +72,19 @@<br>

     setupSeaIntegralPrimitives_c(<wbr>p);  // framefilter.cpp<br>

 }<br>

<br>

+void enableLowpassDCTPrimitives(<wbr>EncoderPrimitives &p)<br>

+{<br>

+    // update copies of the standard dct transform<br>

+    <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_4x4].standard_dct = <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_4x4].dct;<br>

+    <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_8x8].standard_dct = <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_8x8].dct;<br>

+    <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_16x16].standard_dct = <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_16x16].dct;<br>

+    <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_32x32].standard_dct = <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_32x32].dct;<br>

+<br>

+    // replace active dct by lowpass dct for high dct transforms<br>

+    <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_16x16].dct = <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_16x16].lowpass_dct;<br>

+    <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_32x32].dct = <a href="http://p.cu" rel="noreferrer" target="_blank">p.cu</a>[BLOCK_32x32].lowpass_dct;<br>

+}<br>

+<br>

 void setupAliasPrimitives(<wbr>EncoderPrimitives &p)<br>

 {<br>

 #if HIGH_BIT_DEPTH<br>

@@ -256,6 +271,11 @@<br>

 #endif<br>

<br>

         setupAliasPrimitives(<wbr>primitives);<br>

+<br>

+        if (param->bLowPassDct && param->rc.qp > 20)<br>

+        {<br>

+            enableLowpassDCTPrimitives(<wbr>primitives);<br>

+        }<br></div></div></blockquote><div><br></div><div>Essentially this means that you enable lowpass-dct only when doing constant QP encodes. You could consider relaxing this to enable the option for other rate-control modes as well (ABR/CRF) and have some directives in your docs as to when the feature is better to use. Hard-coding the limits in the code isn't a great idea, IMO.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5">

     }<br>

<br>

     x265_report_simd(param);<br>

diff -r 6a310b24c6a2 -r 893b36b82133 source/common/primitives.h<br>

--- a/source/common/primitives.h        Thu Nov 02 12:17:29 2017 +0530<br>

+++ b/source/common/primitives.h        Sat Oct 14 09:19:03 2017 -0700<br>

@@ -259,8 +259,12 @@<br>

      * primitives will leave 64x64 pointers NULL.  Indexed by LumaCU */<br>

     struct CU<br>

     {<br>

-        dct_t           dct;<br>

-        idct_t          idct;<br>

+        dct_t           dct;    // active dct transformation<br>

+        idct_t          idct;   // active idct transformation<br>

+<br>

+        dct_t           standard_dct;   // original dct function, used by lowpass_dct<br>

+        dct_t           lowpass_dct;    // lowpass dct approximation<br>

+<br>

         calcresidual_t  calcresidual;<br>

         pixel_sub_ps_t  sub_ps;<br>

         pixel_add_ps_t  add_ps;<br>

diff -r 6a310b24c6a2 -r 893b36b82133 source/x265.h<br>

--- a/source/x265.h     Thu Nov 02 12:17:29 2017 +0530<br>

+++ b/source/x265.h     Sat Oct 14 09:19:03 2017 -0700<br>

@@ -1505,6 +1505,11 @@<br>

<br>

     /* Disable lookahead */<br>

     int       bDisableLookahead;<br>

+<br>

+    /* Use low-pass truncated dct approximation<br>

+    *  This DCT approximation is less computationally intensive and gives results close to<br>

+    *  standard DCT for QP >= 23 */<br>

+    int       bLowPassDct;<br>

 } x265_param;<br>

<br>

 /* x265_param_alloc:<br>

diff -r 6a310b24c6a2 -r 893b36b82133 source/x265cli.h<br>

--- a/source/x265cli.h  Thu Nov 02 12:17:29 2017 +0530<br>

+++ b/source/x265cli.h  Sat Oct 14 09:19:03 2017 -0700<br>

@@ -282,6 +282,7 @@<br>

     { "force-flush",    required_argument, NULL, 0 },<br>

     { "splitrd-skip",         no_argument, NULL, 0 },<br>

     { "no-splitrd-skip",      no_argument, NULL, 0 },<br>

+    { "lowpass-dct",          no_argument, NULL, 0 },<br>

     { 0, 0, 0, 0 },<br>

     { 0, 0, 0, 0 },<br>

     { 0, 0, 0, 0 },<br>

@@ -543,6 +544,7 @@<br>

     H1("-r/--recon <filename>            Reconstructed raw image YUV or Y4M output file name\n");<br>

     H1("   --recon-depth <integer>       Bit-depth of reconstructed raw image file. Defaults to input bit depth, or 8 if Y4M\n");<br>

     H1("   --recon-y4m-exec <string>     pipe reconstructed frames to Y4M viewer, ex:\"ffplay -i pipe:0 -autoexit\"\n");<br>

+    H0("   --lowpass-dct                 Use low-pass subband dct approximation. Default %s\n", OPT(param->bLowPassDct));<br>

     H1("\nExecutable return codes:\n");<br>

     H1("    0 - encode successful\n");<br>

     H1("    1 - unable to parse command line\n");<br>

</div></div><br>______________________________<wbr>_________________<br>

x265-devel mailing list<br>

<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>

<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/<wbr>listinfo/x265-devel</a><br>

<br></blockquote></div><br></div></div>