<div dir="ltr"><div dir="ltr"><br></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Tue, Jan 7, 2020 at 8:16 AM <<a href="mailto:srikanth.kurapati@multicorewareinc.com" target="_blank">srikanth.kurapati@multicorewareinc.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"># HG changeset patch<br>

# User Srikanth Kurapati<br>

# Date 1577432829 -19800<br>

#      Fri Dec 27 13:17:09 2019 +0530<br>

# Node ID 3d60a9a728b37f14cbb9cb2332a1aebf87e66334<br>

# Parent  19f6ed1659197aaa4bd78b69eb58139e879230d9<br>

Edge Aware Quad Tree Establishment.</blockquote><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

This patch does the following:<br>

1. Terminates recursion using edge information.<br>

2. Adds modes for "--rskip". Modes 0,1 for current usage and 2 for edge based rskip.<br>

3. Adds option "edge-threshold" to decide recursion skip using CU edge density.<br>

4. Re uses edge information when already available in encoder.<br>

<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 doc/reST/cli.rst<br>

--- a/doc/reST/cli.rst  Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/doc/reST/cli.rst  Fri Dec 27 13:17:09 2019 +0530<br>

@@ -842,15 +842,20 @@<br>

        Measure 2Nx2N merge candidates first; if no residual is found, <br>

        additional modes at that depth are not analysed. Default disabled<br>

<br>

-.. option:: --rskip, --no-rskip<br>

-<br>

-       This option determines early exit from CU depth recursion. When a skip CU is<br>

-       found, additional heuristics (depending on rd-level) are used to decide whether<br>

-       to terminate recursion. In rdlevels 5 and 6, comparison with inter2Nx2N is used, <br>

-       while at rdlevels 4 and neighbour costs are used to skip recursion.<br>

-       Provides minimal quality degradation at good performance gains when enabled. <br>

-<br>

-       Default: enabled, disabled for :option:`--tune grain`<br>

+.. option:: --rskip <0|1|2><br>

+<br>

+       This option determines early exit from CU depth recursion when enabled. When a skip CU is<br>

+       found, additional heuristics (depending on rd-level and rskip mode) are used to decide whether<br>

+       to terminate recursion. In rdlevels 5 and 6, comparison with inter2Nx2N is used,<br>

+       while at rdlevels 4 and below, neighbour costs are used to skip recursion in mode 1, and CU edge density in mode 2.<br>

+       Provides minimal quality degradation at good performance gains when enabled. R-skip mode 0 means disabled.<br></blockquote><div>[KS] That R-skip mode 0 means disabled is already implied. If it must be mentioned, please rephrase it. <br>There is no mention that this patch introduces rskip 2 only in rd0_4.</div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

+       Default: 1, disabled when :option:`--tune grain` is used.<br>

+<br>

+.. option:: --edge-threshold <0..100><br>

+<br>

+       Denotes the minimum percentage of edge density in the CU, below which the recursion is skipped.<br>

+       Default: 5, requires :option:`--rskip mode 2` to be enabled. <br></blockquote><div>[KS] It is the minimum edge density the CU is expected to possess. <br>Usage of density and variance is not consistent across the patch</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

 .. option:: --splitrd-skip, --no-splitrd-skip<br>

<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/CMakeLists.txt<br>

--- a/source/CMakeLists.txt     Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/CMakeLists.txt     Fri Dec 27 13:17:09 2019 +0530<br>

@@ -29,7 +29,7 @@<br>

 option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)<br>

 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)<br>

 # X265_BUILD must be incremented each time the public API is changed<br>

-set(X265_BUILD 184)<br>

+set(X265_BUILD 185)<br>

 configure_file("${PROJECT_SOURCE_DIR}/<a href="http://x265.def.in" rel="noreferrer" target="_blank">x265.def.in</a>"<br>

                "${PROJECT_BINARY_DIR}/x265.def")<br>

 configure_file("${PROJECT_SOURCE_DIR}/<a href="http://x265_config.h.in" rel="noreferrer" target="_blank">x265_config.h.in</a>"<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/common/common.h<br>

--- a/source/common/common.h    Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/common/common.h    Fri Dec 27 13:17:09 2019 +0530<br>

@@ -129,6 +129,7 @@<br>

 typedef uint64_t sum2_t;<br>

 typedef uint64_t pixel4;<br>

 typedef int64_t  ssum2_t;<br>

+#define SHIFT_TO_BITPLANE 9<br>

 #define HISTOGRAM_BINS 1024<br>

 #define SHIFT 1<br>

 #else<br>

@@ -137,6 +138,7 @@<br>

 typedef uint32_t sum2_t;<br>

 typedef uint32_t pixel4;<br>

 typedef int32_t  ssum2_t; // Signed sum<br>

+#define SHIFT_TO_BITPLANE 7<br>

 #define HISTOGRAM_BINS 256<br>

 #define SHIFT 0<br>

 #endif // if HIGH_BIT_DEPTH<br>

@@ -272,6 +274,9 @@<br>

 #define MAX_TR_SIZE (1 << MAX_LOG2_TR_SIZE)<br>

 #define MAX_TS_SIZE (1 << MAX_LOG2_TS_SIZE)<br>

<br>

+#define RDCOST_BASED_RSKIP 1<br>

+#define EDGE_BASED_RSKIP 2<br>

+<br>

 #define COEF_REMAIN_BIN_REDUCTION   3 // indicates the level at which the VLC<br>

                                       // transitions from Golomb-Rice to TU+EG(k)<br>

<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/common/frame.cpp<br>

--- a/source/common/frame.cpp   Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/common/frame.cpp   Fri Dec 27 13:17:09 2019 +0530<br>

@@ -61,6 +61,7 @@<br>

     m_edgePic = NULL;<br>

     m_gaussianPic = NULL;<br>

     m_thetaPic = NULL;<br>

+    m_edgeBitPlane = NULL;<br>

 }<br>

<br>

 bool Frame::create(x265_param *param, float* quantOffsets)<br>

@@ -115,6 +116,18 @@<br>

         m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));<br>

     }<br>

<br>

+    if (param->bEnableRecursionSkip == EDGE_BASED_RSKIP)<br>

+    {<br>

+        uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize - 1) / param->maxCUSize;<br>

+        uint32_t numCuInHeight = (param->sourceHeight + param->maxCUSize - 1) / param->maxCUSize;<br>

+        uint32_t lumaMarginX = param->maxCUSize + 32;<br>

+        uint32_t lumaMarginY = param->maxCUSize + 16;<br>

+        uint32_t stride = (numCuInWidth * param->maxCUSize) + (lumaMarginX << 1);<br>

+        uint32_t maxHeight = numCuInHeight * param->maxCUSize;<br>

+        m_bitPlaneSize = stride * (maxHeight + (lumaMarginY * 2));<br>

+        CHECKED_MALLOC_ZERO(m_edgeBitPlane, pixel, m_bitPlaneSize);<br>

+    }<br>

+<br>

     if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(param, m_fencPic, param->rc.qgSize))<br>

     {<br>

         X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized");<br>

@@ -267,4 +280,9 @@<br>

         X265_FREE(m_gaussianPic);<br>

         X265_FREE(m_thetaPic);<br>

     }<br>

+<br>

+    if (m_param->bEnableRecursionSkip == EDGE_BASED_RSKIP)<br>

+    {<br>

+        X265_FREE(m_edgeBitPlane);<br>

+    }<br>

 }<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/common/frame.h<br>

--- a/source/common/frame.h     Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/common/frame.h     Fri Dec 27 13:17:09 2019 +0530<br>

@@ -137,6 +137,8 @@<br>

     pixel*                 m_gaussianPic;<br>

     pixel*                 m_thetaPic;<br>

<br>

+    pixel*                 m_edgeBitPlane;<br>

+    uint32_t               m_bitPlaneSize;<br>

     Frame();<br>

<br>

     bool create(x265_param *param, float* quantOffsets);<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/common/param.cpp<br>

--- a/source/common/param.cpp   Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/common/param.cpp   Fri Dec 27 13:17:09 2019 +0530<br>

@@ -198,6 +198,7 @@<br>

     param->bEnableWeightedBiPred = 0;<br>

     param->bEnableEarlySkip = 1;<br>

     param->bEnableRecursionSkip = 1;<br>

+    param->edgeThreshold = 5.0;<br>

     param->bEnableAMP = 0;<br>

     param->bEnableRectInter = 0;<br>

     param->rdLevel = 3;<br>

@@ -694,7 +695,8 @@<br>

     OPT("ref") p->maxNumReferences = atoi(value);<br>

     OPT("fast-intra") p->bEnableFastIntra = atobool(value);<br>

     OPT("early-skip") p->bEnableEarlySkip = atobool(value);<br>

-    OPT("rskip") p->bEnableRecursionSkip = atobool(value);<br>

+    OPT("rskip") p->bEnableRecursionSkip = atoi(value);<br>

+    OPT("edge-threshold") p->edgeThreshold = atoi(value)/100.0;<br></blockquote><div>[KS] I don't understand the purpose of receiving it as a percentage and dividing by 100 internally. What is the goal?<br>Also, across the patch, percent and double are used interchangeably, which reduces clarity. <br><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

     OPT("me")p->searchMethod = parseName(value, x265_motion_est_names, bError);<br>

     OPT("subme") p->subpelRefine = atoi(value);<br>

     OPT("merange") p->searchRange = atoi(value);<br>

@@ -911,7 +913,7 @@<br>

     OPT("max-merge") p->maxNumMergeCand = (uint32_t)atoi(value);<br>

     OPT("temporal-mvp") p->bEnableTemporalMvp = atobool(value);<br>

     OPT("early-skip") p->bEnableEarlySkip = atobool(value);<br>

-    OPT("rskip") p->bEnableRecursionSkip = atobool(value);<br>

+    OPT("rskip") p->bEnableRecursionSkip = atoi(value);<br>

     OPT("rdpenalty") p->rdPenalty = atoi(value);<br>

     OPT("tskip") p->bEnableTransformSkip = atobool(value);<br>

     OPT("no-tskip-fast") p->bEnableTSkipFast = atobool(value);<br>

@@ -1213,6 +1215,7 @@<br>

             }<br>

         }<br>

         OPT("hist-threshold") p->edgeTransitionThreshold = atof(value);<br>

+        OPT("edge-threshold") p->edgeThreshold = atoi(value)/100.0;<br>

         OPT("lookahead-threads") p->lookaheadThreads = atoi(value);<br>

         OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value);<br>

         OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine = atobool(value);<br>

@@ -1579,9 +1582,13 @@<br>

     CHECK(param->rdLevel < 1 || param->rdLevel > 6,<br>

           "RD Level is out of range");<br>

     CHECK(param->rdoqLevel < 0 || param->rdoqLevel > 2,<br>

-        "RDOQ Level is out of range");<br>

+          "RDOQ Level is out of range");<br>

     CHECK(param->dynamicRd < 0 || param->dynamicRd > x265_ADAPT_RD_STRENGTH,<br>

-        "Dynamic RD strength must be between 0 and 4");<br>

+          "Dynamic RD strength must be between 0 and 4");<br>

+    CHECK(param->bEnableRecursionSkip > 2 || param->bEnableRecursionSkip < 0,<br>

+          "Invalid Recursion skip mode. Valid modes 0,1,2");<br>

+    CHECK(param->edgeThreshold < 0 || param->edgeThreshold > 100,<br>

+          "Percentage Edge threshold for a CU should be integer between 0 to 100");<br></blockquote>[KS] You are saying the threshold is a percentage, but you divide by 100 internally and make it a double. How is this check valid now?<br><div>[KS] Where are you checking the combination of rskip mode 2 with the edge threshold? <br><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

     CHECK(param->bframes && param->bframes >= param->lookaheadDepth && !param->rc.bStatRead,<br>

           "Lookahead depth must be greater than the max consecutive bframe count");<br>

     CHECK(param->bframes < 0,<br>

@@ -1887,7 +1894,11 @@<br>

     TOOLVAL(param->psyRdoq, "psy-rdoq=%.2lf");<br>

     TOOLOPT(param->bEnableRdRefine, "rd-refine");<br>

     TOOLOPT(param->bEnableEarlySkip, "early-skip");<br>

-    TOOLOPT(param->bEnableRecursionSkip, "rskip");<br>

+    TOOLVAL(param->bEnableRecursionSkip, "rskip mode=%d");<br>

+    if (param->bEnableRecursionSkip == EDGE_BASED_RSKIP)<br>

+    {<br>

+        TOOLVAL(param->edgeThreshold, "rskip-threshold=%.2lf");<br>

+    }<br>

     TOOLOPT(param->bEnableSplitRdSkip, "splitrd-skip");<br>

     TOOLVAL(param->noiseReductionIntra, "nr-intra=%d");<br>

     TOOLVAL(param->noiseReductionInter, "nr-inter=%d");<br>

@@ -2046,6 +2057,10 @@<br>

     s += sprintf(s, " selective-sao=%d", p->selectiveSAO);<br>

     BOOL(p->bEnableEarlySkip, "early-skip");<br>

     BOOL(p->bEnableRecursionSkip, "rskip");<br>

+    if (p->bEnableRecursionSkip == EDGE_BASED_RSKIP)<br>

+    {<br>

+        s += sprintf(s, " edge-threshold=%lf", p->edgeThreshold);<br>

+    }<br>

     BOOL(p->bEnableFastIntra, "fast-intra");<br>

     BOOL(p->bEnableTSkipFast, "tskip-fast");<br>

     BOOL(p->bCULossless, "cu-lossless");<br>

@@ -2350,6 +2365,7 @@<br>

     dst->rdLevel = src->rdLevel;<br>

     dst->bEnableEarlySkip = src->bEnableEarlySkip;<br>

     dst->bEnableRecursionSkip = src->bEnableRecursionSkip;<br>

+    dst->edgeThreshold = src->edgeThreshold;<br>

     dst->bEnableFastIntra = src->bEnableFastIntra;<br>

     dst->bEnableTSkipFast = src->bEnableTSkipFast;<br>

     dst->bCULossless = src->bCULossless;<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/common/pixel.cpp<br>

--- a/source/common/pixel.cpp   Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/common/pixel.cpp   Fri Dec 27 13:17:09 2019 +0530<br>

@@ -876,6 +876,18 @@<br>

     }<br>

 }<br>

<br>

+static void planecopy_pp_shr_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift)<br>

+{<br>

+    for (int r = 0; r < height; r++)<br>

+    {<br>

+        for (int c = 0; c < width; c++)<br>

+            dst[c] = (pixel)((src[c] >> shift));</blockquote><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

+        dst += dstStride;<br>

+        src += srcStride;<br>

+    }<br>

+}<br>

+<br></blockquote><div>[KS] Why do we not have an asm version of this primitive?<br> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

 static void planecopy_sp_shl_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask)<br>

 {<br>

     for (int r = 0; r < height; r++)<br>

@@ -1316,6 +1328,7 @@<br>

     p.planecopy_cp = planecopy_cp_c;<br>

     p.planecopy_sp = planecopy_sp_c;<br>

     p.planecopy_sp_shl = planecopy_sp_shl_c;<br>

+    p.planecopy_pp_shr = planecopy_pp_shr_c;<br>

 #if HIGH_BIT_DEPTH<br>

     p.planeClipAndMax = planeClipAndMax_c;<br>

 #endif<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/common/primitives.h<br>

--- a/source/common/primitives.h        Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/common/primitives.h        Fri Dec 27 13:17:09 2019 +0530<br>

@@ -204,6 +204,7 @@<br>

 typedef void (*sign_t)(int8_t *dst, const pixel *src1, const pixel *src2, const int endX);<br>

 typedef void (*planecopy_cp_t) (const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);<br>

 typedef void (*planecopy_sp_t) (const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);<br>

+typedef void (*planecopy_pp_t) (const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);<br>

 typedef pixel (*planeClipAndMax_t)(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, const pixel minPix, const pixel maxPix);<br>

<br>

 typedef void (*cutree_propagate_cost) (int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, const int32_t* invQscales, const double* fpsFactor, int len);<br>

@@ -358,6 +359,7 @@<br>

     planecopy_cp_t        planecopy_cp;<br>

     planecopy_sp_t        planecopy_sp;<br>

     planecopy_sp_t        planecopy_sp_shl;<br>

+    planecopy_pp_t        planecopy_pp_shr;<br>

     planeClipAndMax_t     planeClipAndMax;<br>

<br>

     weightp_sp_t          weight_sp;<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/encoder/analysis.cpp<br>

--- a/source/encoder/analysis.cpp       Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/encoder/analysis.cpp       Fri Dec 27 13:17:09 2019 +0530<br>

@@ -1313,14 +1313,22 @@<br>

         if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO && m_param->analysisReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1])))<br>

         {<br>

             skipRecursion = md.bestMode->cu.isSkipped(0);<br>

-            if (mightSplit && depth >= minDepth && !skipRecursion)<br>

+            if (mightSplit && !skipRecursion)<br>

             {<br>

-                if (depth)<br>

-                    skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);<br>

-                if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE)<br>

-                    skipRecursion = complexityCheckCU(*md.bestMode);<br>

+                if (depth >= minDepth && m_param->bEnableRecursionSkip == RDCOST_BASED_RSKIP)<br>

+                {<br>

+                    if (depth)<br>

+                        skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);<br>

+                    if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE)<br>

+                        skipRecursion = complexityCheckCU(*md.bestMode);<br>

+                }<br>

+                else if (m_param->bEnableRecursionSkip == EDGE_BASED_RSKIP)<br>

+                {<br></blockquote><div>[KS] Regarding my question on not considering topSkip's decision, you said there is an FPS loss. Can you elaborate on the theory behind it? </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

+                    skipRecursion = edgeRecursionSkip(parentCTU, depth);<br>

+                }<br>

             }<br>

         }<br>

+<br>

         if (m_param->bAnalysisType == AVC_INFO && md.bestMode && cuGeom.numPartitions <= 16 && m_param->analysisReuseLevel == 7)<br>

             skipRecursion = true;<br>

         /* Step 2. Evaluate each of the 4 split sub-blocks in series */<br>

@@ -3543,6 +3551,33 @@<br>

     return false;<br>

 }<br>

<br>

+bool Analysis::edgeRecursionSkip(const CUData& ctu, int depth)<br>

+{<br>

+    int cuLen = m_param->maxCUSize >> depth;<br>

+    if (cuLen >= RSKIP2_MIN_CUSIZE)<br>

+    {<br>

+        uint8_t blockType = g_log2Size[cuLen] - 2;<br></blockquote><div>[KS] Can you tell me the purpose of cuLen computation and its subsequent usage when we have cuGeom's log2size? </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

+        int shift = g_log2Size[cuLen] * 2;<br>

+        intptr_t stride = m_frame->m_fencPic->m_stride;<br>

+        pixel* edgePic = m_frame->m_edgeBitPlane + m_frame->m_fencPic->m_lumaMarginY * m_frame->m_fencPic->m_stride + m_frame->m_fencPic->m_lumaMarginX;<br>

+        intptr_t blockOffsetLuma = ctu.m_cuPelX + ctu.m_cuPelY * stride;<br>

+        uint64_t sum_ss = <a href="http://primitives.cu" rel="noreferrer" target="_blank">primitives.cu</a>[blockType].var(edgePic + blockOffsetLuma, stride);<br>

+        uint32_t sum = (uint32_t)sum_ss;<br>

+        uint32_t ss = (uint32_t)(sum_ss >> 32);<br>

+        double pixelCount = pow(2.0, shift);<br></blockquote><div>[KS] When the pixel count is a power of 2, what is the reason for using pow instead of a shift? </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

+        double cuEdgeVariance = (ss - (sum * sum / pixelCount)) / pixelCount;<br></blockquote><div>[KS] The difference between complexityCheckCU and this module's core functionality is the use of the sum of absolute differences versus the sum of squared differences. Although squared differences are more accurate (as you mentioned), are you seeing a very big difference in the accuracy of the variance across these 2 functions? Have we measured this? I am skeptical about having a new module doing the same functionality.</div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

+<br>

+        if (cuEdgeVariance > m_param->edgeThreshold)<br>

+            return false;<br>

+        else<br>

+            return true;<br>

+    }<br>

+    else<br>

+    {<br>

+        return true;<br>

+    }<br></blockquote><div>[KS] If the size < 32x32 you force recursion skipping? Why? What is the quality impact you are seeing?<br>Can you share some results? </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

+}<br>

+<br></blockquote><div>[KS] If CU size < 32x32 why do you even call this function? I see this as an overhead </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

 uint32_t Analysis::calculateCUVariance(const CUData& ctu, const CUGeom& cuGeom)<br>

 {<br>

     uint32_t cuVariance = 0;<br>

@@ -3566,7 +3601,6 @@<br>

             cnt++;<br>

         }<br>

     }<br>

-    <br>

     return cuVariance / cnt;<br>

 }<br>

<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/encoder/analysis.h<br>

--- a/source/encoder/analysis.h Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/encoder/analysis.h Fri Dec 27 13:17:09 2019 +0530<br>

@@ -36,6 +36,8 @@<br>

 #include "entropy.h"<br>

 #include "search.h"<br>

<br>

+#define RSKIP2_MIN_CUSIZE 32 /* CU size until which the cu edge variance will be computed. */<br>

+<br></blockquote><div>[KS] When you can use the relevant global parameters, can you tell me why this additional macro is necessary? </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

 namespace X265_NS {<br>

 // private namespace<br>

<br>

@@ -52,7 +54,7 @@<br>

         splitRefs = 0;<br>

         mvCost[0] = 0; // L0<br>

         mvCost[1] = 0; // L1<br>

-        sa8dCost    = 0;<br>

+        sa8dCost  = 0;<br>

     }<br>

 };<br>

<br>

@@ -120,7 +122,6 @@<br>

<br>

     Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext);<br>

     int32_t loadTUDepth(CUGeom cuGeom, CUData parentCTU);<br>

-<br>

 protected:<br>

     /* Analysis data for save/load mode, writes/reads data based on absPartIdx */<br>

     x265_analysis_inter_data*  m_reuseInterDataCTU;<br>

@@ -192,6 +193,7 @@<br>

     uint32_t topSkipMinDepth(const CUData& parentCTU, const CUGeom& cuGeom);<br>

     bool recursionDepthCheck(const CUData& parentCTU, const CUGeom& cuGeom, const Mode& bestMode);<br>

     bool complexityCheckCU(const Mode& bestMode);<br>

+    bool edgeRecursionSkip(const CUData& parentCTU, int depth);<br>

<br>

     /* generate residual and recon pixels for an entire CTU recursively (RD0) */<br>

     void encodeResidue(const CUData& parentCTU, const CUGeom& cuGeom);<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/encoder/encoder.cpp<br>

--- a/source/encoder/encoder.cpp        Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/encoder/encoder.cpp        Fri Dec 27 13:17:09 2019 +0530<br>

@@ -1351,9 +1351,9 @@<br>

     int32_t numBytes = m_param->sourceBitDepth > 8 ? 2 : 1;<br>

     memset(m_edgePic, 0, bufSize * numBytes);<br>

<br>

-    if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false))<br>

-    {<br>

-        x265_log(m_param, X265_LOG_ERROR, "Failed edge computation!");<br>

+    if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false, 1))<br>

+    {<br>

+        x265_log(m_param, X265_LOG_ERROR, "Failed to compute edge!");<br>

         return false;<br>

     }<br>

<br>

@@ -1668,6 +1668,13 @@<br>

                         }<br>

                     }<br>

                 }<br>

+                if (m_param->bEnableRecursionSkip == EDGE_BASED_RSKIP && m_param->bHistBasedSceneCut)<br>

+                {<br>

+                    pixel* src = m_edgePic;<br>

+                    pixel* edgePic = inFrame->m_edgeBitPlane + inFrame->m_fencPic->m_lumaMarginY * inFrame->m_fencPic->m_stride + inFrame->m_fencPic->m_lumaMarginX;<br>

+                    primitives.planecopy_pp_shr(src, inFrame->m_fencPic->m_picWidth, edgePic, inFrame->m_fencPic->m_stride,<br>

+                        inFrame->m_fencPic->m_picWidth, inFrame->m_fencPic->m_picHeight, 0);<br>

+                }<br>

             }<br>

             else<br>

             {<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/encoder/frameencoder.cpp<br>

--- a/source/encoder/frameencoder.cpp   Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/encoder/frameencoder.cpp   Fri Dec 27 13:17:09 2019 +0530<br>

@@ -130,7 +130,7 @@<br>

         {<br>

             rowSum += sliceGroupSizeAccu;<br>

             m_sliceBaseRow[++sidx] = i;<br>

-        }        <br>

+        }<br>

     }<br>

     X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");<br>

     m_sliceBaseRow[0] = 0;<br>

@@ -268,6 +268,20 @@<br>

     curFrame->m_encData->m_jobProvider = this;<br>

     curFrame->m_encData->m_slice->m_mref = m_mref;<br>

<br>

+    if (!m_param->bHistBasedSceneCut && m_param->rc.aqMode != X265_AQ_EDGE && m_param->bEnableRecursionSkip == EDGE_BASED_RSKIP)<br>

+    {<br>

+        int height = curFrame->m_fencPic->m_picHeight;<br>

+        int width = curFrame->m_fencPic->m_picWidth;<br>

+        intptr_t stride = curFrame->m_fencPic->m_stride;<br>

+        pixel* edgePic = curFrame->m_edgeBitPlane + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;<br>

+<br>

+        if (!computeEdge(edgePic, curFrame->m_fencPic->m_picOrg[0], NULL, stride, height, width, false, 1))<br>

+        {<br>

+            x265_log(m_param, X265_LOG_ERROR, " Failed to compute edge !");<br>

+            return false;<br>

+        }<br>

+    }<br>

+<br>

     if (!m_cuGeoms)<br>

     {<br>

         if (!initializeGeoms())<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/encoder/slicetype.cpp<br>

--- a/source/encoder/slicetype.cpp      Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/encoder/slicetype.cpp      Fri Dec 27 13:17:09 2019 +0530<br>

@@ -87,7 +87,7 @@<br>

<br>

 namespace X265_NS {<br>

<br>

-bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta)<br>

+bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel)<br>

 {<br>

     intptr_t rowOne = 0, rowTwo = 0, rowThree = 0, colOne = 0, colTwo = 0, colThree = 0;<br>

     intptr_t middle = 0, topLeft = 0, topRight = 0, bottomLeft = 0, bottomRight = 0;<br>

@@ -141,7 +141,7 @@<br>

                        theta = 180 + theta;<br>

                     edgeTheta[middle] = (pixel)theta;<br>

                 }<br>

-                edgePic[middle] = (pixel)(gradientMagnitude >= edgeThreshold ? edgeThreshold : blackPixel);<br>

+                edgePic[middle] = (pixel)(gradientMagnitude >= EDGE_THRESHOLD ? whitePixel : blackPixel);<br></blockquote><div><div class="gmail-im" style="color:rgb(80,0,80)"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr"><div dir="ltr"><div dir="ltr"><div dir="ltr"><div class="gmail_quote"><br></div></div></div></div></div></blockquote></div></div><div>[KS] Does the output of edgePic differ depending on whether AQ_EDGE is enabled? Does this make the variance computation in analysis inconsistent? <br>You mentioned the accuracy of AQ_EDGE due to denoising, but I would like to confirm whether the output of the edgeRecursionSkip module will vary with/without AQ_EDGE.</div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

             }<br>

         }<br>

         return true;<br>

@@ -519,6 +519,14 @@<br>

                 if (param->rc.aqMode == X265_AQ_EDGE)<br>

                     edgeFilter(curFrame, param);<br>

<br>

+                if (param->rc.aqMode == X265_AQ_EDGE && !param->bHistBasedSceneCut && param->bEnableRecursionSkip == EDGE_BASED_RSKIP)<br>

+                {<br>

+                    pixel* src = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;<br>

+                    pixel* dst = curFrame->m_edgeBitPlane + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;<br>

+                    primitives.planecopy_pp_shr(src, curFrame->m_fencPic->m_stride, dst,<br>

+                        curFrame->m_fencPic->m_stride, curFrame->m_fencPic->m_picWidth, curFrame->m_fencPic->m_picHeight, SHIFT_TO_BITPLANE);<br>

+                }<br>

+<br>

                 if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode == X265_AQ_EDGE)<br>

                 {<br>

                     double bit_depth_correction = 1.f / (1 << (2 * (X265_DEPTH - 8)));<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/encoder/slicetype.h<br>

--- a/source/encoder/slicetype.h        Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/encoder/slicetype.h        Fri Dec 27 13:17:09 2019 +0530<br>

@@ -44,9 +44,9 @@<br>

 #define EDGE_INCLINATION 45<br>

<br>

 #if HIGH_BIT_DEPTH<br>

-#define edgeThreshold 1023.0<br>

+#define EDGE_THRESHOLD 1023.0<br>

 #else<br>

-#define edgeThreshold 255.0<br>

+#define EDGE_THRESHOLD 255.0<br>

 #endif<br>

 #define PI 3.14159265<br>

<br>

@@ -101,7 +101,7 @@<br>

 protected:<br>

<br>

     uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize);<br>

-    uint32_t edgeDensityCu(Frame*curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize);<br>

+    uint32_t edgeDensityCu(Frame* curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize);<br>

     uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, uint32_t qgSize);<br>

     uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);<br>

     bool     allocWeightedRef(Lowres& fenc);<br>

@@ -265,7 +265,6 @@<br>

     CostEstimateGroup& operator=(const CostEstimateGroup&);<br>

 };<br>

<br>

-bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta);<br>

-<br>

+bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel = EDGE_THRESHOLD);<br>

 }<br>

 #endif // ifndef X265_SLICETYPE_H<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/test/regression-tests.txt<br>

--- a/source/test/regression-tests.txt  Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/test/regression-tests.txt  Fri Dec 27 13:17:09 2019 +0530<br>

@@ -161,6 +161,8 @@<br>

 Island_960x540_24.yuv,--no-cutree --aq-mode 0 --bitrate 6000 --scenecut-aware-qp<br>

 sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02 --frame-dup --dup-threshold 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000<br>

 sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02<br>

+crowd_run_1080p50.yuv, --rskip 2 --edge-threshold 5 --hist-scenecut --hist-threshold 0.1 --aq-mode 4<br>

+crowd_run_1080p50.yuv, --preset slow --rskip 2 --edge-threshold 5 --hist-scenecut --hist-threshold 0.1 --aq-mode 4<br></blockquote><div>[KS] What about other hist-scenecut & aq-mode combinations? We have different flows, right? </div><div>[KS] I don't understand why we should restrict this to rd levels 0 to 4 when there are so many x265 users doing offline encodes with the slower/veryslow presets.<br>Even though rd levels 5 and 6 do not consider additional heuristics to perform rskip like rd levels 0 to 4 do, they still skip recursion based on the residual from inter2Nx2N. <br>Hence I am interested to know why this algorithm doesn't support rd 5/6.</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

<br>

 # Main12 intraCost overflow bug test<br>

 720p50_parkrun_ter.y4m,--preset medium<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/x265.h<br>

--- a/source/x265.h     Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/x265.h     Fri Dec 27 13:17:09 2019 +0530<br>

@@ -1859,6 +1859,9 @@<br>

<br>

     /* Enable HME search ranges for L0, L1 and L2 respectively. */<br>

     int       hmeRange[3];<br>

+<br>

+    /* Edge variance threshold for quad tree establishment. */<br>

+    double       edgeThreshold;<br></blockquote><div>[KS] Given the range it holds, why not float? </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

 } x265_param;<br>

<br>

 /* x265_param_alloc:<br>

diff -r 19f6ed165919 -r 3d60a9a728b3 source/x265cli.h<br>

--- a/source/x265cli.h  Mon Dec 30 11:58:44 2019 +0530<br>

+++ b/source/x265cli.h  Fri Dec 27 13:17:09 2019 +0530<br>

@@ -105,8 +105,8 @@<br>

     { "amp",                  no_argument, NULL, 0 },<br>

     { "no-early-skip",        no_argument, NULL, 0 },<br>

     { "early-skip",           no_argument, NULL, 0 },<br>

-    { "no-rskip",             no_argument, NULL, 0 },<br>

-    { "rskip",                no_argument, NULL, 0 },<br>

+    { "rskip",                required_argument, NULL, 0 },<br>

+    { "edge-threshold",       required_argument, NULL, 0 },<br>

     { "no-fast-cbf",          no_argument, NULL, 0 },<br>

     { "fast-cbf",             no_argument, NULL, 0 },<br>

     { "no-tskip",             no_argument, NULL, 0 },<br>

@@ -451,7 +451,8 @@<br>

     H0("   --[no-]ssim-rd                Enable ssim rate distortion optimization, 0 to disable. Default %s\n", OPT(param->bSsimRd));<br>

     H0("   --[no-]rd-refine              Enable QP based RD refinement for rd levels 5 and 6. Default %s\n", OPT(param->bEnableRdRefine));<br>

     H0("   --[no-]early-skip             Enable early SKIP detection. Default %s\n", OPT(param->bEnableEarlySkip));<br>

-    H0("   --[no-]rskip                  Enable early exit from recursion. Default %s\n", OPT(param->bEnableRecursionSkip));<br>

+    H0("   --rskip <mode>                Enable or disable early exit from recursion. Mode 0: Disabled. Mode 1: exit using rdcost. Mode 2: exit using edge density. Default %s\n", OPT(param->bEnableRecursionSkip));<br></blockquote><div>[KS] This is no longer an on/off CLI option to enable/disable. Keep the description concise. </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

+    H1("   --edge-threshold              Threshold in terms of percentage for edge density in CUs to terminate the recursion depth. Applicable only for rskip mode 2. Default %s\n", OPT(param->edgeThreshold));<br>

     H1("   --[no-]tskip-fast             Enable fast intra transform skipping. Default %s\n", OPT(param->bEnableTSkipFast));<br>

     H1("   --[no-]splitrd-skip           Enable skipping split RD analysis when sum of split CU rdCost larger than one split CU rdCost for Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip));<br>

     H1("   --nr-intra <integer>          An integer value in range of 0 to 2000, which denotes strength of noise reduction in intra CUs. Default 0\n");<br>

_______________________________________________<br>

x265-devel mailing list<br>

<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>

<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>

</blockquote></div><br clear="all"><div><br></div>-- <br><div dir="ltr"><div dir="ltr"><div><div dir="ltr"><span style="color:rgb(0,0,0)">Regards,<br>Kavitha</span></div></div></div></div></div>