[x265] [PATCH] Edge Aware Quad Tree Establishment
    srikanth.kurapati at multicorewareinc.com 
    srikanth.kurapati at multicorewareinc.com
       
    Wed Feb 19 07:45:07 CET 2020
    
    
  
# HG changeset patch
# User Srikanth Kurapati
# Date 1580280547 -19800
#      Wed Jan 29 12:19:07 2020 +0530
# Node ID ecf19726600a3218c10eb28dcfded16d2a18c301
# Parent  c2769ac5fa9d6776317e720d19f00005f8b0eab4
Edge Aware Quad Tree Establishment.
This patch does the following:
1. Terminates recursion for quadtree establishment using edge information.
2. Adds modes for option "--rskip". Modes 0 and 1 keep the current behavior; modes 2
 and 3 enable edge-based recursion skip.
3. Adds option "rskip-edge-threshold" to decide recursion skip using CU edge density.
4. Reuses edge information when it is already available in the encoder.
diff -r c2769ac5fa9d -r ecf19726600a doc/reST/cli.rst
--- a/doc/reST/cli.rst	Mon Feb 17 20:46:53 2020 +0530
+++ b/doc/reST/cli.rst	Wed Jan 29 12:19:07 2020 +0530
@@ -842,15 +842,31 @@
 	Measure 2Nx2N merge candidates first; if no residual is found, 
 	additional modes at that depth are not analysed. Default disabled
 
-.. option:: --rskip, --no-rskip
-
-	This option determines early exit from CU depth recursion. When a skip CU is
-	found, additional heuristics (depending on rd-level) are used to decide whether
-	to terminate recursion. In rdlevels 5 and 6, comparison with inter2Nx2N is used, 
-	while at rdlevels 4 and neighbour costs are used to skip recursion.
-	Provides minimal quality degradation at good performance gains when enabled. 
-
-	Default: enabled, disabled for :option:`--tune grain`
+.. option:: --rskip <0|1|2|3>
+
+	This option determines early exit from CU depth recursion in modes 1, 2 and 3. When a skip CU is
+	found, additional heuristics (depending on RD level and rskip mode) are used to decide whether
+	to terminate recursion. The following table summarizes the behavior.
+	
+	+----------+------------+----------------------------------------------------------------+
+	| RD Level | Rskip Mode |   Skip Recursion Heuristic                                     |
+	+==========+============+================================================================+
+	|   0 - 4  |      1     |   Neighbour costs.                                             |
+	+----------+------------+----------------------------------------------------------------+
+	|   5 - 6  |      1     |   Comparison with inter2Nx2N.                                  |
+	+----------+------------+----------------------------------------------------------------+
+	|   0 - 6  |      2     |   CU edge density.                                             |
+	+----------+------------+----------------------------------------------------------------+
+	|   0 - 6  |      3     |   CU edge density with forceful skip for lower levels of CTU.  |
+	+----------+------------+----------------------------------------------------------------+
+	
+	Provides minimal quality degradation at good performance gains for non-zero modes.
+	:option:`--rskip 0` means disabled. Default: 1, disabled when :option:`--tune grain` is used.
+
+.. option:: --rskip-edge-threshold <0..100>
+
+	Denotes the minimum expected edge-density percentage within the CU, below which the recursion is skipped.
+	Default: 5, requires :option:`--rskip 2` or :option:`--rskip 3` to be enabled.
 
 .. option:: --splitrd-skip, --no-splitrd-skip
 
diff -r c2769ac5fa9d -r ecf19726600a source/CMakeLists.txt
--- a/source/CMakeLists.txt	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/CMakeLists.txt	Wed Jan 29 12:19:07 2020 +0530
@@ -29,7 +29,7 @@
 option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 188)
+set(X265_BUILD 189)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r c2769ac5fa9d -r ecf19726600a source/common/common.h
--- a/source/common/common.h	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/common/common.h	Wed Jan 29 12:19:07 2020 +0530
@@ -129,6 +129,7 @@
 typedef uint64_t sum2_t;
 typedef uint64_t pixel4;
 typedef int64_t  ssum2_t;
+#define SHIFT_TO_BITPLANE 9
 #define HISTOGRAM_BINS 1024
 #else
 typedef uint8_t  pixel;
@@ -136,6 +137,7 @@
 typedef uint32_t sum2_t;
 typedef uint32_t pixel4;
 typedef int32_t  ssum2_t; // Signed sum
+#define SHIFT_TO_BITPLANE 7
 #define HISTOGRAM_BINS 256
 #endif // if HIGH_BIT_DEPTH
 
@@ -270,6 +272,9 @@
 #define MAX_TR_SIZE (1 << MAX_LOG2_TR_SIZE)
 #define MAX_TS_SIZE (1 << MAX_LOG2_TS_SIZE)
 
+#define RDCOST_BASED_RSKIP 1
+#define EDGE_BASED_RSKIP 2
+
 #define COEF_REMAIN_BIN_REDUCTION   3 // indicates the level at which the VLC
                                       // transitions from Golomb-Rice to TU+EG(k)
 
diff -r c2769ac5fa9d -r ecf19726600a source/common/frame.cpp
--- a/source/common/frame.cpp	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/common/frame.cpp	Wed Jan 29 12:19:07 2020 +0530
@@ -61,6 +61,8 @@
     m_edgePic = NULL;
     m_gaussianPic = NULL;
     m_thetaPic = NULL;
+    m_edgeBitPlane = NULL;
+    m_edgeBitPic = NULL;
 }
 
 bool Frame::create(x265_param *param, float* quantOffsets)
@@ -115,6 +117,19 @@
         m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
     }
 
+    if (param->enableRecursionSkip >= EDGE_BASED_RSKIP)
+    {
+        uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize - 1) / param->maxCUSize;
+        uint32_t numCuInHeight = (param->sourceHeight + param->maxCUSize - 1) / param->maxCUSize;
+        uint32_t lumaMarginX = param->maxCUSize + 32;
+        uint32_t lumaMarginY = param->maxCUSize + 16;
+        uint32_t stride = (numCuInWidth * param->maxCUSize) + (lumaMarginX << 1);
+        uint32_t maxHeight = numCuInHeight * param->maxCUSize;
+        uint32_t bitPlaneSize = stride * (maxHeight + (lumaMarginY * 2));
+        CHECKED_MALLOC_ZERO(m_edgeBitPlane, pixel, bitPlaneSize);
+        m_edgeBitPic = m_edgeBitPlane + lumaMarginY * stride + lumaMarginX;
+    }
+
     if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(param, m_fencPic, param->rc.qgSize))
     {
         X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized");
@@ -267,4 +282,10 @@
         X265_FREE(m_gaussianPic);
         X265_FREE(m_thetaPic);
     }
+
+    if (m_param->enableRecursionSkip >= EDGE_BASED_RSKIP)
+    {
+        X265_FREE_ZERO(m_edgeBitPlane);
+        m_edgeBitPic = NULL;
+    }
 }
diff -r c2769ac5fa9d -r ecf19726600a source/common/frame.h
--- a/source/common/frame.h	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/common/frame.h	Wed Jan 29 12:19:07 2020 +0530
@@ -99,7 +99,7 @@
     float*                 m_quantOffsets;       // points to quantOffsets in x265_picture
     x265_sei               m_userSEI;
     uint32_t               m_picStruct;          // picture structure SEI message
-    x265_dolby_vision_rpu            m_rpu;
+    x265_dolby_vision_rpu  m_rpu;
 
     /* Frame Parallelism - notification between FrameEncoders of available motion reference rows */
     ThreadSafeInteger*     m_reconRowFlag;       // flag of CTU rows completely reconstructed and extended for motion reference
@@ -137,6 +137,10 @@
     pixel*                 m_gaussianPic;
     pixel*                 m_thetaPic;
 
+    /* edge bit plane for rskip modes 2 and 3 */
+    pixel*                 m_edgeBitPlane;
+    pixel*                 m_edgeBitPic;
+
     Frame();
 
     bool create(x265_param *param, float* quantOffsets);
diff -r c2769ac5fa9d -r ecf19726600a source/common/param.cpp
--- a/source/common/param.cpp	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/common/param.cpp	Wed Jan 29 12:19:07 2020 +0530
@@ -198,7 +198,8 @@
     param->bEnableWeightedPred = 1;
     param->bEnableWeightedBiPred = 0;
     param->bEnableEarlySkip = 1;
-    param->bEnableRecursionSkip = 1;
+    param->enableRecursionSkip = 1;
+    param->edgeVarThreshold = 0.05f;
     param->bEnableAMP = 0;
     param->bEnableRectInter = 0;
     param->rdLevel = 3;
@@ -546,7 +547,7 @@
             param->maxNumMergeCand = 5;
             param->searchMethod = X265_STAR_SEARCH;
             param->bEnableTransformSkip = 1;
-            param->bEnableRecursionSkip = 0;
+            param->enableRecursionSkip = 0;
             param->maxNumReferences = 5;
             param->limitReferences = 0;
             param->lookaheadSlices = 0; // disabled for best quality
@@ -598,7 +599,7 @@
             param->rc.hevcAq = 0;
             param->rc.qpStep = 1;
             param->rc.bEnableGrain = 1;
-            param->bEnableRecursionSkip = 0;
+            param->enableRecursionSkip = 0;
             param->psyRd = 4.0;
             param->psyRdoq = 10.0;
             param->bEnableSAO = 0;
@@ -702,8 +703,9 @@
     OPT("ref") p->maxNumReferences = atoi(value);
     OPT("fast-intra") p->bEnableFastIntra = atobool(value);
     OPT("early-skip") p->bEnableEarlySkip = atobool(value);
-    OPT("rskip") p->bEnableRecursionSkip = atobool(value);
-    OPT("me")p->searchMethod = parseName(value, x265_motion_est_names, bError);
+    OPT("rskip") p->enableRecursionSkip = atoi(value);
+    OPT("rskip-edge-threshold") p->edgeVarThreshold = atoi(value)/100.0f;
+    OPT("me") p->searchMethod = parseName(value, x265_motion_est_names, bError);
     OPT("subme") p->subpelRefine = atoi(value);
     OPT("merange") p->searchRange = atoi(value);
     OPT("rect") p->bEnableRectInter = atobool(value);
@@ -919,7 +921,7 @@
     OPT("max-merge") p->maxNumMergeCand = (uint32_t)atoi(value);
     OPT("temporal-mvp") p->bEnableTemporalMvp = atobool(value);
     OPT("early-skip") p->bEnableEarlySkip = atobool(value);
-    OPT("rskip") p->bEnableRecursionSkip = atobool(value);
+    OPT("rskip") p->enableRecursionSkip = atoi(value);
     OPT("rdpenalty") p->rdPenalty = atoi(value);
     OPT("tskip") p->bEnableTransformSkip = atobool(value);
     OPT("no-tskip-fast") p->bEnableTSkipFast = atobool(value);
@@ -1221,6 +1223,7 @@
             }
         }
         OPT("hist-threshold") p->edgeTransitionThreshold = atof(value);
+        OPT("rskip-edge-threshold") p->edgeVarThreshold = atoi(value)/100.0f;
         OPT("lookahead-threads") p->lookaheadThreads = atoi(value);
         OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value);
         OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine = atobool(value);
@@ -1596,9 +1599,16 @@
     CHECK(param->rdLevel < 1 || param->rdLevel > 6,
           "RD Level is out of range");
     CHECK(param->rdoqLevel < 0 || param->rdoqLevel > 2,
-        "RDOQ Level is out of range");
+          "RDOQ Level is out of range");
     CHECK(param->dynamicRd < 0 || param->dynamicRd > x265_ADAPT_RD_STRENGTH,
-        "Dynamic RD strength must be between 0 and 4");
+          "Dynamic RD strength must be between 0 and 4");
+    CHECK(param->enableRecursionSkip > 3 || param->enableRecursionSkip < 0,
+          "Invalid Recursion skip mode. Valid modes 0,1,2,3");
+    if (param->enableRecursionSkip >= EDGE_BASED_RSKIP)
+    {
+        CHECK(param->edgeVarThreshold < 0.0f || param->edgeVarThreshold > 1.0f,
+              "Minimum edge density percentage for a CU should be an integer between 0 to 100");
+    }
     CHECK(param->bframes && param->bframes >= param->lookaheadDepth && !param->rc.bStatRead,
           "Lookahead depth must be greater than the max consecutive bframe count");
     CHECK(param->bframes < 0,
@@ -1908,7 +1918,9 @@
     TOOLVAL(param->psyRdoq, "psy-rdoq=%.2lf");
     TOOLOPT(param->bEnableRdRefine, "rd-refine");
     TOOLOPT(param->bEnableEarlySkip, "early-skip");
-    TOOLOPT(param->bEnableRecursionSkip, "rskip");
+    TOOLVAL(param->enableRecursionSkip, "rskip mode=%d");
+    if (param->enableRecursionSkip >= EDGE_BASED_RSKIP)
+        TOOLVAL(param->edgeVarThreshold, "rskip-edge-threshold=%.2f");
     TOOLOPT(param->bEnableSplitRdSkip, "splitrd-skip");
     TOOLVAL(param->noiseReductionIntra, "nr-intra=%d");
     TOOLVAL(param->noiseReductionInter, "nr-inter=%d");
@@ -2066,7 +2078,10 @@
     s += sprintf(s, " rd=%d", p->rdLevel);
     s += sprintf(s, " selective-sao=%d", p->selectiveSAO);
     BOOL(p->bEnableEarlySkip, "early-skip");
-    BOOL(p->bEnableRecursionSkip, "rskip");
+    BOOL(p->enableRecursionSkip, "rskip");
+    if (p->enableRecursionSkip >= EDGE_BASED_RSKIP)
+        s += sprintf(s, " rskip-edge-threshold=%f", p->edgeVarThreshold);
+
     BOOL(p->bEnableFastIntra, "fast-intra");
     BOOL(p->bEnableTSkipFast, "tskip-fast");
     BOOL(p->bCULossless, "cu-lossless");
@@ -2373,7 +2388,8 @@
     dst->bSaoNonDeblocked = src->bSaoNonDeblocked;
     dst->rdLevel = src->rdLevel;
     dst->bEnableEarlySkip = src->bEnableEarlySkip;
-    dst->bEnableRecursionSkip = src->bEnableRecursionSkip;
+    dst->enableRecursionSkip = src->enableRecursionSkip;
+    dst->edgeVarThreshold = src->edgeVarThreshold;
     dst->bEnableFastIntra = src->bEnableFastIntra;
     dst->bEnableTSkipFast = src->bEnableTSkipFast;
     dst->bCULossless = src->bCULossless;
diff -r c2769ac5fa9d -r ecf19726600a source/common/pixel.cpp
--- a/source/common/pixel.cpp	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/common/pixel.cpp	Wed Jan 29 12:19:07 2020 +0530
@@ -876,6 +876,18 @@
     }
 }
 
+static void planecopy_pp_shr_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift)
+{
+    for (int r = 0; r < height; r++)
+    {
+        for (int c = 0; c < width; c++)
+            dst[c] = (pixel)((src[c] >> shift));
+
+        dst += dstStride;
+        src += srcStride;
+    }
+}
+
 static void planecopy_sp_shl_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask)
 {
     for (int r = 0; r < height; r++)
@@ -1316,6 +1328,7 @@
     p.planecopy_cp = planecopy_cp_c;
     p.planecopy_sp = planecopy_sp_c;
     p.planecopy_sp_shl = planecopy_sp_shl_c;
+    p.planecopy_pp_shr = planecopy_pp_shr_c;
 #if HIGH_BIT_DEPTH
     p.planeClipAndMax = planeClipAndMax_c;
 #endif
diff -r c2769ac5fa9d -r ecf19726600a source/common/primitives.h
--- a/source/common/primitives.h	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/common/primitives.h	Wed Jan 29 12:19:07 2020 +0530
@@ -204,6 +204,7 @@
 typedef void (*sign_t)(int8_t *dst, const pixel *src1, const pixel *src2, const int endX);
 typedef void (*planecopy_cp_t) (const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
 typedef void (*planecopy_sp_t) (const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
+typedef void (*planecopy_pp_t) (const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
 typedef pixel (*planeClipAndMax_t)(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, const pixel minPix, const pixel maxPix);
 
 typedef void (*cutree_propagate_cost) (int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, const int32_t* invQscales, const double* fpsFactor, int len);
@@ -358,6 +359,7 @@
     planecopy_cp_t        planecopy_cp;
     planecopy_sp_t        planecopy_sp;
     planecopy_sp_t        planecopy_sp_shl;
+    planecopy_pp_t        planecopy_pp_shr;
     planeClipAndMax_t     planeClipAndMax;
 
     weightp_sp_t          weight_sp;
diff -r c2769ac5fa9d -r ecf19726600a source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/encoder/analysis.cpp	Wed Jan 29 12:19:07 2020 +0530
@@ -1272,7 +1272,7 @@
                     md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
                     checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
 
-                    skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode;
+                    skipRecursion = !!m_param->enableRecursionSkip && md.bestMode;
                     if (m_param->rdLevel)
                         skipModes = m_param->bEnableEarlySkip && md.bestMode;
                 }
@@ -1296,7 +1296,7 @@
                     md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
                     checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
 
-                    skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode;
+                    skipRecursion = !!m_param->enableRecursionSkip && md.bestMode;
                     if (m_param->rdLevel)
                         skipModes = m_param->bEnableEarlySkip && md.bestMode;
                 }
@@ -1314,15 +1314,24 @@
                 skipModes = (m_param->bEnableEarlySkip || m_refineLevel == 2)
                 && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth
         }
-        if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO && m_param->analysisLoadReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1])))
+        if (md.bestMode && m_param->enableRecursionSkip && !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO && m_param->analysisLoadReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1])))
         {
             skipRecursion = md.bestMode->cu.isSkipped(0);
-            if (mightSplit && depth >= minDepth && !skipRecursion)
+            if (mightSplit && !skipRecursion)
             {
-                if (depth)
-                    skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
-                if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE)
+                if (depth >= minDepth && m_param->enableRecursionSkip == RDCOST_BASED_RSKIP)
+                {
+                    if (depth)
+                        skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
+                    if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE)
+                        skipRecursion = complexityCheckCU(*md.bestMode);
+                }
+                else if (cuGeom.log2CUSize >= MAX_LOG2_CU_SIZE - 1 && m_param->enableRecursionSkip >= EDGE_BASED_RSKIP)
+                {
                     skipRecursion = complexityCheckCU(*md.bestMode);
+                }
+                else if (m_param->enableRecursionSkip > EDGE_BASED_RSKIP)
+                    skipRecursion = true;
             }
         }
         if (m_param->bAnalysisType == AVC_INFO && md.bestMode && cuGeom.numPartitions <= 16 && m_param->analysisLoadReuseLevel == 7)
@@ -1972,7 +1981,7 @@
                     checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
                     checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
 
-                    if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
+                    if (m_param->enableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
                         skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
                 }
                 if (m_param->analysisLoadReuseLevel > 4 && m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
@@ -1996,7 +2005,7 @@
                     checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
                     checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
 
-                    if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
+                    if (m_param->enableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
                         skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
                 }
             }
@@ -2015,8 +2024,12 @@
             checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
             checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
 
-            if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
+            if (m_param->enableRecursionSkip == RDCOST_BASED_RSKIP && depth && m_modeDepth[depth - 1].bestMode)
                 skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
+            else if (cuGeom.log2CUSize >= MAX_LOG2_CU_SIZE - 1 && m_param->enableRecursionSkip >= EDGE_BASED_RSKIP)
+                skipRecursion = md.bestMode && complexityCheckCU(*md.bestMode);
+            else if (m_param->enableRecursionSkip > EDGE_BASED_RSKIP)
+                skipRecursion = true;
         }
         if (m_param->bAnalysisType == AVC_INFO && md.bestMode && cuGeom.numPartitions <= 16 && m_param->analysisLoadReuseLevel == 7)
             skipRecursion = true;
@@ -3525,27 +3538,47 @@
 
 bool Analysis::complexityCheckCU(const Mode& bestMode)
 {
-    uint32_t mean = 0;
-    uint32_t homo = 0;
-    uint32_t cuSize = bestMode.fencYuv->m_size;
-    for (uint32_t y = 0; y < cuSize; y++) {
-        for (uint32_t x = 0; x < cuSize; x++) {
-            mean += (bestMode.fencYuv->m_buf[0][y * cuSize + x]);
+    if (m_param->enableRecursionSkip == RDCOST_BASED_RSKIP)
+    {
+        uint32_t mean = 0;
+        uint32_t homo = 0;
+        uint32_t cuSize = bestMode.fencYuv->m_size;
+        for (uint32_t y = 0; y < cuSize; y++) {
+            for (uint32_t x = 0; x < cuSize; x++) {
+                mean += (bestMode.fencYuv->m_buf[0][y * cuSize + x]);
+            }
         }
+        mean = mean / (cuSize * cuSize);
+        for (uint32_t y = 0; y < cuSize; y++) {
+            for (uint32_t x = 0; x < cuSize; x++) {
+                homo += abs(int(bestMode.fencYuv->m_buf[0][y * cuSize + x] - mean));
+            }
+        }
+        homo = homo / (cuSize * cuSize);
+
+        if (homo < (.1 * mean))
+            return true;
+
+        return false;
     }
-    mean = mean / (cuSize * cuSize);
-    for (uint32_t y = 0 ; y < cuSize; y++){
-        for (uint32_t x = 0 ; x < cuSize; x++){
-            homo += abs(int(bestMode.fencYuv->m_buf[0][y * cuSize + x] - mean));
-        }
+    else
+    {
+        int blockType = bestMode.cu.m_log2CUSize[0] - 2;
+        int shift = bestMode.cu.m_log2CUSize[0] * 2;
+        intptr_t stride = m_frame->m_fencPic->m_stride;
+        intptr_t blockOffsetLuma = bestMode.cu.m_cuPelX + bestMode.cu.m_cuPelY * stride;
+        uint64_t sum_ss = primitives.cu[blockType].var(m_frame->m_edgeBitPic + blockOffsetLuma, stride);
+        uint32_t sum = (uint32_t)sum_ss;
+        uint32_t ss = (uint32_t)(sum_ss >> 32);
+        uint32_t pixelCount = 1 << shift;
+        double cuEdgeVariance = (ss - ((double)sum * sum / pixelCount)) / pixelCount;
+
+        if (cuEdgeVariance > (double)m_param->edgeVarThreshold)
+            return false;
+        else
+            return true;
     }
-    homo = homo / (cuSize * cuSize);
-
-    if (homo < (.1 * mean))
-        return true;
-
-    return false;
-}
+ }
 
 uint32_t Analysis::calculateCUVariance(const CUData& ctu, const CUGeom& cuGeom)
 {
@@ -3570,7 +3603,6 @@
             cnt++;
         }
     }
-    
     return cuVariance / cnt;
 }
 
diff -r c2769ac5fa9d -r ecf19726600a source/encoder/analysis.h
--- a/source/encoder/analysis.h	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/encoder/analysis.h	Wed Jan 29 12:19:07 2020 +0530
@@ -52,7 +52,7 @@
         splitRefs = 0;
         mvCost[0] = 0; // L0
         mvCost[1] = 0; // L1
-        sa8dCost    = 0;
+        sa8dCost  = 0;
     }
 };
 
@@ -120,7 +120,6 @@
 
     Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext);
     int32_t loadTUDepth(CUGeom cuGeom, CUData parentCTU);
-
 protected:
     /* Analysis data for save/load mode, writes/reads data based on absPartIdx */
     x265_analysis_inter_data*  m_reuseInterDataCTU;
diff -r c2769ac5fa9d -r ecf19726600a source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/encoder/encoder.cpp	Wed Jan 29 12:19:07 2020 +0530
@@ -1443,9 +1443,9 @@
     int32_t planeCount = x265_cli_csps[m_param->internalCsp].planes;
     memset(m_edgePic, 0, bufSize);
 
-    if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false))
-    {
-        x265_log(m_param, X265_LOG_ERROR, "Failed edge computation!");
+    if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false, 1))
+    {
+        x265_log(m_param, X265_LOG_ERROR, "Failed to compute edge!");
         return false;
     }
 
@@ -1752,6 +1752,12 @@
                         }
                     }
                 }
+                if (m_param->enableRecursionSkip >= EDGE_BASED_RSKIP && m_param->bHistBasedSceneCut)
+                {
+                    pixel* src = m_edgePic;
+                    primitives.planecopy_pp_shr(src, inFrame->m_fencPic->m_picWidth, inFrame->m_edgeBitPic, inFrame->m_fencPic->m_stride,
+                        inFrame->m_fencPic->m_picWidth, inFrame->m_fencPic->m_picHeight, 0);
+                }
             }
             else
             {
@@ -2414,7 +2420,7 @@
         encParam->maxNumReferences = param->maxNumReferences; // never uses more refs than specified in stream headers
         encParam->bEnableFastIntra = param->bEnableFastIntra;
         encParam->bEnableEarlySkip = param->bEnableEarlySkip;
-        encParam->bEnableRecursionSkip = param->bEnableRecursionSkip;
+        encParam->enableRecursionSkip = param->enableRecursionSkip;
         encParam->searchMethod = param->searchMethod;
         /* Scratch buffer prevents me_range from being increased for esa/tesa */
         if (param->searchRange < encParam->searchRange)
@@ -3400,7 +3406,7 @@
         p->maxNumReferences = zone->maxNumReferences;
         p->bEnableFastIntra = zone->bEnableFastIntra;
         p->bEnableEarlySkip = zone->bEnableEarlySkip;
-        p->bEnableRecursionSkip = zone->bEnableRecursionSkip;
+        p->enableRecursionSkip = zone->enableRecursionSkip;
         p->searchMethod = zone->searchMethod;
         p->searchRange = zone->searchRange;
         p->subpelRefine = zone->subpelRefine;
@@ -5701,7 +5707,7 @@
     TOOLCMP(oldParam->maxNumReferences, newParam->maxNumReferences, "ref=%d to %d\n");
     TOOLCMP(oldParam->bEnableFastIntra, newParam->bEnableFastIntra, "fast-intra=%d to %d\n");
     TOOLCMP(oldParam->bEnableEarlySkip, newParam->bEnableEarlySkip, "early-skip=%d to %d\n");
-    TOOLCMP(oldParam->bEnableRecursionSkip, newParam->bEnableRecursionSkip, "rskip=%d to %d\n");
+    TOOLCMP(oldParam->enableRecursionSkip, newParam->enableRecursionSkip, "rskip=%d to %d\n");
     TOOLCMP(oldParam->searchMethod, newParam->searchMethod, "me=%d to %d\n");
     TOOLCMP(oldParam->searchRange, newParam->searchRange, "merange=%d to %d\n");
     TOOLCMP(oldParam->subpelRefine, newParam->subpelRefine, "subme= %d to %d\n");
diff -r c2769ac5fa9d -r ecf19726600a source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/encoder/frameencoder.cpp	Wed Jan 29 12:19:07 2020 +0530
@@ -130,7 +130,7 @@
         {
             rowSum += sliceGroupSizeAccu;
             m_sliceBaseRow[++sidx] = i;
-        }        
+        }
     }
     X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
     m_sliceBaseRow[0] = 0;
@@ -448,6 +448,18 @@
     m_ssimCnt = 0;
     memset(&(m_frame->m_encData->m_frameStats), 0, sizeof(m_frame->m_encData->m_frameStats));
 
+    if (!m_param->bHistBasedSceneCut && m_param->rc.aqMode != X265_AQ_EDGE && m_param->enableRecursionSkip >= EDGE_BASED_RSKIP)
+    {
+        int height = m_frame->m_fencPic->m_picHeight;
+        int width = m_frame->m_fencPic->m_picWidth;
+        intptr_t stride = m_frame->m_fencPic->m_stride;
+
+        if (!computeEdge(m_frame->m_edgeBitPic, m_frame->m_fencPic->m_picOrg[0], NULL, stride, height, width, false, 1))
+        {
+            x265_log(m_param, X265_LOG_ERROR, " Failed to compute edge !");
+        }
+    }
+
     /* Emit access unit delimiter unless this is the first frame and the user is
      * not repeating headers (since AUD is supposed to be the first NAL in the access
      * unit) */
diff -r c2769ac5fa9d -r ecf19726600a source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/encoder/slicetype.cpp	Wed Jan 29 12:19:07 2020 +0530
@@ -87,7 +87,7 @@
 
 namespace X265_NS {
 
-bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta)
+bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel)
 {
     intptr_t rowOne = 0, rowTwo = 0, rowThree = 0, colOne = 0, colTwo = 0, colThree = 0;
     intptr_t middle = 0, topLeft = 0, topRight = 0, bottomLeft = 0, bottomRight = 0;
@@ -141,7 +141,7 @@
                        theta = 180 + theta;
                     edgeTheta[middle] = (pixel)theta;
                 }
-                edgePic[middle] = (pixel)(gradientMagnitude >= edgeThreshold ? edgeThreshold : blackPixel);
+                edgePic[middle] = (pixel)(gradientMagnitude >= EDGE_THRESHOLD ? whitePixel : blackPixel);
             }
         }
         return true;
@@ -519,6 +519,13 @@
                 if (param->rc.aqMode == X265_AQ_EDGE)
                     edgeFilter(curFrame, param);
 
+                if (param->rc.aqMode == X265_AQ_EDGE && !param->bHistBasedSceneCut && param->enableRecursionSkip >= EDGE_BASED_RSKIP)
+                {
+                    pixel* src = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
+                    primitives.planecopy_pp_shr(src, curFrame->m_fencPic->m_stride, curFrame->m_edgeBitPic,
+                        curFrame->m_fencPic->m_stride, curFrame->m_fencPic->m_picWidth, curFrame->m_fencPic->m_picHeight, SHIFT_TO_BITPLANE);
+                }
+
                 if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode == X265_AQ_EDGE)
                 {
                     double bit_depth_correction = 1.f / (1 << (2 * (X265_DEPTH - 8)));
diff -r c2769ac5fa9d -r ecf19726600a source/encoder/slicetype.h
--- a/source/encoder/slicetype.h	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/encoder/slicetype.h	Wed Jan 29 12:19:07 2020 +0530
@@ -44,9 +44,9 @@
 #define EDGE_INCLINATION 45
 
 #if HIGH_BIT_DEPTH
-#define edgeThreshold 1023.0
+#define EDGE_THRESHOLD 1023.0
 #else
-#define edgeThreshold 255.0
+#define EDGE_THRESHOLD 255.0
 #endif
 #define PI 3.14159265
 
@@ -101,7 +101,7 @@
 protected:
 
     uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize);
-    uint32_t edgeDensityCu(Frame*curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize);
+    uint32_t edgeDensityCu(Frame* curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize);
     uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, uint32_t qgSize);
     uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);
     bool     allocWeightedRef(Lowres& fenc);
@@ -265,7 +265,6 @@
     CostEstimateGroup& operator=(const CostEstimateGroup&);
 };
 
-bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta);
-
+bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel = EDGE_THRESHOLD);
 }
 #endif // ifndef X265_SLICETYPE_H
diff -r c2769ac5fa9d -r ecf19726600a source/test/regression-tests.txt
--- a/source/test/regression-tests.txt	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/test/regression-tests.txt	Wed Jan 29 12:19:07 2020 +0530
@@ -162,7 +162,15 @@
 sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02 --frame-dup --dup-threshold 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
 sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02
 sintel_trailer_2k_1920x1080_24.yuv, --preset ultrafast --hist-scenecut --hist-threshold 0.02
-
+crowd_run_1080p50.yuv, --preset faster --ctu 32 --rskip 2 --rskip-edge-threshold 5
+crowd_run_1080p50.yuv, --preset fast --ctu 64 --rskip 2 --rskip-edge-threshold 5 --aq-mode 4
+crowd_run_1080p50.yuv, --preset slow --ctu 32 --rskip 2 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1
+crowd_run_1080p50.yuv, --preset slower --ctu 16 --rskip 2 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1 --aq-mode 4
+crowd_run_1080p50.yuv, --preset faster --ctu 32 --rskip 3 --rskip-edge-threshold 5
+crowd_run_1080p50.yuv, --preset fast --ctu 64 --rskip 3 --rskip-edge-threshold 5 --aq-mode 4
+crowd_run_1080p50.yuv, --preset slow --ctu 32 --rskip 3 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1
+crowd_run_1080p50.yuv, --preset slower --ctu 16 --rskip 3 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1 --aq-mode 4
+ 
 # Main12 intraCost overflow bug test
 720p50_parkrun_ter.y4m,--preset medium
 
diff -r c2769ac5fa9d -r ecf19726600a source/x265.h
--- a/source/x265.h	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/x265.h	Wed Jan 29 12:19:07 2020 +0530
@@ -1255,9 +1255,9 @@
      * skip blocks. Default is disabled */
     int       bEnableEarlySkip;
 
-    /* Enable early CU size decisions to avoid recursing to higher depths. 
+    /* Enable early CU size decisions to avoid recursing to higher depths.
      * Default is enabled */
-    int bEnableRecursionSkip;
+    int       enableRecursionSkip;
 
     /* Use a faster search method to find the best intra mode. Default is 0 */
     int       bEnableFastIntra;
@@ -1857,7 +1857,7 @@
     double    edgeTransitionThreshold;
 
     /* Enables histogram based scenecut detection algorithm to detect scenecuts. Default disabled */
-    int      bHistBasedSceneCut;
+    int       bHistBasedSceneCut;
 
     /* Enable HME search ranges for L0, L1 and L2 respectively. */
     int       hmeRange[3];
@@ -1874,7 +1874,7 @@
     * analysis information stored in analysis-save. Higher the refine level higher
     * the information stored. Default is 5 */
     int       analysisSaveReuseLevel;
-    
+
     /* A value between 1 and 10 (both inclusive) determines the level of
     * analysis information reused in analysis-load. Higher the refine level higher
     * the information reused. Default is 5 */
@@ -1901,6 +1901,9 @@
     * info is available from the corresponding analysis-save. */
 
     int      confWinBottomOffset;
+
+    /* Edge variance threshold for quad tree establishment. */
+    float    edgeVarThreshold;
 } x265_param;
 
 /* x265_param_alloc:
diff -r c2769ac5fa9d -r ecf19726600a source/x265cli.h
--- a/source/x265cli.h	Mon Feb 17 20:46:53 2020 +0530
+++ b/source/x265cli.h	Wed Jan 29 12:19:07 2020 +0530
@@ -105,8 +105,8 @@
     { "amp",                  no_argument, NULL, 0 },
     { "no-early-skip",        no_argument, NULL, 0 },
     { "early-skip",           no_argument, NULL, 0 },
-    { "no-rskip",             no_argument, NULL, 0 },
-    { "rskip",                no_argument, NULL, 0 },
+    { "rskip",                required_argument, NULL, 0 },
+    { "rskip-edge-threshold", required_argument, NULL, 0 },
     { "no-fast-cbf",          no_argument, NULL, 0 },
     { "fast-cbf",             no_argument, NULL, 0 },
     { "no-tskip",             no_argument, NULL, 0 },
@@ -457,7 +457,9 @@
     H0("   --[no-]ssim-rd                Enable ssim rate distortion optimization, 0 to disable. Default %s\n", OPT(param->bSsimRd));
     H0("   --[no-]rd-refine              Enable QP based RD refinement for rd levels 5 and 6. Default %s\n", OPT(param->bEnableRdRefine));
     H0("   --[no-]early-skip             Enable early SKIP detection. Default %s\n", OPT(param->bEnableEarlySkip));
-    H0("   --[no-]rskip                  Enable early exit from recursion. Default %s\n", OPT(param->bEnableRecursionSkip));
+    H0("   --rskip <mode>                Set mode for early exit from recursion. Mode 1: exit using rdcost. Mode 2: exit using edge density. Mode 3: exit using edge density with forceful skip for small-sized CUs."
+       "                                          Mode 0: disabled. Default %s\n", OPT(param->enableRecursionSkip));
+    H1("   --rskip-edge-threshold        Threshold in terms of percentage (integer in the range [0,100]) for minimum edge density in CUs to prune the recursion depth. Applicable only for rskip modes 2 and 3. Default: %.f\n", param->edgeVarThreshold*100.0f);
     H1("   --[no-]tskip-fast             Enable fast intra transform skipping. Default %s\n", OPT(param->bEnableTSkipFast));
     H1("   --[no-]splitrd-skip           Enable skipping split RD analysis when sum of split CU rdCost larger than one split CU rdCost for Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip));
     H1("   --nr-intra <integer>          An integer value in range of 0 to 2000, which denotes strength of noise reduction in intra CUs. Default 0\n");
-------------- next part --------------
A non-text attachment was scrubbed...
Name: x265-default.patch
Type: text/x-patch
Size: 36992 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20200219/9a79ee55/attachment-0001.bin>
    
    
More information about the x265-devel
mailing list