[x265] [PATCH] intra: skip RD analysis when sum of sub CU split cost bigger than non-split cost

Sat Aug 12 19:43:27 CEST 2017

Thanks for this additional explanation, and thanks again for your
contribution!



*From:* x265-devel [mailto:x265-devel-bounces at videolan.org] *On Behalf
Of *Ximing
Cheng
*Sent:* Friday, August 11, 2017 12:32 PM
*To:* Ximing Cheng
*Subject:* Re: [x265] [PATCH] intra: skip RD analysis when sum of sub
CU split cost bigger than non-split cost



In fact, this skip is not a fast skip algorithm.

When the running sum of the sub-CU costs is already larger than the
non-split CU's best cost (both the sub-CU rdcost and the non-split CU
rdcost are taken without the split flag cost), splitting into 4 parts at
this CU depth is already worse than not splitting. Therefore, analysing
the remaining sub-CUs (the remaining N * 1/4 of the CU) is useless.



.....................
.         .         .
.    A    .    B    .
.         .         .
.....................
.         .         .
.    C    .    D    .
.         .         .
.....................   (A, B, C and D are the 4 parts of a CU)

Suppose the sum of the sub-CU costs analysed so far (A_Cost + B_Cost) is
already larger than the non-split cost (NSCost), i.e. NSCost < A_Cost +
B_Cost, and the remaining parts (C, D) were still analysed for RD.

C_Cost + D_Cost >= 0 --->

NSCost < A_Cost + B_Cost + C_Cost + D_Cost ---> (likely that)

NSCost + splitCost(splitflag = 0) < A_Cost + B_Cost + C_Cost + D_Cost +
splitCost(splitflag = 1)  ---> choose non-split



So, the RD analysis of C and D can be skipped.

In my test cases, the MD5 checksum of the output bitstream after this skip
is the same as the original.



------------------ Original ------------------

*From: * "Ximing Cheng";<chengximing1989 at foxmail.com>;

*Send time:* Friday, Aug 4, 2017 1:56 AM

*To:* "x265-devel"<x265-devel at videolan.org>;

*Subject: * [x265] [PATCH] intra: skip RD analysis when sum of sub
CU split cost bigger than non-split cost



# HG changeset patch
# User Ximing Cheng <ximingcheng at tencent.com>
# Date 1501782508 -28800
#      Fri Aug 04 01:48:28 2017 +0800
# Node ID 5943a1f73d5814a3a723f814a4dd0635b1fe2b35
# Parent  d11482e5fedbcdaf62ee3c6872f43827d99ad181
intra: skip RD analysis when sum of sub CU split cost bigger than non-split
cost

diff -r d11482e5fedb -r 5943a1f73d58 source/CMakeLists.txt

--- a/source/CMakeLists.txt Mon Jul 24 11:15:38 2017 +0530
+++ b/source/CMakeLists.txt Fri Aug 04 01:48:28 2017 +0800
@@ -29,7 +29,7 @@
 option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 131)
+set(X265_BUILD 132)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r d11482e5fedb -r 5943a1f73d58 source/common/param.cpp
--- a/source/common/param.cpp Mon Jul 24 11:15:38 2017 +0530
+++ b/source/common/param.cpp Fri Aug 04 01:48:28 2017 +0800
@@ -157,6 +157,7 @@
     param->bEnableConstrainedIntra = 0;
     param->bEnableStrongIntraSmoothing = 1;
     param->bEnableFastIntra = 0;
+    param->bEnableSplitRdSkip = 0;

     /* Inter Coding tools */
     param->searchMethod = X265_HEX_SEARCH;
@@ -975,6 +976,7 @@
         OPT("refine-inter")p->interRefine = atobool(value);
         OPT("refine-mv")p->mvRefine = atobool(value);
         OPT("force-flush")p->forceFlush = atoi(value);
+        OPT("splitrd-skip") p->bEnableSplitRdSkip = atobool(value);
         else
             return X265_PARAM_BAD_NAME;
     }
@@ -1431,6 +1433,7 @@
     TOOLOPT(param->bEnableRdRefine, "rd-refine");
     TOOLOPT(param->bEnableEarlySkip, "early-skip");
     TOOLOPT(param->bEnableRecursionSkip, "rskip");
+    TOOLOPT(param->bEnableSplitRdSkip, "splitrd-skip");
     TOOLVAL(param->noiseReductionIntra, "nr-intra=%d");
     TOOLVAL(param->noiseReductionInter, "nr-inter=%d");
     TOOLOPT(param->bEnableTSkipFast, "tskip-fast");
@@ -1560,6 +1563,7 @@
     BOOL(p->bEnableTSkipFast, "tskip-fast");
     BOOL(p->bCULossless, "cu-lossless");
     BOOL(p->bIntraInBFrames, "b-intra");
+    BOOL(p->bEnableSplitRdSkip, "splitrd-skip");
     s += sprintf(s, " rdpenalty=%d", p->rdPenalty);
     s += sprintf(s, " psy-rd=%.2f", p->psyRd);
     s += sprintf(s, " psy-rdoq=%.2f", p->psyRdoq);
diff -r d11482e5fedb -r 5943a1f73d58 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Mon Jul 24 11:15:38 2017 +0530
+++ b/source/encoder/analysis.cpp Fri Aug 04 01:48:28 2017 +0800
@@ -485,7 +485,7 @@
     md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic,
parentCTU.m_cuAddr, cuGeom.absPartIdx);
 }

-void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom&
cuGeom, int32_t qp)
+uint64_t Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom&
cuGeom, int32_t qp)
 {
     uint32_t depth = cuGeom.depth;
     ModeDepth& md = m_modeDepth[depth];
@@ -560,6 +560,8 @@
         invalidateContexts(nextDepth);
         Entropy* nextContext = &m_rqt[depth].cur;
         int32_t nextQP = qp;
+        uint64_t curCost = 0;
+        int skipSplitCheck = 0;

         for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
         {
@@ -572,7 +574,17 @@
                 if (m_slice->m_pps->bUseDQP && nextDepth <=
m_slice->m_pps->maxCuDQPDepth)
                     nextQP = setLambdaFromQP(parentCTU,
calculateQpforCuSize(parentCTU, childGeom));

-                compressIntraCU(parentCTU, childGeom, nextQP);
+                if (m_param->bEnableSplitRdSkip)
+                {
+                    curCost += compressIntraCU(parentCTU, childGeom,
nextQP);
+                    if (m_modeDepth[depth].bestMode && curCost >
m_modeDepth[depth].bestMode->rdCost)
+                    {
+                        skipSplitCheck = 1;
+                        break;
+                    }
+                }
+                else
+                    compressIntraCU(parentCTU, childGeom, nextQP);

                 // Save best CU and pred data for this sub CU
                 splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
subPartIdx);
@@ -590,14 +602,17 @@
                     memset(parentCTU.m_cuDepth + childGeom.absPartIdx, 0,
childGeom.numPartitions);
             }
         }
-        nextContext->store(splitPred->contexts);
-        if (mightNotSplit)
-            addSplitFlagCost(*splitPred, cuGeom.depth);
-        else
-            updateModeCost(*splitPred);
-
-        checkDQPForSplitPred(*splitPred, cuGeom);
-        checkBestMode(*splitPred, depth);
+        if (!skipSplitCheck)
+        {
+            nextContext->store(splitPred->contexts);
+            if (mightNotSplit)
+                addSplitFlagCost(*splitPred, cuGeom.depth);
+            else
+                updateModeCost(*splitPred);
+
+            checkDQPForSplitPred(*splitPred, cuGeom);
+            checkBestMode(*splitPred, depth);
+        }
     }

     if (m_param->bEnableRdRefine && depth <= m_slice->m_pps->maxCuDQPDepth)
@@ -620,6 +635,8 @@
     md.bestMode->cu.copyToPic(depth);
     if (md.bestMode != &md.pred[PRED_SPLIT])
         md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic,
parentCTU.m_cuAddr, cuGeom.absPartIdx);
+
+    return md.bestMode->rdCost;
 }

 void Analysis::PMODE::processTasks(int workerThreadId)
diff -r d11482e5fedb -r 5943a1f73d58 source/encoder/analysis.h
--- a/source/encoder/analysis.h Mon Jul 24 11:15:38 2017 +0530
+++ b/source/encoder/analysis.h Fri Aug 04 01:48:28 2017 +0800
@@ -145,7 +145,7 @@
     void qprdRefine(const CUData& parentCTU, const CUGeom& cuGeom, int32_t
qp, int32_t lqp);

     /* full analysis for an I-slice CU */
-    void compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom,
int32_t qp);
+    uint64_t compressIntraCU(const CUData& parentCTU, const CUGeom&
cuGeom, int32_t qp);

     /* full analysis for a P or B slice CU */
     uint32_t compressInterCU_dist(const CUData& parentCTU, const CUGeom&
cuGeom, int32_t qp);
diff -r d11482e5fedb -r 5943a1f73d58 source/x265.h
--- a/source/x265.h Mon Jul 24 11:15:38 2017 +0530
+++ b/source/x265.h Fri Aug 04 01:48:28 2017 +0800
@@ -1482,6 +1482,9 @@

     /* Force flushing the frames from encoder */
     int       forceFlush;
+
+    /* Enable skipping split RD analysis when sum of split CU rdCost
larger than none split CU rdCost for Intra CU */
+    int       bEnableSplitRdSkip;
 } x265_param;

 /* x265_param_alloc:
diff -r d11482e5fedb -r 5943a1f73d58 source/x265cli.h
--- a/source/x265cli.h Mon Jul 24 11:15:38 2017 +0530
+++ b/source/x265cli.h Fri Aug 04 01:48:28 2017 +0800
@@ -281,6 +281,8 @@
     { "refine-mv",            no_argument, NULL, 0 },
     { "no-refine-mv",         no_argument, NULL, 0 },
     { "force-flush",    required_argument, NULL, 0 },
+    { "splitrd-skip",         no_argument, NULL, 0 },
+    { "no-splitrd-skip",      no_argument, NULL, 0 },
     { 0, 0, 0, 0 },
     { 0, 0, 0, 0 },
     { 0, 0, 0, 0 },
@@ -375,6 +377,7 @@
     H0("   --[no-]early-skip             Enable early SKIP detection.
Default %s\n", OPT(param->bEnableEarlySkip));
     H0("   --[no-]rskip                  Enable early exit from recursion.
Default %s\n", OPT(param->bEnableRecursionSkip));
     H1("   --[no-]tskip-fast             Enable fast intra transform
skipping. Default %s\n", OPT(param->bEnableTSkipFast));
+    H1("   --[no-]splitrd-skip           Enable skipping split RD analysis
when sum of split CU rdCost larger than none split CU rdCost for Intra CU.
Default %s\n", OPT(param->bEnableSplitRdSkip));
     H1("   --nr-intra <integer>          An integer value in range of 0 to
2000, which denotes strength of noise reduction in intra CUs. Default 0\n");
     H1("   --nr-inter <integer>          An integer value in range of 0 to
2000, which denotes strength of noise reduction in inter CUs. Default 0\n");
     H0("   --ctu-info <integer>          Enable receiving ctu information
asynchronously and determine reaction to the CTU information (0, 1, 2, 4,
6) Default 0\n"


_______________________________________________
x265-devel mailing list
x265-devel at videolan.org
https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20170812/1b6601d5/attachment-0001.html>