[x265-commits] [x265] analysis: add #if to make pmode exactly match non-pmode

Steve Borho steve at borho.org
Thu Oct 30 04:48:26 CET 2014


details:   http://hg.videolan.org/x265/rev/e92170188568
branches:  
changeset: 8747:e92170188568
user:      Steve Borho <steve at borho.org>
date:      Wed Oct 29 20:02:53 2014 -0500
description:
analysis: add #if to make pmode exactly match non-pmode

This switch will throw away the hard work of some worker thread, so it should
only be used for debugging.

With the flag enabled, pmode output matches non-pmode output exactly for RD
levels 2, 3 and 4, but RD levels 5 and 6 still have problems.
Subject: [x265] analysis: clarify --rd 1

details:   http://hg.videolan.org/x265/rev/2a719b6e07ee
branches:  
changeset: 8748:2a719b6e07ee
user:      Steve Borho <steve at borho.org>
date:      Wed Oct 29 20:03:21 2014 -0500
description:
analysis: clarify --rd 1
Subject: [x265] asm: correct wrong index name

details:   http://hg.videolan.org/x265/rev/3995c5e0f313
branches:  stable
changeset: 8749:3995c5e0f313
user:      Min Chen <chenm003 at 163.com>
date:      Wed Oct 29 16:36:37 2014 -0700
description:
asm: correct wrong index name
Subject: [x265] analysis: do not allow top-skip and depth earlyout in --pmode with --rd 5/6

details:   http://hg.videolan.org/x265/rev/476acb7a4088
branches:  stable
changeset: 8750:476acb7a4088
user:      Steve Borho <steve at borho.org>
date:      Wed Oct 29 22:20:55 2014 -0500
description:
analysis: do not allow top-skip and depth earlyout in --pmode with --rd 5/6

Outputs now match or are better than those without --pmode.
Subject: [x265] search: nits - pull Mode out of Search class, remove unused NUM_LAYERS

details:   http://hg.videolan.org/x265/rev/2bcf4e77b4bf
branches:  
changeset: 8751:2bcf4e77b4bf
user:      Steve Borho <steve at borho.org>
date:      Wed Oct 29 22:34:31 2014 -0500
description:
search: nits - pull Mode out of Search class, remove unused NUM_LAYERS
Subject: [x265] cmake: remove obsolete pool test

details:   http://hg.videolan.org/x265/rev/393eb6c95e7c
branches:  
changeset: 8752:393eb6c95e7c
user:      Steve Borho <steve at borho.org>
date:      Wed Oct 29 22:35:05 2014 -0500
description:
cmake: remove obsolete pool test
Subject: [x265] Merge with stable

details:   http://hg.videolan.org/x265/rev/2b7d08c60105
branches:  
changeset: 8753:2b7d08c60105
user:      Steve Borho <steve at borho.org>
date:      Wed Oct 29 22:35:39 2014 -0500
description:
Merge with stable
Subject: [x265] analysis: inline checkBestMode(), improve comments

details:   http://hg.videolan.org/x265/rev/86ca1de606e3
branches:  
changeset: 8754:86ca1de606e3
user:      Steve Borho <steve at borho.org>
date:      Wed Oct 29 22:38:58 2014 -0500
description:
analysis: inline checkBestMode(), improve comments

diffstat:

 source/common/x86/asm-primitives.cpp |    2 +-
 source/encoder/analysis.cpp          |   97 ++++++++-----
 source/encoder/analysis.h            |   24 +++-
 source/encoder/frameencoder.cpp      |    2 +-
 source/encoder/search.cpp            |    2 +
 source/encoder/search.h              |  111 +++++++--------
 source/test/CMakeLists.txt           |    3 -
 source/test/testpool.cpp             |  238 -----------------------------------
 8 files changed, 138 insertions(+), 341 deletions(-)

diffs (truncated from 661 to 300 lines):

diff -r 31ed48cdbefe -r 86ca1de606e3 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Oct 29 17:50:05 2014 -0500
+++ b/source/common/x86/asm-primitives.cpp	Wed Oct 29 22:38:58 2014 -0500
@@ -1798,7 +1798,7 @@ void Setup_Assembly_Primitives(EncoderPr
         p.transpose[BLOCK_32x32] = x265_transpose32_avx2;
         p.transpose[BLOCK_64x64] = x265_transpose64_avx2;
 #endif
-        p.luma_hpp[BLOCK_4x4] = x265_interp_8tap_horiz_pp_4x4_avx2;
+        p.luma_hpp[LUMA_4x4] = x265_interp_8tap_horiz_pp_4x4_avx2;
     }
 #endif // if HIGH_BIT_DEPTH
 }
diff -r 31ed48cdbefe -r 86ca1de606e3 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Wed Oct 29 17:50:05 2014 -0500
+++ b/source/encoder/analysis.cpp	Wed Oct 29 22:38:58 2014 -0500
@@ -116,7 +116,7 @@ void Analysis::destroy()
     }
 }
 
-Search::Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext)
+Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext)
 {
     m_slice = ctu.m_slice;
     m_frame = &frame;
@@ -479,6 +479,8 @@ void Analysis::parallelModeAnalysis(int 
     }
 }
 
+#define MATCH_NON_PMODE 0
+
 void Analysis::compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom)
 {
     uint32_t depth = cuGeom.depth;
@@ -488,7 +490,7 @@ void Analysis::compressInterCU_dist(cons
 
     bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
-    uint32_t minDepth = mightNotSplit ? topSkipMinDepth(parentCTU, cuGeom) : 4;
+    uint32_t minDepth = m_param->rdLevel <= 4 ? topSkipMinDepth(parentCTU, cuGeom) : 0;
 
     X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not support RD 0 or 1\n");
 
@@ -560,13 +562,31 @@ void Analysis::compressInterCU_dist(cons
 
             if (bTryAmp)
             {
-                if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
+#if MATCH_NON_PMODE
+                bool bHor = false, bVer = false;
+                if (bestInter->cu.m_partSize[0] == SIZE_2NxN)
+                    bHor = true;
+                else if (bestInter->cu.m_partSize[0] == SIZE_Nx2N)
+                    bVer = true;
+                else if (bestInter->cu.m_partSize[0] == SIZE_2Nx2N &&
+                         md.bestMode && md.bestMode->cu.getQtRootCbf(0))
+                {
+                    bHor = true;
+                    bVer = true;
+                }
+#define HOR && bHor
+#define VER && bVer
+#else
+#define HOR
+#define VER
+#endif
+                if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost HOR)
                     bestInter = &md.pred[PRED_2NxnU];
-                if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
+                if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost HOR)
                     bestInter = &md.pred[PRED_2NxnD];
-                if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
+                if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost VER)
                     bestInter = &md.pred[PRED_nLx2N];
-                if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
+                if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost VER)
                     bestInter = &md.pred[PRED_nRx2N];
             }
 
@@ -583,7 +603,11 @@ void Analysis::compressInterCU_dist(cons
                 /* RD selection between merge, inter and intra */
                 checkBestMode(*bestInter, depth);
 
+#if MATCH_NON_PMODE
+                if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) || md.bestMode->sa8dCost == MAX_INT64)
+#else
                 if (bTryIntra)
+#endif
                     checkBestMode(md.pred[PRED_INTRA], depth);
             }
             else /* m_param->rdLevel == 2 */
@@ -623,10 +647,26 @@ void Analysis::compressInterCU_dist(cons
 
             if (bTryAmp)
             {
-                checkBestMode(md.pred[PRED_2NxnU], depth);
-                checkBestMode(md.pred[PRED_2NxnD], depth);
-                checkBestMode(md.pred[PRED_nLx2N], depth);
-                checkBestMode(md.pred[PRED_nRx2N], depth);
+#if MATCH_NON_PMODE
+                bool bHor = false, bVer = false;
+                if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN)
+                    bHor = true;
+                else if (md.bestMode->cu.m_partSize[0] == SIZE_Nx2N)
+                    bVer = true;
+                else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N && !md.bestMode->cu.m_mergeFlag[0] && !md.bestMode->cu.isSkipped(0))
+                {
+                    bHor = true;
+                    bVer = true;
+                }
+#undef HOR
+#undef VER
+#define VER if (bVer)
+#define HOR if (bHor)
+#endif
+                VER checkBestMode(md.pred[PRED_2NxnU], depth);
+                VER checkBestMode(md.pred[PRED_2NxnD], depth);
+                HOR checkBestMode(md.pred[PRED_nLx2N], depth);
+                HOR checkBestMode(md.pred[PRED_nRx2N], depth);
             }
 
             if (bTryIntra)
@@ -656,7 +696,7 @@ void Analysis::compressInterCU_dist(cons
     if (md.bestMode)
     {
         bNoSplit = !!md.bestMode->cu.isSkipped(0);
-        if (mightSplit && depth && depth >= minDepth && !bNoSplit)
+        if (mightSplit && depth && depth >= minDepth && !bNoSplit && m_param->rdLevel <= 4)
             bNoSplit = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
     }
 
@@ -728,7 +768,7 @@ void Analysis::compressInterCU_rd0_4(con
 
     bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
-    uint32_t minDepth = mightNotSplit ? topSkipMinDepth(parentCTU, cuGeom) : 4;
+    uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
 
     if (mightNotSplit && depth >= minDepth)
     {
@@ -961,23 +1001,17 @@ void Analysis::compressInterCU_rd0_4(con
 
         if (mightNotSplit)
             addSplitFlagCost(*splitPred, cuGeom.depth);
-        else if (m_param->rdLevel <= 1)
+        else if (m_param->rdLevel > 1)
+            updateModeCost(*splitPred);
+        else
             splitPred->sa8dCost = m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
-        else
-            updateModeCost(*splitPred);
 
         if (!md.bestMode)
             md.bestMode = splitPred;
-        else if (m_param->rdLevel >= 1)
-        {
-            if (splitPred->rdCost < md.bestMode->rdCost)
-                md.bestMode = splitPred;
-        }
-        else
-        {
-            if (splitPred->sa8dCost < md.bestMode->sa8dCost)
-                md.bestMode = splitPred;
-        }
+        else if (m_param->rdLevel > 1)
+            checkBestMode(*splitPred, cuGeom.depth);
+        else if (splitPred->sa8dCost < md.bestMode->sa8dCost)
+            md.bestMode = splitPred;
     }
 
     if (!depth || md.bestMode->cu.m_predMode[0] != MODE_INTRA)
@@ -1503,19 +1537,6 @@ void Analysis::encodeResidue(const CUDat
     cu.updatePic(cuGeom.depth);
 }
 
-/* check whether current try is the best with identifying the depth of current try */
-void Analysis::checkBestMode(Mode& mode, uint32_t depth)
-{
-    ModeDepth& md = m_modeDepth[depth];
-    if (md.bestMode)
-    {
-        if (mode.rdCost < md.bestMode->rdCost)
-            md.bestMode = &mode;
-    }
-    else
-        md.bestMode = &mode;
-}
-
 void Analysis::addSplitFlagCost(Mode& mode, uint32_t depth)
 {
     if (m_param->rdLevel >= 3)
diff -r 31ed48cdbefe -r 86ca1de606e3 source/encoder/analysis.h
--- a/source/encoder/analysis.h	Wed Oct 29 17:50:05 2014 -0500
+++ b/source/encoder/analysis.h	Wed Oct 29 22:38:58 2014 -0500
@@ -75,7 +75,7 @@ public:
     Analysis();
     bool create(ThreadLocalData* tld);
     void destroy();
-    Search::Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext);
+    Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext);
 
 protected:
 
@@ -107,13 +107,31 @@ protected:
     /* encode current bestMode losslessly, pick best RD cost */
     void tryLossless(const CUGeom& cuGeom);
 
+    /* add the RD cost of coding a split flag (0 or 1) to the given mode */
+    void addSplitFlagCost(Mode& mode, uint32_t depth);
+
+    /* update CBF flags and QP values to be internally consistent */
     void checkDQP(CUData& cu, const CUGeom& cuGeom);
-    void addSplitFlagCost(Mode& mode, uint32_t depth);
-    void checkBestMode(Mode& mode, uint32_t depth);
+
+    /* work-avoidance heuristics for RD levels < 5 */
     uint32_t topSkipMinDepth(const CUData& parentCTU, const CUGeom& cuGeom);
     bool recursionDepthCheck(const CUData& parentCTU, const CUGeom& cuGeom, const Mode& bestMode);
 
+    /* generate residual and recon pixels for an entire CTU recursively (RD0) */
     void encodeResidue(const CUData& parentCTU, const CUGeom& cuGeom);
+
+    /* check whether current mode is the new best */
+    inline void checkBestMode(Mode& mode, uint32_t depth)
+    {
+        ModeDepth& md = m_modeDepth[depth];
+        if (md.bestMode)
+        {
+            if (mode.rdCost < md.bestMode->rdCost)
+                md.bestMode = &mode;
+        }
+        else
+            md.bestMode = &mode;
+    }
 };
 
 struct ThreadLocalData
diff -r 31ed48cdbefe -r 86ca1de606e3 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Wed Oct 29 17:50:05 2014 -0500
+++ b/source/encoder/frameencoder.cpp	Wed Oct 29 22:38:58 2014 -0500
@@ -758,7 +758,7 @@ void FrameEncoder::processRowEncoder(int
         }
 
         // Does all the CU analysis, returns best top level mode decision
-        Search::Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
+        Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
 
         /* advance top-level row coder to include the context of this CTU.
          * if SAO is disabled, rowCoder writes the final CTU bitstream */
diff -r 31ed48cdbefe -r 86ca1de606e3 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Wed Oct 29 17:50:05 2014 -0500
+++ b/source/encoder/search.cpp	Wed Oct 29 22:38:58 2014 -0500
@@ -37,6 +37,8 @@ using namespace x265;
 #pragma warning(disable: 4244) // '=' : conversion from 'int' to 'uint8_t', possible loss of data)
 #endif
 
+#define MVP_IDX_BITS 1
+
 ALIGN_VAR_32(const pixel, Search::zeroPixel[MAX_CU_SIZE]) = { 0 };
 ALIGN_VAR_32(const int16_t, Search::zeroShort[MAX_CU_SIZE]) = { 0 };
 
diff -r 31ed48cdbefe -r 86ca1de606e3 source/encoder/search.h
--- a/source/encoder/search.h	Wed Oct 29 17:50:05 2014 -0500
+++ b/source/encoder/search.h	Wed Oct 29 22:38:58 2014 -0500
@@ -35,9 +35,6 @@
 #include "entropy.h"
 #include "motion.h"
 
-#define MVP_IDX_BITS 1
-#define NUM_LAYERS 4
-
 namespace x265 {
 // private namespace
 
@@ -68,6 +65,48 @@ struct RQTData
     Yuv      bidirPredYuv[2];
 };
 
+struct Mode
+{
+    CUData     cu;
+    const Yuv* fencYuv;
+    Yuv        predYuv;
+    Yuv        reconYuv;
+    Entropy    contexts;
+
+    uint64_t   rdCost;     // sum of partition (psy) RD costs          (sse(fenc, recon) + lambda2 * bits)
+    uint64_t   sa8dCost;   // sum of partition sa8d distortion costs   (sa8d(fenc, pred) + lambda * bits)
+    uint32_t   sa8dBits;   // signal bits used in sa8dCost calculation
+    uint32_t   psyEnergy;  // sum of partition psycho-visual energy difference
+    uint32_t   distortion; // sum of partition SSE distortion
+    uint32_t   totalBits;  // sum of partition bits (mv + coeff)
+    uint32_t   mvBits;     // Mv bits + Ref + block type (or intra mode)
+    uint32_t   coeffBits;  // Texture bits (DCT Coeffs)
+
+    void initCosts()
+    {
+        rdCost = 0;
+        sa8dCost = 0;
+        sa8dBits = 0;
+        psyEnergy = 0;


More information about the x265-commits mailing list