[x265-commits] [x265] analysis: add #if to make pmode exactly match non-pmode
Steve Borho
steve at borho.org
Thu Oct 30 04:48:26 CET 2014
details: http://hg.videolan.org/x265/rev/e92170188568
branches:
changeset: 8747:e92170188568
user: Steve Borho <steve at borho.org>
date: Wed Oct 29 20:02:53 2014 -0500
description:
analysis: add #if to make pmode exactly match non-pmode
This switch will throw away the hard work of some worker thread, so it should
only be used for debugging.
With the flag enabled, pmode matches non-pmode output exactly for RD levels
2, 3 and 4, but RD levels 5 and 6 still have problems.
Subject: [x265] analysis: clarify --rd 1
details: http://hg.videolan.org/x265/rev/2a719b6e07ee
branches:
changeset: 8748:2a719b6e07ee
user: Steve Borho <steve at borho.org>
date: Wed Oct 29 20:03:21 2014 -0500
description:
analysis: clarify --rd 1
Subject: [x265] asm: correct wrong index name
details: http://hg.videolan.org/x265/rev/3995c5e0f313
branches: stable
changeset: 8749:3995c5e0f313
user: Min Chen <chenm003 at 163.com>
date: Wed Oct 29 16:36:37 2014 -0700
description:
asm: correct wrong index name
Subject: [x265] analysis: do not allow top-skip and depth earlyout in --pmode with --rd 5/6
details: http://hg.videolan.org/x265/rev/476acb7a4088
branches: stable
changeset: 8750:476acb7a4088
user: Steve Borho <steve at borho.org>
date: Wed Oct 29 22:20:55 2014 -0500
description:
analysis: do not allow top-skip and depth earlyout in --pmode with --rd 5/6
The outputs now match or are better than those without --pmode.
Subject: [x265] search: nits - pull Mode out of Search class, remove unused NUM_LAYERS
details: http://hg.videolan.org/x265/rev/2bcf4e77b4bf
branches:
changeset: 8751:2bcf4e77b4bf
user: Steve Borho <steve at borho.org>
date: Wed Oct 29 22:34:31 2014 -0500
description:
search: nits - pull Mode out of Search class, remove unused NUM_LAYERS
Subject: [x265] cmake: remove obsolete pool test
details: http://hg.videolan.org/x265/rev/393eb6c95e7c
branches:
changeset: 8752:393eb6c95e7c
user: Steve Borho <steve at borho.org>
date: Wed Oct 29 22:35:05 2014 -0500
description:
cmake: remove obsolete pool test
Subject: [x265] Merge with stable
details: http://hg.videolan.org/x265/rev/2b7d08c60105
branches:
changeset: 8753:2b7d08c60105
user: Steve Borho <steve at borho.org>
date: Wed Oct 29 22:35:39 2014 -0500
description:
Merge with stable
Subject: [x265] analysis: inline checkBestMode(), improve comments
details: http://hg.videolan.org/x265/rev/86ca1de606e3
branches:
changeset: 8754:86ca1de606e3
user: Steve Borho <steve at borho.org>
date: Wed Oct 29 22:38:58 2014 -0500
description:
analysis: inline checkBestMode(), improve comments
diffstat:
source/common/x86/asm-primitives.cpp | 2 +-
source/encoder/analysis.cpp | 97 ++++++++-----
source/encoder/analysis.h | 24 +++-
source/encoder/frameencoder.cpp | 2 +-
source/encoder/search.cpp | 2 +
source/encoder/search.h | 111 +++++++--------
source/test/CMakeLists.txt | 3 -
source/test/testpool.cpp | 238 -----------------------------------
8 files changed, 138 insertions(+), 341 deletions(-)
diffs (truncated from 661 to 300 lines):
diff -r 31ed48cdbefe -r 86ca1de606e3 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Oct 29 17:50:05 2014 -0500
+++ b/source/common/x86/asm-primitives.cpp Wed Oct 29 22:38:58 2014 -0500
@@ -1798,7 +1798,7 @@ void Setup_Assembly_Primitives(EncoderPr
p.transpose[BLOCK_32x32] = x265_transpose32_avx2;
p.transpose[BLOCK_64x64] = x265_transpose64_avx2;
#endif
- p.luma_hpp[BLOCK_4x4] = x265_interp_8tap_horiz_pp_4x4_avx2;
+ p.luma_hpp[LUMA_4x4] = x265_interp_8tap_horiz_pp_4x4_avx2;
}
#endif // if HIGH_BIT_DEPTH
}
diff -r 31ed48cdbefe -r 86ca1de606e3 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Wed Oct 29 17:50:05 2014 -0500
+++ b/source/encoder/analysis.cpp Wed Oct 29 22:38:58 2014 -0500
@@ -116,7 +116,7 @@ void Analysis::destroy()
}
}
-Search::Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext)
+Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext)
{
m_slice = ctu.m_slice;
m_frame = &frame;
@@ -479,6 +479,8 @@ void Analysis::parallelModeAnalysis(int
}
}
+#define MATCH_NON_PMODE 0
+
void Analysis::compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom)
{
uint32_t depth = cuGeom.depth;
@@ -488,7 +490,7 @@ void Analysis::compressInterCU_dist(cons
bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
- uint32_t minDepth = mightNotSplit ? topSkipMinDepth(parentCTU, cuGeom) : 4;
+ uint32_t minDepth = m_param->rdLevel <= 4 ? topSkipMinDepth(parentCTU, cuGeom) : 0;
X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not support RD 0 or 1\n");
@@ -560,13 +562,31 @@ void Analysis::compressInterCU_dist(cons
if (bTryAmp)
{
- if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
+#if MATCH_NON_PMODE
+ bool bHor = false, bVer = false;
+ if (bestInter->cu.m_partSize[0] == SIZE_2NxN)
+ bHor = true;
+ else if (bestInter->cu.m_partSize[0] == SIZE_Nx2N)
+ bVer = true;
+ else if (bestInter->cu.m_partSize[0] == SIZE_2Nx2N &&
+ md.bestMode && md.bestMode->cu.getQtRootCbf(0))
+ {
+ bHor = true;
+ bVer = true;
+ }
+#define HOR && bHor
+#define VER && bVer
+#else
+#define HOR
+#define VER
+#endif
+ if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost HOR)
bestInter = &md.pred[PRED_2NxnU];
- if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
+ if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost HOR)
bestInter = &md.pred[PRED_2NxnD];
- if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
+ if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost VER)
bestInter = &md.pred[PRED_nLx2N];
- if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
+ if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost VER)
bestInter = &md.pred[PRED_nRx2N];
}
@@ -583,7 +603,11 @@ void Analysis::compressInterCU_dist(cons
/* RD selection between merge, inter and intra */
checkBestMode(*bestInter, depth);
+#if MATCH_NON_PMODE
+ if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) || md.bestMode->sa8dCost == MAX_INT64)
+#else
if (bTryIntra)
+#endif
checkBestMode(md.pred[PRED_INTRA], depth);
}
else /* m_param->rdLevel == 2 */
@@ -623,10 +647,26 @@ void Analysis::compressInterCU_dist(cons
if (bTryAmp)
{
- checkBestMode(md.pred[PRED_2NxnU], depth);
- checkBestMode(md.pred[PRED_2NxnD], depth);
- checkBestMode(md.pred[PRED_nLx2N], depth);
- checkBestMode(md.pred[PRED_nRx2N], depth);
+#if MATCH_NON_PMODE
+ bool bHor = false, bVer = false;
+ if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN)
+ bHor = true;
+ else if (md.bestMode->cu.m_partSize[0] == SIZE_Nx2N)
+ bVer = true;
+ else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N && !md.bestMode->cu.m_mergeFlag[0] && !md.bestMode->cu.isSkipped(0))
+ {
+ bHor = true;
+ bVer = true;
+ }
+#undef HOR
+#undef VER
+#define VER if (bVer)
+#define HOR if (bHor)
+#endif
+ VER checkBestMode(md.pred[PRED_2NxnU], depth);
+ VER checkBestMode(md.pred[PRED_2NxnD], depth);
+ HOR checkBestMode(md.pred[PRED_nLx2N], depth);
+ HOR checkBestMode(md.pred[PRED_nRx2N], depth);
}
if (bTryIntra)
@@ -656,7 +696,7 @@ void Analysis::compressInterCU_dist(cons
if (md.bestMode)
{
bNoSplit = !!md.bestMode->cu.isSkipped(0);
- if (mightSplit && depth && depth >= minDepth && !bNoSplit)
+ if (mightSplit && depth && depth >= minDepth && !bNoSplit && m_param->rdLevel <= 4)
bNoSplit = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
}
@@ -728,7 +768,7 @@ void Analysis::compressInterCU_rd0_4(con
bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
- uint32_t minDepth = mightNotSplit ? topSkipMinDepth(parentCTU, cuGeom) : 4;
+ uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
if (mightNotSplit && depth >= minDepth)
{
@@ -961,23 +1001,17 @@ void Analysis::compressInterCU_rd0_4(con
if (mightNotSplit)
addSplitFlagCost(*splitPred, cuGeom.depth);
- else if (m_param->rdLevel <= 1)
+ else if (m_param->rdLevel > 1)
+ updateModeCost(*splitPred);
+ else
splitPred->sa8dCost = m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
- else
- updateModeCost(*splitPred);
if (!md.bestMode)
md.bestMode = splitPred;
- else if (m_param->rdLevel >= 1)
- {
- if (splitPred->rdCost < md.bestMode->rdCost)
- md.bestMode = splitPred;
- }
- else
- {
- if (splitPred->sa8dCost < md.bestMode->sa8dCost)
- md.bestMode = splitPred;
- }
+ else if (m_param->rdLevel > 1)
+ checkBestMode(*splitPred, cuGeom.depth);
+ else if (splitPred->sa8dCost < md.bestMode->sa8dCost)
+ md.bestMode = splitPred;
}
if (!depth || md.bestMode->cu.m_predMode[0] != MODE_INTRA)
@@ -1503,19 +1537,6 @@ void Analysis::encodeResidue(const CUDat
cu.updatePic(cuGeom.depth);
}
-/* check whether current try is the best with identifying the depth of current try */
-void Analysis::checkBestMode(Mode& mode, uint32_t depth)
-{
- ModeDepth& md = m_modeDepth[depth];
- if (md.bestMode)
- {
- if (mode.rdCost < md.bestMode->rdCost)
- md.bestMode = &mode;
- }
- else
- md.bestMode = &mode;
-}
-
void Analysis::addSplitFlagCost(Mode& mode, uint32_t depth)
{
if (m_param->rdLevel >= 3)
diff -r 31ed48cdbefe -r 86ca1de606e3 source/encoder/analysis.h
--- a/source/encoder/analysis.h Wed Oct 29 17:50:05 2014 -0500
+++ b/source/encoder/analysis.h Wed Oct 29 22:38:58 2014 -0500
@@ -75,7 +75,7 @@ public:
Analysis();
bool create(ThreadLocalData* tld);
void destroy();
- Search::Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext);
+ Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext);
protected:
@@ -107,13 +107,31 @@ protected:
/* encode current bestMode losslessly, pick best RD cost */
void tryLossless(const CUGeom& cuGeom);
+ /* add the RD cost of coding a split flag (0 or 1) to the given mode */
+ void addSplitFlagCost(Mode& mode, uint32_t depth);
+
+ /* update CBF flags and QP values to be internally consistent */
void checkDQP(CUData& cu, const CUGeom& cuGeom);
- void addSplitFlagCost(Mode& mode, uint32_t depth);
- void checkBestMode(Mode& mode, uint32_t depth);
+
+ /* work-avoidance heuristics for RD levels < 5 */
uint32_t topSkipMinDepth(const CUData& parentCTU, const CUGeom& cuGeom);
bool recursionDepthCheck(const CUData& parentCTU, const CUGeom& cuGeom, const Mode& bestMode);
+ /* generate residual and recon pixels for an entire CTU recursively (RD0) */
void encodeResidue(const CUData& parentCTU, const CUGeom& cuGeom);
+
+ /* check whether current mode is the new best */
+ inline void checkBestMode(Mode& mode, uint32_t depth)
+ {
+ ModeDepth& md = m_modeDepth[depth];
+ if (md.bestMode)
+ {
+ if (mode.rdCost < md.bestMode->rdCost)
+ md.bestMode = &mode;
+ }
+ else
+ md.bestMode = &mode;
+ }
};
struct ThreadLocalData
diff -r 31ed48cdbefe -r 86ca1de606e3 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Wed Oct 29 17:50:05 2014 -0500
+++ b/source/encoder/frameencoder.cpp Wed Oct 29 22:38:58 2014 -0500
@@ -758,7 +758,7 @@ void FrameEncoder::processRowEncoder(int
}
// Does all the CU analysis, returns best top level mode decision
- Search::Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
+ Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
/* advance top-level row coder to include the context of this CTU.
* if SAO is disabled, rowCoder writes the final CTU bitstream */
diff -r 31ed48cdbefe -r 86ca1de606e3 source/encoder/search.cpp
--- a/source/encoder/search.cpp Wed Oct 29 17:50:05 2014 -0500
+++ b/source/encoder/search.cpp Wed Oct 29 22:38:58 2014 -0500
@@ -37,6 +37,8 @@ using namespace x265;
#pragma warning(disable: 4244) // '=' : conversion from 'int' to 'uint8_t', possible loss of data)
#endif
+#define MVP_IDX_BITS 1
+
ALIGN_VAR_32(const pixel, Search::zeroPixel[MAX_CU_SIZE]) = { 0 };
ALIGN_VAR_32(const int16_t, Search::zeroShort[MAX_CU_SIZE]) = { 0 };
diff -r 31ed48cdbefe -r 86ca1de606e3 source/encoder/search.h
--- a/source/encoder/search.h Wed Oct 29 17:50:05 2014 -0500
+++ b/source/encoder/search.h Wed Oct 29 22:38:58 2014 -0500
@@ -35,9 +35,6 @@
#include "entropy.h"
#include "motion.h"
-#define MVP_IDX_BITS 1
-#define NUM_LAYERS 4
-
namespace x265 {
// private namespace
@@ -68,6 +65,48 @@ struct RQTData
Yuv bidirPredYuv[2];
};
+struct Mode
+{
+ CUData cu;
+ const Yuv* fencYuv;
+ Yuv predYuv;
+ Yuv reconYuv;
+ Entropy contexts;
+
+ uint64_t rdCost; // sum of partition (psy) RD costs (sse(fenc, recon) + lambda2 * bits)
+ uint64_t sa8dCost; // sum of partition sa8d distortion costs (sa8d(fenc, pred) + lambda * bits)
+ uint32_t sa8dBits; // signal bits used in sa8dCost calculation
+ uint32_t psyEnergy; // sum of partition psycho-visual energy difference
+ uint32_t distortion; // sum of partition SSE distortion
+ uint32_t totalBits; // sum of partition bits (mv + coeff)
+ uint32_t mvBits; // Mv bits + Ref + block type (or intra mode)
+ uint32_t coeffBits; // Texture bits (DCT Coeffs)
+
+ void initCosts()
+ {
+ rdCost = 0;
+ sa8dCost = 0;
+ sa8dBits = 0;
+ psyEnergy = 0;
More information about the x265-commits
mailing list