[x265-commits] [x265] search: split generateCoeffRecon() into its two callers, ...
Steve Borho
steve at borho.org
Wed Oct 29 17:08:57 CET 2014
details: http://hg.videolan.org/x265/rev/7c2b831e52fb
branches:
changeset: 8736:7c2b831e52fb
user: Steve Borho <steve at borho.org>
date: Wed Oct 29 10:45:23 2014 -0500
description:
search: split generateCoeffRecon() into its two callers, improve comments
The function consisted of a big if (inter) else (intra) expression and it had
only two callers, one which knew it was inter and one which knew it was intra.
Subject: [x265] analysis: fix for --rd 0, do not assume a CU is only inter or intra
details: http://hg.videolan.org/x265/rev/3aec7242d0be
branches: stable
changeset: 8737:3aec7242d0be
user: Steve Borho <steve at borho.org>
date: Wed Oct 29 10:53:33 2014 -0500
description:
analysis: fix for --rd 0, do not assume a CU is only inter or intra
Subject: [x265] Merge with stable
details: http://hg.videolan.org/x265/rev/f5603998be03
branches:
changeset: 8738:f5603998be03
user: Steve Borho <steve at borho.org>
date: Wed Oct 29 10:53:51 2014 -0500
description:
Merge with stable
Subject: [x265] search: relocate intra in inter functions to search.cpp
details: http://hg.videolan.org/x265/rev/fe3b88e11f44
branches:
changeset: 8739:fe3b88e11f44
user: Steve Borho <steve at borho.org>
date: Wed Oct 29 11:07:44 2014 -0500
description:
search: relocate intra in inter functions to search.cpp
diffstat:
source/encoder/analysis.cpp | 264 ++++++-------------------------------------
source/encoder/analysis.h | 4 -
source/encoder/search.cpp | 255 ++++++++++++++++++++++++++++++++++++------
source/encoder/search.h | 19 ++-
4 files changed, 271 insertions(+), 271 deletions(-)
diffs (truncated from 696 to 300 lines):
diff -r 7cfc1edb083f -r fe3b88e11f44 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Tue Oct 28 16:31:31 2014 -0500
+++ b/source/encoder/analysis.cpp Wed Oct 29 11:07:44 2014 -0500
@@ -389,7 +389,7 @@ void Analysis::parallelModeAnalysis(int
case 0:
if (slave != this)
slave->m_rqt[m_curGeom->depth].cur.load(m_rqt[m_curGeom->depth].cur);
- slave->checkIntraInInter_rd0_4(md.pred[PRED_INTRA], *m_curGeom);
+ slave->checkIntraInInter(md.pred[PRED_INTRA], *m_curGeom);
if (m_param->rdLevel > 2)
slave->encodeIntraInInter(md.pred[PRED_INTRA], *m_curGeom);
break;
@@ -554,8 +554,8 @@ void Analysis::compressInterCU_dist(cons
{
if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_Nx2N];
- if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
- bestInter = &md.pred[PRED_Nx2N];
+ if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
+ bestInter = &md.pred[PRED_2NxN];
}
if (bTryAmp)
@@ -640,7 +640,7 @@ void Analysis::compressInterCU_dist(cons
if (md.bestMode->rdCost == MAX_INT64 && !bTryIntra)
{
md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
- checkIntraInInter_rd0_4(md.pred[PRED_INTRA], cuGeom);
+ checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
checkBestMode(md.pred[PRED_INTRA], depth);
}
@@ -825,7 +825,7 @@ void Analysis::compressInterCU_rd0_4(con
md.bestMode->sa8dCost == MAX_INT64)
{
md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
- checkIntraInInter_rd0_4(md.pred[PRED_INTRA], cuGeom);
+ checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
if (md.pred[PRED_INTRA].rdCost < md.bestMode->rdCost)
md.bestMode = &md.pred[PRED_INTRA];
@@ -840,7 +840,7 @@ void Analysis::compressInterCU_rd0_4(con
if (bTryIntra || md.bestMode->sa8dCost == MAX_INT64)
{
md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
- checkIntraInInter_rd0_4(md.pred[PRED_INTRA], cuGeom);
+ checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
if (md.pred[PRED_INTRA].sa8dCost < md.bestMode->sa8dCost)
md.bestMode = &md.pred[PRED_INTRA];
}
@@ -865,8 +865,23 @@ void Analysis::compressInterCU_rd0_4(con
encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
else if (m_param->rdLevel == 1)
{
- m_rqt[cuGeom.depth].tmpResiYuv.subtract(md.fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize);
- generateCoeffRecon(*md.bestMode, cuGeom);
+ /* generate recon pixels with no rate distortion considerations */
+ CUData& cu = md.bestMode->cu;
+ m_quant.setQPforQuant(cu);
+
+ uint32_t tuDepthRange[2];
+ cu.getInterTUQtDepthRange(tuDepthRange, 0);
+
+ m_rqt[cuGeom.depth].tmpResiYuv.subtract(*md.bestMode->fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize);
+ residualTransformQuantInter(*md.bestMode, cuGeom, 0, cuGeom.depth, tuDepthRange);
+ if (cu.getQtRootCbf(0))
+ md.bestMode->reconYuv.addClip(md.bestMode->predYuv, m_rqt[cuGeom.depth].tmpResiYuv, cu.m_log2CUSize[0]);
+ else
+ {
+ md.bestMode->reconYuv.copyFromYuv(md.bestMode->predYuv);
+ if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N)
+ cu.setSkipFlagSubParts(true);
+ }
}
}
else
@@ -874,7 +889,19 @@ void Analysis::compressInterCU_rd0_4(con
if (m_param->rdLevel == 2)
encodeIntraInInter(*md.bestMode, cuGeom);
else if (m_param->rdLevel == 1)
- generateCoeffRecon(*md.bestMode, cuGeom);
+ {
+ /* generate recon pixels with no rate distortion considerations */
+ CUData& cu = md.bestMode->cu;
+ m_quant.setQPforQuant(cu);
+
+ uint32_t tuDepthRange[2];
+ cu.getIntraTUQtDepthRange(tuDepthRange, 0);
+
+ uint32_t initTrDepth = cu.m_partSize[0] == SIZE_NxN;
+ residualTransformQuantIntra(*md.bestMode, cuGeom, initTrDepth, 0, tuDepthRange);
+ getBestIntraModeChroma(*md.bestMode, cuGeom);
+ residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
+ }
}
}
} // !earlyskip
@@ -1369,222 +1396,6 @@ void Analysis::checkInter_rd5_6(Mode& in
}
}
-/* Note that this function does not save the best intra prediction, it must
- * be generated later. It records the best mode in the cu */
-void Analysis::checkIntraInInter_rd0_4(Mode& intraMode, const CUGeom& cuGeom)
-{
- CUData& cu = intraMode.cu;
- uint32_t depth = cu.m_cuDepth[0];
-
- cu.setPartSizeSubParts(SIZE_2Nx2N);
- cu.setPredModeSubParts(MODE_INTRA);
-
- uint32_t initTrDepth = 0;
- uint32_t log2TrSize = cu.m_log2CUSize[0] - initTrDepth;
- uint32_t tuSize = 1 << log2TrSize;
- const uint32_t absPartIdx = 0;
-
- // Reference sample smoothing
- initAdiPattern(cu, cuGeom, absPartIdx, initTrDepth, ALL_IDX);
-
- pixel* fenc = m_modeDepth[depth].fencYuv.m_buf[0];
- uint32_t stride = m_modeDepth[depth].fencYuv.m_size;
-
- pixel *above = m_refAbove + tuSize - 1;
- pixel *aboveFiltered = m_refAboveFlt + tuSize - 1;
- pixel *left = m_refLeft + tuSize - 1;
- pixel *leftFiltered = m_refLeftFlt + tuSize - 1;
- int sad, bsad;
- uint32_t bits, bbits, mode, bmode;
- uint64_t cost, bcost;
-
- // 33 Angle modes once
- ALIGN_VAR_32(pixel, bufScale[32 * 32]);
- ALIGN_VAR_32(pixel, bufTrans[32 * 32]);
- ALIGN_VAR_32(pixel, tmp[33 * 32 * 32]);
- int scaleTuSize = tuSize;
- int scaleStride = stride;
- int costShift = 0;
- int sizeIdx = log2TrSize - 2;
-
- if (tuSize > 32)
- {
- // origin is 64x64, we scale to 32x32 and setup required parameters
- primitives.scale2D_64to32(bufScale, fenc, stride);
- fenc = bufScale;
-
- // reserve space in case primitives need to store data in above
- // or left buffers
- pixel _above[4 * 32 + 1];
- pixel _left[4 * 32 + 1];
- pixel *aboveScale = _above + 2 * 32;
- pixel *leftScale = _left + 2 * 32;
- aboveScale[0] = leftScale[0] = above[0];
- primitives.scale1D_128to64(aboveScale + 1, above + 1, 0);
- primitives.scale1D_128to64(leftScale + 1, left + 1, 0);
-
- scaleTuSize = 32;
- scaleStride = 32;
- costShift = 2;
- sizeIdx = 5 - 2; // log2(scaleTuSize) - 2
-
- // Filtered and Unfiltered refAbove and refLeft pointing to above and left.
- above = aboveScale;
- left = leftScale;
- aboveFiltered = aboveScale;
- leftFiltered = leftScale;
- }
-
- pixelcmp_t sa8d = primitives.sa8d[sizeIdx];
- int predsize = scaleTuSize * scaleTuSize;
-
- m_entropyCoder.loadIntraDirModeLuma(m_rqt[depth].cur);
-
- /* there are three cost tiers for intra modes:
- * pred[0] - mode probable, least cost
- * pred[1], pred[2] - less probable, slightly more cost
- * non-mpm modes - all cost the same (rbits) */
- uint64_t mpms;
- uint32_t preds[3];
- uint32_t rbits = getIntraRemModeBits(cu, absPartIdx, preds, mpms);
-
- // DC
- primitives.intra_pred[DC_IDX][sizeIdx](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
- bsad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
- bmode = mode = DC_IDX;
- bbits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(preds, mode) : rbits;
- bcost = m_rdCost.calcRdSADCost(bsad, bbits);
-
- pixel *abovePlanar = above;
- pixel *leftPlanar = left;
-
- if (tuSize & (8 | 16 | 32))
- {
- abovePlanar = aboveFiltered;
- leftPlanar = leftFiltered;
- }
-
- // PLANAR
- primitives.intra_pred[PLANAR_IDX][sizeIdx](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
- sad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
- mode = PLANAR_IDX;
- bits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(preds, mode) : rbits;
- cost = m_rdCost.calcRdSADCost(sad, bits);
- COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
-
- // Transpose NxN
- primitives.transpose[sizeIdx](bufTrans, fenc, scaleStride);
-
- primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
-
- bool modeHor;
- pixel *cmp;
- intptr_t srcStride;
-
-#define TRY_ANGLE(angle) \
- modeHor = angle < 18; \
- cmp = modeHor ? bufTrans : fenc; \
- srcStride = modeHor ? scaleTuSize : scaleStride; \
- sad = sa8d(cmp, srcStride, &tmp[(angle - 2) * predsize], scaleTuSize) << costShift; \
- bits = (mpms & ((uint64_t)1 << angle)) ? m_entropyCoder.bitsIntraModeMPM(preds, angle) : rbits; \
- cost = m_rdCost.calcRdSADCost(sad, bits)
-
- if (m_param->bEnableFastIntra)
- {
- int asad = 0;
- uint32_t lowmode, highmode, amode = 5, abits = 0;
- uint64_t acost = MAX_INT64;
-
- /* pick the best angle, sampling at distance of 5 */
- for (mode = 5; mode < 35; mode += 5)
- {
- TRY_ANGLE(mode);
- COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits);
- }
-
- /* refine best angle at distance 2, then distance 1 */
- for (uint32_t dist = 2; dist >= 1; dist--)
- {
- lowmode = amode - dist;
- highmode = amode + dist;
-
- X265_CHECK(lowmode >= 2 && lowmode <= 34, "low intra mode out of range\n");
- TRY_ANGLE(lowmode);
- COPY4_IF_LT(acost, cost, amode, lowmode, asad, sad, abits, bits);
-
- X265_CHECK(highmode >= 2 && highmode <= 34, "high intra mode out of range\n");
- TRY_ANGLE(highmode);
- COPY4_IF_LT(acost, cost, amode, highmode, asad, sad, abits, bits);
- }
-
- if (amode == 33)
- {
- TRY_ANGLE(34);
- COPY4_IF_LT(acost, cost, amode, 34, asad, sad, abits, bits);
- }
-
- COPY4_IF_LT(bcost, acost, bmode, amode, bsad, asad, bbits, abits);
- }
- else // calculate and search all intra prediction angles for lowest cost
- {
- for (mode = 2; mode < 35; mode++)
- {
- TRY_ANGLE(mode);
- COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
- }
- }
-
- cu.setLumaIntraDirSubParts((uint8_t)bmode, absPartIdx, depth + initTrDepth);
- intraMode.initCosts();
- intraMode.totalBits = bbits;
- intraMode.distortion = bsad;
- intraMode.sa8dCost = bcost;
-}
-
-void Analysis::encodeIntraInInter(Mode& intraMode, const CUGeom& cuGeom)
-{
- CUData& cu = intraMode.cu;
- Yuv* reconYuv = &intraMode.reconYuv;
- Yuv* fencYuv = &m_modeDepth[cuGeom.depth].fencYuv;
-
- X265_CHECK(cu.m_partSize[0] == SIZE_2Nx2N, "encodeIntraInInter does not expect NxN intra\n");
- X265_CHECK(!m_slice->isIntra(), "encodeIntraInInter does not expect to be used in I slices\n");
-
- m_quant.setQPforQuant(cu);
-
- uint32_t tuDepthRange[2];
- cu.getIntraTUQtDepthRange(tuDepthRange, 0);
-
- m_entropyCoder.load(m_rqt[cuGeom.depth].cur);
-
- Cost icosts;
- codeIntraLumaQT(intraMode, cuGeom, 0, 0, false, icosts, tuDepthRange);
- extractIntraResultQT(cu, *reconYuv, 0, 0);
-
- intraMode.distortion = icosts.distortion;
- intraMode.distortion += estIntraPredChromaQT(intraMode, cuGeom);
-
- m_entropyCoder.resetBits();
- if (m_slice->m_pps->bTransquantBypassEnabled)
- m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
- m_entropyCoder.codeSkipFlag(cu, 0);
More information about the x265-commits mailing list