[x265] [PATCH 3 of 3] implement QP based RD refinement [CHANGES OUTPUT]
kavitha at multicorewareinc.com
kavitha at multicorewareinc.com
Tue Oct 27 06:51:36 CET 2015
# HG changeset patch
# User Kavitha Sampath <kavitha at multicorewareinc.com>
# Date 1445612726 -19800
# Fri Oct 23 20:35:26 2015 +0530
# Node ID 067f831bfc9a6c023cbf642e8c9742deff631ea5
# Parent 8f08f346dd675a638df2ca8aa030b5be61ab06d7
implement QP based RD refinement [CHANGES OUTPUT]
After CU analysis, re-encode the best partition mode over a range of QP
values around the base QP and compare the resulting R-D costs; the QP with
the lowest cost is kept, to find the optimal rounding effect.
diff -r 8f08f346dd67 -r 067f831bfc9a source/common/cudata.cpp
--- a/source/common/cudata.cpp Fri Oct 23 19:32:21 2015 +0530
+++ b/source/common/cudata.cpp Fri Oct 23 20:35:26 2015 +0530
@@ -430,7 +430,7 @@
}
/* The reverse of copyToPic, called only by encodeResidue */
-void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom)
+void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom, bool copyQp)
{
m_encData = ctu.m_encData;
m_slice = ctu.m_slice;
@@ -441,7 +441,8 @@
m_numPartitions = cuGeom.numPartitions;
/* copy out all prediction info for this part */
- m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU);
+ if (copyQp) m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU);
+
m_partCopy(m_log2CUSize, ctu.m_log2CUSize + m_absIdxInCTU);
m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU);
m_partCopy(m_tqBypass, ctu.m_tqBypass + m_absIdxInCTU);
diff -r 8f08f346dd67 -r 067f831bfc9a source/common/cudata.h
--- a/source/common/cudata.h Fri Oct 23 19:32:21 2015 +0530
+++ b/source/common/cudata.h Fri Oct 23 20:35:26 2015 +0530
@@ -222,7 +222,7 @@
void copyToPic(uint32_t depth) const;
/* RD-0 methods called only from encodeResidue */
- void copyFromPic(const CUData& ctu, const CUGeom& cuGeom);
+ void copyFromPic(const CUData& ctu, const CUGeom& cuGeom, bool copyQp = true);
void updatePic(uint32_t depth) const;
void setPartSizeSubParts(PartSize size) { m_partSet(m_partSize, (uint8_t)size); }
diff -r 8f08f346dd67 -r 067f831bfc9a source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Fri Oct 23 19:32:21 2015 +0530
+++ b/source/encoder/analysis.cpp Fri Oct 23 20:35:26 2015 +0530
@@ -201,6 +201,9 @@
}
}
+ if (m_param->bEnableRdRefine)
+ qpRdRefine(ctu, cuGeom);
+
return *m_modeDepth[0].bestMode;
}
@@ -229,6 +232,53 @@
}
}
+void Analysis::qpRdRefine(const CUData& parentCTU, const CUGeom& cuGeom)
+{ /* re-encode the CTU at QPs above and below its base QP, then recode it with the QP that gave the lowest RD cost */
+ uint64_t origCost, bestCost, cost, prevCost;
+ int failure, nQP, lambdaQP;
+ double bestQp, origQP;
+ bestQp = origQP = m_frame->m_encData->m_cuStat[parentCTU.m_cuAddr].baseQp; /* the search starts from the base QP stored for this CTU address */
+ bestCost = origCost = m_modeDepth[0].bestMode->rdCost; /* cost to beat: RD cost of the current depth-0 best mode */
+ lambdaQP = calculateQpforCuSize(parentCTU, cuGeom); /* QP whose lambda is kept for every trial in the search loop */
+
+ for (int16_t dir = 1; dir >= -1; dir -= 2) /* two passes: dir = +1 (increasing QP) first, then dir = -1 (decreasing QP) */
+ {
+ int threshold = !!m_param->psyRd; /* 1 when psyRd is non-zero, otherwise 0 */
+
+ failure = 0; /* counts consecutive trials that did not improve on the previous trial */
+ prevCost = origCost;
+ double modQP = origQP + dir; /* first trial is one QP step away from the base QP */
+
+ while (modQP >= QP_MIN && modQP <= QP_MAX_SPEC)
+ {
+ /* set modified QP for quant, maintain constant lambda for all QPs
+ * use lambda of QP used for CU analysis for cost calculation */
+ nQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, cuGeom, modQP), lambdaQP);
+
+ recodeCU(parentCTU, cuGeom, nQP, modQP); /* sets the bestMode of depth cuGeom.depth; read back just below as m_modeDepth[0] */
+ cost = m_modeDepth[0].bestMode->rdCost;
+ COPY2_IF_LT(bestCost, cost, bestQp, modQP); /* presumably: when cost < bestCost, store cost and modQP as the new best - macro is defined elsewhere, confirm */
+
+ if (cost < prevCost)
+ failure = 0; /* an improving trial resets the streak */
+ else
+ failure++;
+
+ if (failure > threshold) /* give up on this direction once the non-improving streak exceeds the threshold */
+ break;
+
+ prevCost = cost;
+ modQP += dir;
+ }
+ }
+
+ /* TODO: Try last CU's QP to decide the bestQP before re-encode */
+
+ /* Re-encode CU for best chosen QP */
+ nQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, cuGeom, bestQp)); /* unlike the search loop, no separate lambda QP argument is passed here */
+ recodeCU(parentCTU, cuGeom, nQP, bestQp);
+}
+
void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp)
{
uint32_t depth = cuGeom.depth;
@@ -1422,6 +1472,110 @@
return refMask;
}
+void Analysis::recodeCU(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp, double modQP)
+{ /* re-encode a CU with QP qp, reusing the depth, partition size and prediction data stored in parentCTU; recurses into the sub-CUs when the stored depth differs from this depth */
+ uint32_t depth = cuGeom.depth;
+ ModeDepth& md = m_modeDepth[depth];
+ md.bestMode = NULL; /* reassigned in whichever branch is taken below */
+
+ bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
+ bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
+ bool bDecidedDepth = parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth; /* true when the depth stored in parentCTU for this partition is the current depth */
+
+ if (bDecidedDepth)
+ {
+ Mode& mode = md.pred[0]; /* the recoded CU is built in pred[0] and becomes bestMode */
+ md.bestMode = &mode;
+ mode.cu.initSubCU(parentCTU, cuGeom, qp);
+ PartSize size = (PartSize)parentCTU.m_partSize[cuGeom.absPartIdx]; /* partition size stored in parentCTU */
+ if (parentCTU.isIntra(cuGeom.absPartIdx))
+ {
+ memcpy(mode.cu.m_lumaIntraDir, parentCTU.m_lumaIntraDir + cuGeom.absPartIdx, cuGeom.numPartitions); /* reuse the stored luma intra directions */
+ memcpy(mode.cu.m_chromaIntraDir, parentCTU.m_chromaIntraDir + cuGeom.absPartIdx, cuGeom.numPartitions); /* reuse the stored chroma intra directions */
+ checkIntra(mode, cuGeom, size);
+ }
+ else
+ {
+ mode.cu.copyFromPic(parentCTU, cuGeom, false); /* copyQp = false: take the stored CU data but skip the m_qp copy */
+ for (int part = 0; part < (int)parentCTU.getNumPartInter(cuGeom.absPartIdx); part++) /* motion-compensate every inter PU into mode.predYuv */
+ {
+ PredictionUnit pu(mode.cu, cuGeom, part);
+ motionCompensation(mode.cu, pu, mode.predYuv, true, true);
+ }
+
+ if (parentCTU.isSkipped(cuGeom.absPartIdx)) /* pick the residual coding path from the stored skip flag */
+ encodeResAndCalcRdSkipCU(mode);
+ else
+ encodeResAndCalcRdInterCU(mode, cuGeom);
+
+ /* checkMerge2Nx2N function performs checkDQP after encoding residual, do the same */
+ bool mergeInter2Nx2N = size == SIZE_2Nx2N && parentCTU.m_mergeFlag[cuGeom.absPartIdx];
+ if (parentCTU.isSkipped(cuGeom.absPartIdx) || mergeInter2Nx2N)
+ checkDQP(mode, cuGeom);
+ }
+
+ if (m_bTryLossless)
+ tryLossless(cuGeom);
+
+ if (mightSplit)
+ addSplitFlagCost(*md.bestMode, cuGeom.depth);
+ }
+ else
+ { /* stored depth differs from this depth: rebuild this CU from its four children */
+ Mode* splitPred = &md.pred[PRED_SPLIT];
+ md.bestMode = splitPred;
+ splitPred->initCosts();
+ CUData* splitCU = &splitPred->cu;
+ splitCU->initSubCU(parentCTU, cuGeom, qp);
+
+ uint32_t nextDepth = depth + 1;
+ ModeDepth& nd = m_modeDepth[nextDepth];
+ invalidateContexts(nextDepth);
+ Entropy* nextContext = &m_rqt[depth].cur; /* entropy context that the first present child loads from */
+ int nextQP = qp; /* children are coded with qp unless it is recomputed below */
+
+ for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
+ {
+ const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+ if (childGeom.flags & CUGeom::PRESENT)
+ {
+ m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
+ m_rqt[nextDepth].cur.load(*nextContext);
+
+ if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
+ {
+ int constLambdaQp = calculateQpforCuSize(parentCTU, childGeom); /* QP computed without modQP, passed below as the lambda QP */
+ nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom, modQP), constLambdaQp); /* child QP recomputed from modQP */
+ }
+ recodeCU(parentCTU, childGeom, nextQP, modQP); /* recursive call fills nd.bestMode for this child */
+
+ // Save best CU and pred data for this sub CU
+ splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
+ splitPred->addSubCosts(*nd.bestMode);
+ nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
+ nextContext = &nd.bestMode->contexts; /* the next child loads the contexts left by this one */
+ }
+ else
+ {
+ splitCU->setEmptyPart(childGeom, subPartIdx);
+ // Set depth of non-present CU to 0 to ensure that correct CU is fetched as reference to code deltaQP
+ memset(parentCTU.m_cuDepth + childGeom.absPartIdx, 0, childGeom.numPartitions); /* NOTE(review): writes through parentCTU.m_cuDepth although parentCTU is passed as a const reference */
+ }
+ }
+ nextContext->store(splitPred->contexts);
+ if (mightNotSplit)
+ addSplitFlagCost(*splitPred, cuGeom.depth);
+ else
+ updateModeCost(*splitPred);
+
+ checkDQPForSplitPred(*splitPred, cuGeom);
+ }
+
+ /* Copy best data to encData CTU and recon */
+ md.bestMode->cu.copyToPic(depth);
+ md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx);
+}
+
/* sets md.bestMode if a valid merge candidate is found, else leaves it NULL */
void Analysis::checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom& cuGeom)
{
diff -r 8f08f346dd67 -r 067f831bfc9a source/encoder/analysis.h
--- a/source/encoder/analysis.h Fri Oct 23 19:32:21 2015 +0530
+++ b/source/encoder/analysis.h Fri Oct 23 20:35:26 2015 +0530
@@ -110,6 +110,9 @@
uint32_t m_splitRefIdx[4];
+ /* refine RD based on QP for rd-levels 5 and 6 */
+ void qpRdRefine(const CUData& ctu, const CUGeom& cuGeom);
+
/* full analysis for an I-slice CU */
void compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp);
@@ -118,6 +121,8 @@
uint32_t compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp);
uint32_t compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp);
+ void recodeCU(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp, double modQP);
+
/* measure merge and skip */
void checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom& cuGeom);
void checkMerge2Nx2N_rd5_6(Mode& skip, Mode& merge, const CUGeom& cuGeom, bool isShareMergeCand);
More information about the x265-devel
mailing list