[x265] [PATCH] analysis: always configure quant QP directly after setting RD lambda
Steve Borho
steve at borho.org
Sat Apr 25 08:59:36 CEST 2015
On Sat, Apr 25, 2015 at 1:50 AM, Steve Borho <steve at borho.org> wrote:
> # HG changeset patch
> # User Steve Borho <steve at borho.org>
> # Date 1429943995 18000
> # Sat Apr 25 01:39:55 2015 -0500
> # Node ID 67b0c29c45e67da8045b720bc0d657a14f952c33
> # Parent 318aa6a4eaf2d58cf7d223bf92448d94cc439a5b
> analysis: always configure quant QP directly after setting RD lambda
>
> Basically, everywhere we adjust or assign QP we set quant QP immediately. This
> removes a great many ad-hoc calls to setQPforQuant() and hopefully makes it
> impossible to miss quant being configured properly.
>
> This patch fixes a layering violation where the frame encoder was setting the
> RDO lambdas directly, but only when delta-QP was not enabled.
This patch still needs work for --cu-lossless, and it changes the encoder outputs.
> diff -r 318aa6a4eaf2 -r 67b0c29c45e6 source/common/quant.cpp
> --- a/source/common/quant.cpp Sat Apr 25 00:41:25 2015 -0500
> +++ b/source/common/quant.cpp Sat Apr 25 01:39:55 2015 -0500
> @@ -225,16 +225,15 @@
> X265_FREE(m_fencShortBuf);
> }
>
> -void Quant::setQPforQuant(const CUData& cu)
> +void Quant::setQPforQuant(const CUData& ctu, int qp)
> {
> - m_tqBypass = !!cu.m_tqBypass[0];
> + m_tqBypass = !!ctu.m_tqBypass[0];
> if (m_tqBypass)
> return;
> - m_nr = m_frameNr ? &m_frameNr[cu.m_encData->m_frameEncoderID] : NULL;
> - int qpy = cu.m_qp[0];
> - m_qpParam[TEXT_LUMA].setQpParam(qpy + QP_BD_OFFSET);
> - setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, cu.m_chromaFormat);
> - setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[1], TEXT_CHROMA_V, cu.m_chromaFormat);
> + m_nr = m_frameNr ? &m_frameNr[ctu.m_encData->m_frameEncoderID] : NULL;
> + m_qpParam[TEXT_LUMA].setQpParam(qp + QP_BD_OFFSET);
> + setChromaQP(qp + ctu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, ctu.m_chromaFormat);
> + setChromaQP(qp + ctu.m_slice->m_pps->chromaQpOffset[1], TEXT_CHROMA_V, ctu.m_chromaFormat);
> }
>
> void Quant::setChromaQP(int qpin, TextType ttype, int chFmt)
> diff -r 318aa6a4eaf2 -r 67b0c29c45e6 source/common/quant.h
> --- a/source/common/quant.h Sat Apr 25 00:41:25 2015 -0500
> +++ b/source/common/quant.h Sat Apr 25 01:39:55 2015 -0500
> @@ -103,7 +103,7 @@
> bool allocNoiseReduction(const x265_param& param);
>
> /* CU setup */
> - void setQPforQuant(const CUData& cu);
> + void setQPforQuant(const CUData& ctu, int qp);
>
> uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
> uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip);
> diff -r 318aa6a4eaf2 -r 67b0c29c45e6 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Sat Apr 25 00:41:25 2015 -0500
> +++ b/source/encoder/analysis.cpp Sat Apr 25 01:39:55 2015 -0500
> @@ -146,18 +146,16 @@
>
> if (m_slice->m_pps->bUseDQP)
> {
> - m_aqQP[0] = calculateQpforCuSize(ctu, cuGeom);
> - setLambdaFromQP(*m_slice, m_aqQP[0]);
> - m_aqQP[0] = x265_clip3(QP_MIN, QP_MAX_SPEC, m_aqQP[0]);
> - ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0);
> + m_aqQP[0] = setLambdaFromQP(ctu, calculateQpforCuSize(ctu, cuGeom));
>
> if (m_slice->m_pps->maxCuDQPDepth)
> initAqQPs(1, ctu, &cuGeom + 1);
> }
> else
> - m_aqQP[0] = m_slice->m_sliceQp;
> + /* adaptive quant disabled, CTU QP is always slice QP, and within spec range */
> + m_aqQP[0] = setLambdaFromQP(ctu, m_slice->m_sliceQp);
>
> - m_quant.setQPforQuant(ctu);
> + ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0);
> m_rqt[0].cur.load(initialContext);
> m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 0);
>
> @@ -269,7 +267,6 @@
> PartSize size = (PartSize)reusePartSizes[zOrder];
> Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] : md.pred[PRED_INTRA_NxN];
> mode.cu.initSubCU(parentCTU, cuGeom, qp);
> - m_quant.setQPforQuant(mode.cu);
> checkIntra(mode, cuGeom, size, &reuseModes[zOrder], &reuseChromaModes[zOrder]);
> checkBestMode(mode, depth);
>
> @@ -287,7 +284,6 @@
> else if (mightNotSplit)
> {
> md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
> - m_quant.setQPforQuant(md.pred[PRED_INTRA].cu);
> checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
> checkBestMode(md.pred[PRED_INTRA], depth);
>
> @@ -327,11 +323,7 @@
> m_rqt[nextDepth].cur.load(*nextContext);
>
> if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
> - {
> - nextQP = m_aqQP[childGeom.index];
> - setLambdaFromQP(*m_slice, nextQP);
> - nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> - }
> + nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
>
> compressIntraCU(parentCTU, childGeom, zOrder, nextQP);
>
> @@ -400,14 +392,9 @@
> {
> slave.m_slice = m_slice;
> slave.m_frame = m_frame;
> - slave.setLambdaFromQP(*m_slice, m_rdCost.m_qp);
> + slave.setLambdaFromQP(md.pred[PRED_2Nx2N].cu, m_rdCost.m_qp);
> slave.invalidateContexts(0);
> -
> - if (m_param->rdLevel >= 5)
> - {
> - slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
> - slave.m_quant.setQPforQuant(md.pred[PRED_2Nx2N].cu);
> - }
> + slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
> }
>
> /* perform Mode task, repeat until no more work is available */
> @@ -418,11 +405,6 @@
> switch (pmode.modes[task])
> {
> case PRED_INTRA:
> - if (&slave != this)
> - {
> - slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
> - slave.m_quant.setQPforQuant(md.pred[PRED_INTRA].cu);
> - }
> slave.checkIntraInInter(md.pred[PRED_INTRA], pmode.cuGeom);
> if (m_param->rdLevel > 2)
> slave.encodeIntraInInter(md.pred[PRED_INTRA], pmode.cuGeom);
> @@ -738,11 +720,7 @@
> m_rqt[nextDepth].cur.load(*nextContext);
>
> if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
> - {
> - nextQP = m_aqQP[childGeom.index];
> - setLambdaFromQP(*m_slice, nextQP);
> - nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> - }
> + nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
>
> compressInterCU_dist(parentCTU, childGeom, nextQP);
>
> @@ -943,7 +921,6 @@
> {
> /* generate recon pixels with no rate distortion considerations */
> CUData& cu = md.bestMode->cu;
> - m_quant.setQPforQuant(cu);
>
> uint32_t tuDepthRange[2];
> cu.getInterTUQtDepthRange(tuDepthRange, 0);
> @@ -968,7 +945,6 @@
> {
> /* generate recon pixels with no rate distortion considerations */
> CUData& cu = md.bestMode->cu;
> - m_quant.setQPforQuant(cu);
>
> uint32_t tuDepthRange[2];
> cu.getIntraTUQtDepthRange(tuDepthRange, 0);
> @@ -1019,11 +995,7 @@
> m_rqt[nextDepth].cur.load(*nextContext);
>
> if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
> - {
> - nextQP = m_aqQP[childGeom.index];
> - setLambdaFromQP(*m_slice, nextQP);
> - nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> - }
> + nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
>
> compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
>
> @@ -1225,11 +1197,7 @@
> m_rqt[nextDepth].cur.load(*nextContext);
>
> if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
> - {
> - nextQP = m_aqQP[childGeom.index];
> - setLambdaFromQP(*m_slice, nextQP);
> - nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> - }
> + nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
>
> compressInterCU_rd5_6(parentCTU, childGeom, zOrder, nextQP);
>
> @@ -1755,7 +1723,6 @@
> CUData& cu = bestMode->cu;
>
> cu.copyFromPic(ctu, cuGeom);
> - m_quant.setQPforQuant(cu);
>
> Yuv& fencYuv = m_modeDepth[cuGeom.depth].fencYuv;
> if (cuGeom.depth)
> diff -r 318aa6a4eaf2 -r 67b0c29c45e6 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Sat Apr 25 00:41:25 2015 -0500
> +++ b/source/encoder/frameencoder.cpp Sat Apr 25 01:39:55 2015 -0500
> @@ -851,12 +851,11 @@
>
> if (m_param->rc.aqMode || bIsVbv)
> {
> + X265_CHECK(slice->m_pps->bUseDQP, "adaptive quant in use without DQP\n");
> int qp = calcQpForCu(cuAddr, curEncData.m_cuStat[cuAddr].baseQp);
> qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
> curEncData.m_rowStat[row].sumQpAq += qp;
> }
> - else
> - tld.analysis.setLambdaFromQP(*slice, slice->m_sliceQp);
>
> if (m_param->bEnableWavefront && !col && row)
> {
> diff -r 318aa6a4eaf2 -r 67b0c29c45e6 source/encoder/rdcost.h
> --- a/source/encoder/rdcost.h Sat Apr 25 00:41:25 2015 -0500
> +++ b/source/encoder/rdcost.h Sat Apr 25 01:39:55 2015 -0500
> @@ -40,12 +40,13 @@
> uint32_t m_chromaDistWeight[2];
> uint32_t m_psyRdBase;
> uint32_t m_psyRd;
> - int m_qp;
> + int m_qp; /* QP used to configure lambda, may be higher than QP_MAX_SPEC but <= QP_MAX_MAX */
>
> void setPsyRdScale(double scale) { m_psyRdBase = (uint32_t)floor(65536.0 * scale * 0.33); }
>
> void setQP(const Slice& slice, int qp)
> {
> + x265_emms(); /* TODO: if the lambda tables were ints, this would not be necessary */
> m_qp = qp;
>
> /* Scale PSY RD factor by a slice type factor */
> diff -r 318aa6a4eaf2 -r 67b0c29c45e6 source/encoder/search.cpp
> --- a/source/encoder/search.cpp Sat Apr 25 00:41:25 2015 -0500
> +++ b/source/encoder/search.cpp Sat Apr 25 01:39:55 2015 -0500
> @@ -163,11 +163,16 @@
> X265_FREE(m_tsRecon);
> }
>
> -void Search::setLambdaFromQP(const Slice& slice, int qp)
> +int Search::setLambdaFromQP(const CUData& ctu, int qp)
> {
> - x265_emms(); /* TODO: if the lambda tables were ints, this would not be necessary */
> + X265_CHECK(qp >= QP_MIN && qp <= QP_MAX_MAX, "QP used for lambda is out of range\n");
> +
> m_me.setQP(qp);
> - m_rdCost.setQP(slice, qp);
> + m_rdCost.setQP(*m_slice, qp);
> +
> + int quantQP = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
> + m_quant.setQPforQuant(ctu, quantQP);
> + return quantQP;
> }
>
> #if CHECKED_BUILD || _DEBUG
> @@ -1364,8 +1369,6 @@
> X265_CHECK(cu.m_partSize[0] == SIZE_2Nx2N, "encodeIntraInInter does not expect NxN intra\n");
> X265_CHECK(!m_slice->isIntra(), "encodeIntraInInter does not expect to be used in I slices\n");
>
> - m_quant.setQPforQuant(cu);
> -
> uint32_t tuDepthRange[2];
> cu.getIntraTUQtDepthRange(tuDepthRange, 0);
>
> @@ -1888,7 +1891,7 @@
> /* Setup slave Search instance for ME for master's CU */
> if (&slave != this)
> {
> - slave.setLambdaFromQP(*m_slice, m_rdCost.m_qp);
> + slave.setLambdaFromQP(pme.mode.cu, m_rdCost.m_qp);
> slave.m_slice = m_slice;
> slave.m_frame = m_frame;
>
> @@ -2502,9 +2505,6 @@
> uint32_t log2CUSize = cuGeom.log2CUSize;
> int sizeIdx = log2CUSize - 2;
>
> - uint32_t tqBypass = cu.m_tqBypass[0];
> - m_quant.setQPforQuant(interMode.cu);
> -
> resiYuv->subtract(*fencYuv, *predYuv, log2CUSize);
>
> uint32_t tuDepthRange[2];
> @@ -2515,6 +2515,7 @@
> Cost costs;
> estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
>
> + uint32_t tqBypass = cu.m_tqBypass[0];
> if (!tqBypass)
> {
> uint32_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
> diff -r 318aa6a4eaf2 -r 67b0c29c45e6 source/encoder/search.h
> --- a/source/encoder/search.h Sat Apr 25 00:41:25 2015 -0500
> +++ b/source/encoder/search.h Sat Apr 25 01:39:55 2015 -0500
> @@ -287,7 +287,7 @@
> ~Search();
>
> bool initSearch(const x265_param& param, ScalingList& scalingList);
> - void setLambdaFromQP(const Slice& slice, int qp);
> + int setLambdaFromQP(const CUData& ctu, int qp); /* returns real quant QP in valid spec range */
>
> // mark temp RD entropy contexts as uninitialized; useful for finding loads without stores
> void invalidateContexts(int fromDepth);
--
Steve Borho
More information about the x265-devel mailing list