[x265] quant: add m_tqBypass
Steve Borho
steve at borho.org
Wed Jan 28 17:12:32 CET 2015
On 01/28, Satoshi Nakagawa wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1422456196 -32400
> # Wed Jan 28 23:43:16 2015 +0900
> # Node ID 231f1a91eaefdd5e79bc250b0c505178a89f185e
> # Parent c1371f175178edcc0d0402a745b7478aa240c3b4
> quant: add m_tqBypass
>
> diff -r c1371f175178 -r 231f1a91eaef source/common/deblock.cpp
> --- a/source/common/deblock.cpp Mon Jan 26 15:31:42 2015 -0600
> +++ b/source/common/deblock.cpp Wed Jan 28 23:43:16 2015 +0900
> @@ -401,14 +401,22 @@
> if (!bs)
> continue;
>
> - int32_t qpQ = cuQ->m_qp[partQ];
> -
> // Derive neighboring PU index
> uint32_t partP;
> const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
>
> + if (bCheckNoFilter)
> + {
> + // check if each of PUs is lossless coded
> + maskP = cuP->m_tqBypass[partP] - 1;
> + maskQ = cuQ->m_tqBypass[partQ] - 1;
> + if (!(maskP | maskQ))
> + continue;
> + }
> +
> + int32_t qpQ = cuQ->m_qp[partQ];
> int32_t qpP = cuP->m_qp[partP];
> - int32_t qp = (qpP + qpQ + 1) >> 1;
> + int32_t qp = (qpP + qpQ + 1) >> 1;
>
> int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset);
>
> @@ -428,13 +436,6 @@
> if (d >= beta)
> continue;
>
> - if (bCheckNoFilter)
> - {
> - // check if each of PUs is lossless coded
> - maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
> - maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
> - }
> -
> int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
> int32_t tc = s_tcTable[indexTC] << bitdepthShift;
>
> @@ -506,33 +507,29 @@
> if (bs <= 1)
> continue;
>
> - int32_t qpQ = cuQ->m_qp[partQ];
> -
> // Derive neighboring PU index
> uint32_t partP;
> const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
>
> - int32_t qpP = cuP->m_qp[partP];
> -
> if (bCheckNoFilter)
> {
> // check if each of PUs is lossless coded
> maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
> maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
> + if (!(maskP | maskQ))
> + continue;
> }
>
> + int32_t qpQ = cuQ->m_qp[partQ];
> + int32_t qpP = cuP->m_qp[partP];
> + int32_t qpA = (qpP + qpQ + 1) >> 1;
> +
> intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
> for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
> {
> - int32_t chromaQPOffset = pps->chromaQpOffset[chromaIdx];
> - int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset;
> + int32_t qp = qpA + pps->chromaQpOffset[chromaIdx];
> if (qp >= 30)
> - {
> - if (chFmt == X265_CSP_I420)
> - qp = g_chromaScale[qp];
> - else
> - qp = X265_MIN(qp, 51);
> - }
> + qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, 51);
>
> int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
> const int32_t bitdepthShift = X265_DEPTH - 8;
> diff -r c1371f175178 -r 231f1a91eaef source/common/quant.cpp
> --- a/source/common/quant.cpp Mon Jan 26 15:31:42 2015 -0600
> +++ b/source/common/quant.cpp Wed Jan 28 23:43:16 2015 +0900
> @@ -169,6 +169,7 @@
> m_resiDctCoeff = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
> m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE);
> m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE);
> + m_tqBypass = false;
>
> return m_resiDctCoeff && m_fencShortBuf;
> }
> @@ -326,7 +327,7 @@
> coeff_t* coeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip)
> {
> const uint32_t sizeIdx = log2TrSize - 2;
> - if (cu.m_tqBypass[absPartIdx])
> + if (m_tqBypass)
> {
> X265_CHECK(log2TrSize >= 2 && log2TrSize <= 5, "Block size mistake!\n");
> return primitives.cu[sizeIdx].copy_cnt(coeff, residual, resiStride);
> @@ -406,11 +407,11 @@
> }
> }
>
> -void Quant::invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
> +void Quant::invtransformNxN(int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
> uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig)
> {
> const uint32_t sizeIdx = log2TrSize - 2;
> - if (transQuantBypass)
> + if (m_tqBypass)
> {
> primitives.cu[sizeIdx].cpy1Dto2D_shl(residual, coeff, resiStride, 0);
> return;
> diff -r c1371f175178 -r 231f1a91eaef source/common/quant.h
> --- a/source/common/quant.h Mon Jan 26 15:31:42 2015 -0600
> +++ b/source/common/quant.h Wed Jan 28 23:43:16 2015 +0900
> @@ -93,6 +93,7 @@
>
> NoiseReduction* m_nr;
> NoiseReduction* m_frameNr; // Array of NR structures, one for each frameEncoder
> + bool m_tqBypass;
>
> Quant();
> ~Quant();
> @@ -107,7 +108,7 @@
> uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
> uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip);
>
> - void invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
> + void invtransformNxN(int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
> uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig);
>
> /* static methods shared with entropy.cpp */
> diff -r c1371f175178 -r 231f1a91eaef source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Mon Jan 26 15:31:42 2015 -0600
> +++ b/source/encoder/analysis.cpp Wed Jan 28 23:43:16 2015 +0900
> @@ -924,6 +924,7 @@
> /* generate recon pixels with no rate distortion considerations */
> CUData& cu = md.bestMode->cu;
> m_quant.setQPforQuant(cu);
> + m_quant.m_tqBypass = !!cu.m_tqBypass[0];
This patch looks OK, except that this assignment looks like it should be
done inside setQPforQuant(cu) itself: most of the m_tqBypass assignments
added by this patch immediately follow a setQPforQuant() call.
> uint32_t tuDepthRange[2];
> cu.getInterTUQtDepthRange(tuDepthRange, 0);
> @@ -949,6 +950,7 @@
> /* generate recon pixels with no rate distortion considerations */
> CUData& cu = md.bestMode->cu;
> m_quant.setQPforQuant(cu);
> + m_quant.m_tqBypass = !!cu.m_tqBypass[0];
>
> uint32_t tuDepthRange[2];
> cu.getIntraTUQtDepthRange(tuDepthRange, 0);
> @@ -1735,6 +1737,7 @@
>
> cu.copyFromPic(ctu, cuGeom);
> m_quant.setQPforQuant(cu);
> + m_quant.m_tqBypass = !!cu.m_tqBypass[0];
>
> Yuv& fencYuv = m_modeDepth[cuGeom.depth].fencYuv;
> if (cuGeom.depth)
> diff -r c1371f175178 -r 231f1a91eaef source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp Mon Jan 26 15:31:42 2015 -0600
> +++ b/source/encoder/entropy.cpp Wed Jan 28 23:43:16 2015 +0900
> @@ -1419,7 +1419,7 @@
>
> bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled && !tqBypass;
>
> - if (cu.m_slice->m_pps->bTransformSkipEnabled && !tqBypass && (trSize == 4))
> + if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
> codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);
>
> bool bIsLuma = ttype == TEXT_LUMA;
> diff -r c1371f175178 -r 231f1a91eaef source/encoder/search.cpp
> --- a/source/encoder/search.cpp Mon Jan 26 15:31:42 2015 -0600
> +++ b/source/encoder/search.cpp Wed Jan 28 23:43:16 2015 +0900
> @@ -294,7 +294,7 @@
> uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
> if (numSig)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
> + m_quant.invtransformNxN(residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
> primitives.cu[sizeIdx].add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
> }
> else
> @@ -437,7 +437,7 @@
> uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
> uint32_t tuSize = 1 << log2TrSize;
>
> - X265_CHECK(tuSize == MAX_TS_SIZE, "transform skip is only possible at 4x4 TUs\n");
> + X265_CHECK(tuSize <= MAX_TS_SIZE, "transform skip is only possible at 4x4 TUs\n");
>
> CUData& cu = mode.cu;
> Yuv* predYuv = &mode.predYuv;
> @@ -495,7 +495,7 @@
> uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, useTSkip);
> if (numSig)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTSkip, numSig);
> + m_quant.invtransformNxN(residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTSkip, numSig);
> primitives.cu[sizeIdx].add_ps(tmpRecon, tmpReconStride, pred, residual, stride, stride);
> }
> else if (useTSkip)
> @@ -645,7 +645,7 @@
> uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
> if (numSig)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
> + m_quant.invtransformNxN(residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
> primitives.cu[sizeIdx].add_ps(picReconY, picStride, pred, residual, stride, stride);
> cu.setCbfSubParts(1 << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
> }
> @@ -819,7 +819,7 @@
> uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
> if (numSig)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
> + m_quant.invtransformNxN(residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
> primitives.cu[sizeIdxC].add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
> cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
> }
> @@ -923,7 +923,7 @@
> uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSizeC, ttype, absPartIdxC, useTSkip);
> if (numSig)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
> + m_quant.invtransformNxN(residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
> primitives.cu[sizeIdxC].add_ps(recon, reconStride, pred, residual, stride, stride);
> cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
> }
> @@ -1110,7 +1110,7 @@
> uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
> if (numSig)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
> + m_quant.invtransformNxN(residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
> primitives.cu[sizeIdxC].add_ps(picReconC, picStride, pred, residual, stride, stride);
> cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
> }
> @@ -1137,6 +1137,7 @@
>
> cu.setPartSizeSubParts(partSize);
> cu.setPredModeSubParts(MODE_INTRA);
> + m_quant.m_tqBypass = !!cu.m_tqBypass[0];
>
> uint32_t tuDepthRange[2];
> cu.getIntraTUQtDepthRange(tuDepthRange, 0);
> @@ -1353,6 +1354,7 @@
> X265_CHECK(!m_slice->isIntra(), "encodeIntraInInter does not expect to be used in I slices\n");
>
> m_quant.setQPforQuant(cu);
> + m_quant.m_tqBypass = !!cu.m_tqBypass[0];
>
> uint32_t tuDepthRange[2];
> cu.getIntraTUQtDepthRange(tuDepthRange, 0);
> @@ -2493,7 +2495,9 @@
> uint32_t log2CUSize = cuGeom.log2CUSize;
> int sizeIdx = log2CUSize - 2;
>
> + uint32_t tqBypass = cu.m_tqBypass[0];
> m_quant.setQPforQuant(interMode.cu);
> + m_quant.m_tqBypass = !!tqBypass;
>
> resiYuv->subtract(*fencYuv, *predYuv, log2CUSize);
>
> @@ -2505,7 +2509,7 @@
> Cost costs;
> estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
>
> - if (!cu.m_tqBypass[0])
> + if (!tqBypass)
> {
> uint32_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
> cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
> @@ -2540,15 +2544,16 @@
> /* calculate signal bits for inter/merge/skip coded CU */
> m_entropyCoder.load(m_rqt[depth].cur);
>
> + m_entropyCoder.resetBits();
> + if (m_slice->m_pps->bTransquantBypassEnabled)
> + m_entropyCoder.codeCUTransquantBypassFlag(tqBypass);
> +
> uint32_t coeffBits, bits;
> if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N && !cu.getQtRootCbf(0))
> {
> cu.setPredModeSubParts(MODE_SKIP);
>
> /* Merge/Skip */
> - m_entropyCoder.resetBits();
> - if (m_slice->m_pps->bTransquantBypassEnabled)
> - m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
> m_entropyCoder.codeSkipFlag(cu, 0);
> m_entropyCoder.codeMergeIndex(cu, 0);
> coeffBits = 0;
> @@ -2556,9 +2561,6 @@
> }
> else
> {
> - m_entropyCoder.resetBits();
> - if (m_slice->m_pps->bTransquantBypassEnabled)
> - m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
> m_entropyCoder.codeSkipFlag(cu, 0);
> m_entropyCoder.codePredMode(cu.m_predMode[0]);
> m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
> @@ -2639,7 +2641,7 @@
>
> if (numSigY)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
> + m_quant.invtransformNxN(curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
> cu.setCbfSubParts(setCbf, TEXT_LUMA, absPartIdx, depth);
> }
> else
> @@ -2672,7 +2674,7 @@
> uint32_t numSigU = m_quant.transformNxN(cu, fencCb, fencYuv->m_csize, curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, absPartIdxC, false);
> if (numSigU)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
> + m_quant.invtransformNxN(curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
> cu.setCbfPartRange(setCbf, TEXT_CHROMA_U, absPartIdxC, tuIterator.absPartIdxStep);
> }
> else
> @@ -2686,7 +2688,7 @@
> uint32_t numSigV = m_quant.transformNxN(cu, fencCr, fencYuv->m_csize, curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, absPartIdxC, false);
> if (numSigV)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
> + m_quant.invtransformNxN(curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
> cu.setCbfPartRange(setCbf, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
> }
> else
> @@ -2832,7 +2834,7 @@
>
> if (cbfFlag[TEXT_LUMA][0])
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSig[TEXT_LUMA][0]); //this is for inter mode only
> + m_quant.invtransformNxN(curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSig[TEXT_LUMA][0]); //this is for inter mode only
>
> // non-zero cost calculation for luma - This is an approximation
> // finally we have to encode correct cbf after comparing with null cost
> @@ -2931,7 +2933,7 @@
>
> if (cbfFlag[chromaId][tuIterator.section])
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiC, strideResiC, coeffCurC + subTUOffset,
> + m_quant.invtransformNxN(curResiC, strideResiC, coeffCurC + subTUOffset,
> log2TrSizeC, (TextType)chromaId, false, false, numSig[chromaId][tuIterator.section]);
>
> // non-zero cost calculation for luma, same as luma - This is an approximation
> @@ -3023,7 +3025,7 @@
> m_entropyCoder.codeCoeffNxN(cu, tsCoeffY, absPartIdx, log2TrSize, TEXT_LUMA);
> const uint32_t skipSingleBitsY = m_entropyCoder.getNumberOfWrittenBits();
>
> - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], tsResiY, trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
> + m_quant.invtransformNxN(tsResiY, trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
>
> nonZeroDistY = primitives.cu[partSize].sse_ss(resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size, tsResiY, trSize);
>
> @@ -3094,7 +3096,7 @@
> m_entropyCoder.codeCoeffNxN(cu, tsCoeffC, absPartIdxC, log2TrSizeC, (TextType)chromaId);
> singleBits[chromaId][tuIterator.section] = m_entropyCoder.getNumberOfWrittenBits();
>
> - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], tsResiC, trSizeC, tsCoeffC,
> + m_quant.invtransformNxN(tsResiC, trSizeC, tsCoeffC,
> log2TrSizeC, (TextType)chromaId, false, true, numSigTSkipC);
> uint32_t dist = primitives.cu[partSizeC].sse_ss(resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, tsResiC, trSizeC);
> nonZeroDistC = m_rdCost.scaleChromaDist(chromaId, dist);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel mailing list