[x265] quant: add m_tqBypass
Steve Borho
steve at borho.org
Fri Jan 30 18:22:11 CET 2015
On 01/30, Satoshi Nakagawa wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1422623908 -32400
> # Fri Jan 30 22:18:28 2015 +0900
> # Node ID ee56555c683e73c4207a98e6626167d4445a8d76
> # Parent 5e5dc3763f6386da9722903033a2b9dd263a5226
> quant: add m_tqBypass
queued, thanks
> diff -r 5e5dc3763f63 -r ee56555c683e source/common/deblock.cpp
> --- a/source/common/deblock.cpp Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/common/deblock.cpp Fri Jan 30 22:18:28 2015 +0900
> @@ -401,14 +401,22 @@
> if (!bs)
> continue;
>
> - int32_t qpQ = cuQ->m_qp[partQ];
> -
> // Derive neighboring PU index
> uint32_t partP;
> const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
>
> + if (bCheckNoFilter)
> + {
> + // check if each of PUs is lossless coded
> + maskP = cuP->m_tqBypass[partP] - 1;
> + maskQ = cuQ->m_tqBypass[partQ] - 1;
> + if (!(maskP | maskQ))
> + continue;
> + }
> +
> + int32_t qpQ = cuQ->m_qp[partQ];
> int32_t qpP = cuP->m_qp[partP];
> - int32_t qp = (qpP + qpQ + 1) >> 1;
> + int32_t qp = (qpP + qpQ + 1) >> 1;
>
> int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset);
>
> @@ -428,13 +436,6 @@
> if (d >= beta)
> continue;
>
> - if (bCheckNoFilter)
> - {
> - // check if each of PUs is lossless coded
> - maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
> - maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
> - }
> -
> int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
> int32_t tc = s_tcTable[indexTC] << bitdepthShift;
>
> @@ -506,33 +507,29 @@
> if (bs <= 1)
> continue;
>
> - int32_t qpQ = cuQ->m_qp[partQ];
> -
> // Derive neighboring PU index
> uint32_t partP;
> const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
>
> - int32_t qpP = cuP->m_qp[partP];
> -
> if (bCheckNoFilter)
> {
> // check if each of PUs is lossless coded
> maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
> maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
> + if (!(maskP | maskQ))
> + continue;
> }
>
> + int32_t qpQ = cuQ->m_qp[partQ];
> + int32_t qpP = cuP->m_qp[partP];
> + int32_t qpA = (qpP + qpQ + 1) >> 1;
> +
> intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
> for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
> {
> - int32_t chromaQPOffset = pps->chromaQpOffset[chromaIdx];
> - int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset;
> + int32_t qp = qpA + pps->chromaQpOffset[chromaIdx];
> if (qp >= 30)
> - {
> - if (chFmt == X265_CSP_I420)
> - qp = g_chromaScale[qp];
> - else
> - qp = X265_MIN(qp, 51);
> - }
> + qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, 51);
>
> int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
> const int32_t bitdepthShift = X265_DEPTH - 8;
> diff -r 5e5dc3763f63 -r ee56555c683e source/common/quant.cpp
> --- a/source/common/quant.cpp Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/common/quant.cpp Fri Jan 30 22:18:28 2015 +0900
> @@ -169,6 +169,7 @@
> m_resiDctCoeff = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
> m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE);
> m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE);
> + m_tqBypass = false;
>
> return m_resiDctCoeff && m_fencShortBuf;
> }
> @@ -190,13 +191,16 @@
> X265_FREE(m_fencShortBuf);
> }
>
> -void Quant::setQPforQuant(const CUData& ctu)
> +void Quant::setQPforQuant(const CUData& cu)
> {
> - m_nr = m_frameNr ? &m_frameNr[ctu.m_encData->m_frameEncoderID] : NULL;
> - int qpy = ctu.m_qp[0];
> + m_tqBypass = !!cu.m_tqBypass[0];
> + if (m_tqBypass)
> + return;
> + m_nr = m_frameNr ? &m_frameNr[cu.m_encData->m_frameEncoderID] : NULL;
> + int qpy = cu.m_qp[0];
> m_qpParam[TEXT_LUMA].setQpParam(qpy + QP_BD_OFFSET);
> - setChromaQP(qpy + ctu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, ctu.m_chromaFormat);
> - setChromaQP(qpy + ctu.m_slice->m_pps->chromaQpOffset[1], TEXT_CHROMA_V, ctu.m_chromaFormat);
> + setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, cu.m_chromaFormat);
> + setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[1], TEXT_CHROMA_V, cu.m_chromaFormat);
> }
>
> void Quant::setChromaQP(int qpin, TextType ttype, int chFmt)
> @@ -326,7 +330,7 @@
> coeff_t* coeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip)
> {
> const uint32_t sizeIdx = log2TrSize - 2;
> - if (cu.m_tqBypass[absPartIdx])
> + if (m_tqBypass)
> {
> X265_CHECK(log2TrSize >= 2 && log2TrSize <= 5, "Block size mistake!\n");
> return primitives.cu[sizeIdx].copy_cnt(coeff, residual, resiStride);
> @@ -406,11 +410,11 @@
> }
> }
>
> -void Quant::invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
> +void Quant::invtransformNxN(int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
> uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig)
> {
> const uint32_t sizeIdx = log2TrSize - 2;
> - if (transQuantBypass)
> + if (m_tqBypass)
> {
> primitives.cu[sizeIdx].cpy1Dto2D_shl(residual, coeff, resiStride, 0);
> return;
> diff -r 5e5dc3763f63 -r ee56555c683e source/common/quant.h
> --- a/source/common/quant.h Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/common/quant.h Fri Jan 30 22:18:28 2015 +0900
> @@ -93,6 +93,7 @@
>
> NoiseReduction* m_nr;
> NoiseReduction* m_frameNr; // Array of NR structures, one for each frameEncoder
> + bool m_tqBypass;
>
> Quant();
> ~Quant();
> @@ -102,12 +103,12 @@
> bool allocNoiseReduction(const x265_param& param);
>
> /* CU setup */
> - void setQPforQuant(const CUData& ctu);
> + void setQPforQuant(const CUData& cu);
>
> uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
> uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip);
>
> - void invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
> + void invtransformNxN(int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
> uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig);
>
> /* static methods shared with entropy.cpp */
> diff -r 5e5dc3763f63 -r ee56555c683e source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/encoder/entropy.cpp Fri Jan 30 22:18:28 2015 +0900
> @@ -1419,7 +1419,7 @@
>
> bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled && !tqBypass;
>
> - if (cu.m_slice->m_pps->bTransformSkipEnabled && !tqBypass && (trSize == 4))
> + if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
> codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);
>
> bool bIsLuma = ttype == TEXT_LUMA;
> diff -r 5e5dc3763f63 -r ee56555c683e source/encoder/search.cpp
> --- a/source/encoder/search.cpp Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/encoder/search.cpp Fri Jan 30 22:18:28 2015 +0900
> @@ -294,7 +294,7 @@
> uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
> if (numSig)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
> + m_quant.invtransformNxN(residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
> primitives.cu[sizeIdx].add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
> }
> else
> @@ -437,7 +437,7 @@
> uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
> uint32_t tuSize = 1 << log2TrSize;
>
> - X265_CHECK(tuSize == MAX_TS_SIZE, "transform skip is only possible at 4x4 TUs\n");
> + X265_CHECK(tuSize <= MAX_TS_SIZE, "transform skip is only possible at 4x4 TUs\n");
>
> CUData& cu = mode.cu;
> Yuv* predYuv = &mode.predYuv;
> @@ -495,7 +495,7 @@
> uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, useTSkip);
> if (numSig)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTSkip, numSig);
> + m_quant.invtransformNxN(residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTSkip, numSig);
> primitives.cu[sizeIdx].add_ps(tmpRecon, tmpReconStride, pred, residual, stride, stride);
> }
> else if (useTSkip)
> @@ -645,7 +645,7 @@
> uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
> if (numSig)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
> + m_quant.invtransformNxN(residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
> primitives.cu[sizeIdx].add_ps(picReconY, picStride, pred, residual, stride, stride);
> cu.setCbfSubParts(1 << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
> }
> @@ -819,7 +819,7 @@
> uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
> if (numSig)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
> + m_quant.invtransformNxN(residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
> primitives.cu[sizeIdxC].add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
> cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
> }
> @@ -923,7 +923,7 @@
> uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSizeC, ttype, absPartIdxC, useTSkip);
> if (numSig)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
> + m_quant.invtransformNxN(residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
> primitives.cu[sizeIdxC].add_ps(recon, reconStride, pred, residual, stride, stride);
> cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
> }
> @@ -1110,7 +1110,7 @@
> uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
> if (numSig)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
> + m_quant.invtransformNxN(residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
> primitives.cu[sizeIdxC].add_ps(picReconC, picStride, pred, residual, stride, stride);
> cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
> }
> @@ -1137,6 +1137,7 @@
>
> cu.setPartSizeSubParts(partSize);
> cu.setPredModeSubParts(MODE_INTRA);
> + m_quant.m_tqBypass = !!cu.m_tqBypass[0];
>
> uint32_t tuDepthRange[2];
> cu.getIntraTUQtDepthRange(tuDepthRange, 0);
> @@ -2493,6 +2494,7 @@
> uint32_t log2CUSize = cuGeom.log2CUSize;
> int sizeIdx = log2CUSize - 2;
>
> + uint32_t tqBypass = cu.m_tqBypass[0];
> m_quant.setQPforQuant(interMode.cu);
>
> resiYuv->subtract(*fencYuv, *predYuv, log2CUSize);
> @@ -2505,7 +2507,7 @@
> Cost costs;
> estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
>
> - if (!cu.m_tqBypass[0])
> + if (!tqBypass)
> {
> uint32_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
> cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
> @@ -2540,15 +2542,16 @@
> /* calculate signal bits for inter/merge/skip coded CU */
> m_entropyCoder.load(m_rqt[depth].cur);
>
> + m_entropyCoder.resetBits();
> + if (m_slice->m_pps->bTransquantBypassEnabled)
> + m_entropyCoder.codeCUTransquantBypassFlag(tqBypass);
> +
> uint32_t coeffBits, bits;
> if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N && !cu.getQtRootCbf(0))
> {
> cu.setPredModeSubParts(MODE_SKIP);
>
> /* Merge/Skip */
> - m_entropyCoder.resetBits();
> - if (m_slice->m_pps->bTransquantBypassEnabled)
> - m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
> m_entropyCoder.codeSkipFlag(cu, 0);
> m_entropyCoder.codeMergeIndex(cu, 0);
> coeffBits = 0;
> @@ -2556,9 +2559,6 @@
> }
> else
> {
> - m_entropyCoder.resetBits();
> - if (m_slice->m_pps->bTransquantBypassEnabled)
> - m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
> m_entropyCoder.codeSkipFlag(cu, 0);
> m_entropyCoder.codePredMode(cu.m_predMode[0]);
> m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
> @@ -2639,7 +2639,7 @@
>
> if (numSigY)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
> + m_quant.invtransformNxN(curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
> cu.setCbfSubParts(setCbf, TEXT_LUMA, absPartIdx, depth);
> }
> else
> @@ -2672,7 +2672,7 @@
> uint32_t numSigU = m_quant.transformNxN(cu, fencCb, fencYuv->m_csize, curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, absPartIdxC, false);
> if (numSigU)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
> + m_quant.invtransformNxN(curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
> cu.setCbfPartRange(setCbf, TEXT_CHROMA_U, absPartIdxC, tuIterator.absPartIdxStep);
> }
> else
> @@ -2686,7 +2686,7 @@
> uint32_t numSigV = m_quant.transformNxN(cu, fencCr, fencYuv->m_csize, curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, absPartIdxC, false);
> if (numSigV)
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
> + m_quant.invtransformNxN(curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
> cu.setCbfPartRange(setCbf, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
> }
> else
> @@ -2832,7 +2832,7 @@
>
> if (cbfFlag[TEXT_LUMA][0])
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSig[TEXT_LUMA][0]); //this is for inter mode only
> + m_quant.invtransformNxN(curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSig[TEXT_LUMA][0]); //this is for inter mode only
>
> // non-zero cost calculation for luma - This is an approximation
> // finally we have to encode correct cbf after comparing with null cost
> @@ -2931,7 +2931,7 @@
>
> if (cbfFlag[chromaId][tuIterator.section])
> {
> - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiC, strideResiC, coeffCurC + subTUOffset,
> + m_quant.invtransformNxN(curResiC, strideResiC, coeffCurC + subTUOffset,
> log2TrSizeC, (TextType)chromaId, false, false, numSig[chromaId][tuIterator.section]);
>
> // non-zero cost calculation for luma, same as luma - This is an approximation
> @@ -3023,7 +3023,7 @@
> m_entropyCoder.codeCoeffNxN(cu, tsCoeffY, absPartIdx, log2TrSize, TEXT_LUMA);
> const uint32_t skipSingleBitsY = m_entropyCoder.getNumberOfWrittenBits();
>
> - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], tsResiY, trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
> + m_quant.invtransformNxN(tsResiY, trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
>
> nonZeroDistY = primitives.cu[partSize].sse_ss(resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size, tsResiY, trSize);
>
> @@ -3094,7 +3094,7 @@
> m_entropyCoder.codeCoeffNxN(cu, tsCoeffC, absPartIdxC, log2TrSizeC, (TextType)chromaId);
> singleBits[chromaId][tuIterator.section] = m_entropyCoder.getNumberOfWrittenBits();
>
> - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], tsResiC, trSizeC, tsCoeffC,
> + m_quant.invtransformNxN(tsResiC, trSizeC, tsCoeffC,
> log2TrSizeC, (TextType)chromaId, false, true, numSigTSkipC);
> uint32_t dist = primitives.cu[partSizeC].sse_ss(resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, tsResiC, trSizeC);
> nonZeroDistC = m_rdCost.scaleChromaDist(chromaId, dist);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel mailing list