[x265] quant: add m_tqBypass
Satoshi Nakagawa
nakagawa424 at oki.com
Wed Jan 28 15:46:38 CET 2015
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1422456196 -32400
# Wed Jan 28 23:43:16 2015 +0900
# Node ID 231f1a91eaefdd5e79bc250b0c505178a89f185e
# Parent c1371f175178edcc0d0402a745b7478aa240c3b4
quant: add m_tqBypass
diff -r c1371f175178 -r 231f1a91eaef source/common/deblock.cpp
--- a/source/common/deblock.cpp Mon Jan 26 15:31:42 2015 -0600
+++ b/source/common/deblock.cpp Wed Jan 28 23:43:16 2015 +0900
@@ -401,14 +401,22 @@
if (!bs)
continue;
- int32_t qpQ = cuQ->m_qp[partQ];
-
// Derive neighboring PU index
uint32_t partP;
const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
+ if (bCheckNoFilter)
+ {
+ // check if each of PUs is lossless coded
+ maskP = cuP->m_tqBypass[partP] - 1;
+ maskQ = cuQ->m_tqBypass[partQ] - 1;
+ if (!(maskP | maskQ))
+ continue;
+ }
+
+ int32_t qpQ = cuQ->m_qp[partQ];
int32_t qpP = cuP->m_qp[partP];
- int32_t qp = (qpP + qpQ + 1) >> 1;
+ int32_t qp = (qpP + qpQ + 1) >> 1;
int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset);
@@ -428,13 +436,6 @@
if (d >= beta)
continue;
- if (bCheckNoFilter)
- {
- // check if each of PUs is lossless coded
- maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
- maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
- }
-
int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
int32_t tc = s_tcTable[indexTC] << bitdepthShift;
@@ -506,33 +507,29 @@
if (bs <= 1)
continue;
- int32_t qpQ = cuQ->m_qp[partQ];
-
// Derive neighboring PU index
uint32_t partP;
const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
- int32_t qpP = cuP->m_qp[partP];
-
if (bCheckNoFilter)
{
// check if each of PUs is lossless coded
maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
+ if (!(maskP | maskQ))
+ continue;
}
+ int32_t qpQ = cuQ->m_qp[partQ];
+ int32_t qpP = cuP->m_qp[partP];
+ int32_t qpA = (qpP + qpQ + 1) >> 1;
+
intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
{
- int32_t chromaQPOffset = pps->chromaQpOffset[chromaIdx];
- int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset;
+ int32_t qp = qpA + pps->chromaQpOffset[chromaIdx];
if (qp >= 30)
- {
- if (chFmt == X265_CSP_I420)
- qp = g_chromaScale[qp];
- else
- qp = X265_MIN(qp, 51);
- }
+ qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, 51);
int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
const int32_t bitdepthShift = X265_DEPTH - 8;
diff -r c1371f175178 -r 231f1a91eaef source/common/quant.cpp
--- a/source/common/quant.cpp Mon Jan 26 15:31:42 2015 -0600
+++ b/source/common/quant.cpp Wed Jan 28 23:43:16 2015 +0900
@@ -169,6 +169,7 @@
m_resiDctCoeff = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE);
m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE);
+ m_tqBypass = false;
return m_resiDctCoeff && m_fencShortBuf;
}
@@ -326,7 +327,7 @@
coeff_t* coeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip)
{
const uint32_t sizeIdx = log2TrSize - 2;
- if (cu.m_tqBypass[absPartIdx])
+ if (m_tqBypass)
{
X265_CHECK(log2TrSize >= 2 && log2TrSize <= 5, "Block size mistake!\n");
return primitives.cu[sizeIdx].copy_cnt(coeff, residual, resiStride);
@@ -406,11 +407,11 @@
}
}
-void Quant::invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
+void Quant::invtransformNxN(int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig)
{
const uint32_t sizeIdx = log2TrSize - 2;
- if (transQuantBypass)
+ if (m_tqBypass)
{
primitives.cu[sizeIdx].cpy1Dto2D_shl(residual, coeff, resiStride, 0);
return;
diff -r c1371f175178 -r 231f1a91eaef source/common/quant.h
--- a/source/common/quant.h Mon Jan 26 15:31:42 2015 -0600
+++ b/source/common/quant.h Wed Jan 28 23:43:16 2015 +0900
@@ -93,6 +93,7 @@
NoiseReduction* m_nr;
NoiseReduction* m_frameNr; // Array of NR structures, one for each frameEncoder
+ bool m_tqBypass;
Quant();
~Quant();
@@ -107,7 +108,7 @@
uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip);
- void invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
+ void invtransformNxN(int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig);
/* static methods shared with entropy.cpp */
diff -r c1371f175178 -r 231f1a91eaef source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Mon Jan 26 15:31:42 2015 -0600
+++ b/source/encoder/analysis.cpp Wed Jan 28 23:43:16 2015 +0900
@@ -924,6 +924,7 @@
/* generate recon pixels with no rate distortion considerations */
CUData& cu = md.bestMode->cu;
m_quant.setQPforQuant(cu);
+ m_quant.m_tqBypass = !!cu.m_tqBypass[0];
uint32_t tuDepthRange[2];
cu.getInterTUQtDepthRange(tuDepthRange, 0);
@@ -949,6 +950,7 @@
/* generate recon pixels with no rate distortion considerations */
CUData& cu = md.bestMode->cu;
m_quant.setQPforQuant(cu);
+ m_quant.m_tqBypass = !!cu.m_tqBypass[0];
uint32_t tuDepthRange[2];
cu.getIntraTUQtDepthRange(tuDepthRange, 0);
@@ -1735,6 +1737,7 @@
cu.copyFromPic(ctu, cuGeom);
m_quant.setQPforQuant(cu);
+ m_quant.m_tqBypass = !!cu.m_tqBypass[0];
Yuv& fencYuv = m_modeDepth[cuGeom.depth].fencYuv;
if (cuGeom.depth)
diff -r c1371f175178 -r 231f1a91eaef source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp Mon Jan 26 15:31:42 2015 -0600
+++ b/source/encoder/entropy.cpp Wed Jan 28 23:43:16 2015 +0900
@@ -1419,7 +1419,7 @@
bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled && !tqBypass;
- if (cu.m_slice->m_pps->bTransformSkipEnabled && !tqBypass && (trSize == 4))
+ if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);
bool bIsLuma = ttype == TEXT_LUMA;
diff -r c1371f175178 -r 231f1a91eaef source/encoder/search.cpp
--- a/source/encoder/search.cpp Mon Jan 26 15:31:42 2015 -0600
+++ b/source/encoder/search.cpp Wed Jan 28 23:43:16 2015 +0900
@@ -294,7 +294,7 @@
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
if (numSig)
{
- m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
+ m_quant.invtransformNxN(residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
primitives.cu[sizeIdx].add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
}
else
@@ -437,7 +437,7 @@
uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
uint32_t tuSize = 1 << log2TrSize;
- X265_CHECK(tuSize == MAX_TS_SIZE, "transform skip is only possible at 4x4 TUs\n");
+ X265_CHECK(tuSize <= MAX_TS_SIZE, "transform skip is only possible at 4x4 TUs\n");
CUData& cu = mode.cu;
Yuv* predYuv = &mode.predYuv;
@@ -495,7 +495,7 @@
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, useTSkip);
if (numSig)
{
- m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTSkip, numSig);
+ m_quant.invtransformNxN(residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTSkip, numSig);
primitives.cu[sizeIdx].add_ps(tmpRecon, tmpReconStride, pred, residual, stride, stride);
}
else if (useTSkip)
@@ -645,7 +645,7 @@
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
if (numSig)
{
- m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
+ m_quant.invtransformNxN(residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
primitives.cu[sizeIdx].add_ps(picReconY, picStride, pred, residual, stride, stride);
cu.setCbfSubParts(1 << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
}
@@ -819,7 +819,7 @@
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
if (numSig)
{
- m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
+ m_quant.invtransformNxN(residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
primitives.cu[sizeIdxC].add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
@@ -923,7 +923,7 @@
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSizeC, ttype, absPartIdxC, useTSkip);
if (numSig)
{
- m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
+ m_quant.invtransformNxN(residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
primitives.cu[sizeIdxC].add_ps(recon, reconStride, pred, residual, stride, stride);
cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
@@ -1110,7 +1110,7 @@
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
if (numSig)
{
- m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
+ m_quant.invtransformNxN(residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
primitives.cu[sizeIdxC].add_ps(picReconC, picStride, pred, residual, stride, stride);
cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
@@ -1137,6 +1137,7 @@
cu.setPartSizeSubParts(partSize);
cu.setPredModeSubParts(MODE_INTRA);
+ m_quant.m_tqBypass = !!cu.m_tqBypass[0];
uint32_t tuDepthRange[2];
cu.getIntraTUQtDepthRange(tuDepthRange, 0);
@@ -1353,6 +1354,7 @@
X265_CHECK(!m_slice->isIntra(), "encodeIntraInInter does not expect to be used in I slices\n");
m_quant.setQPforQuant(cu);
+ m_quant.m_tqBypass = !!cu.m_tqBypass[0];
uint32_t tuDepthRange[2];
cu.getIntraTUQtDepthRange(tuDepthRange, 0);
@@ -2493,7 +2495,9 @@
uint32_t log2CUSize = cuGeom.log2CUSize;
int sizeIdx = log2CUSize - 2;
+ uint32_t tqBypass = cu.m_tqBypass[0];
m_quant.setQPforQuant(interMode.cu);
+ m_quant.m_tqBypass = !!tqBypass;
resiYuv->subtract(*fencYuv, *predYuv, log2CUSize);
@@ -2505,7 +2509,7 @@
Cost costs;
estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
- if (!cu.m_tqBypass[0])
+ if (!tqBypass)
{
uint32_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
@@ -2540,15 +2544,16 @@
/* calculate signal bits for inter/merge/skip coded CU */
m_entropyCoder.load(m_rqt[depth].cur);
+ m_entropyCoder.resetBits();
+ if (m_slice->m_pps->bTransquantBypassEnabled)
+ m_entropyCoder.codeCUTransquantBypassFlag(tqBypass);
+
uint32_t coeffBits, bits;
if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N && !cu.getQtRootCbf(0))
{
cu.setPredModeSubParts(MODE_SKIP);
/* Merge/Skip */
- m_entropyCoder.resetBits();
- if (m_slice->m_pps->bTransquantBypassEnabled)
- m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
m_entropyCoder.codeSkipFlag(cu, 0);
m_entropyCoder.codeMergeIndex(cu, 0);
coeffBits = 0;
@@ -2556,9 +2561,6 @@
}
else
{
- m_entropyCoder.resetBits();
- if (m_slice->m_pps->bTransquantBypassEnabled)
- m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
m_entropyCoder.codeSkipFlag(cu, 0);
m_entropyCoder.codePredMode(cu.m_predMode[0]);
m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
@@ -2639,7 +2641,7 @@
if (numSigY)
{
- m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
+ m_quant.invtransformNxN(curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
cu.setCbfSubParts(setCbf, TEXT_LUMA, absPartIdx, depth);
}
else
@@ -2672,7 +2674,7 @@
uint32_t numSigU = m_quant.transformNxN(cu, fencCb, fencYuv->m_csize, curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, absPartIdxC, false);
if (numSigU)
{
- m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
+ m_quant.invtransformNxN(curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
cu.setCbfPartRange(setCbf, TEXT_CHROMA_U, absPartIdxC, tuIterator.absPartIdxStep);
}
else
@@ -2686,7 +2688,7 @@
uint32_t numSigV = m_quant.transformNxN(cu, fencCr, fencYuv->m_csize, curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, absPartIdxC, false);
if (numSigV)
{
- m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
+ m_quant.invtransformNxN(curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
cu.setCbfPartRange(setCbf, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
}
else
@@ -2832,7 +2834,7 @@
if (cbfFlag[TEXT_LUMA][0])
{
- m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSig[TEXT_LUMA][0]); //this is for inter mode only
+ m_quant.invtransformNxN(curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSig[TEXT_LUMA][0]); //this is for inter mode only
// non-zero cost calculation for luma - This is an approximation
// finally we have to encode correct cbf after comparing with null cost
@@ -2931,7 +2933,7 @@
if (cbfFlag[chromaId][tuIterator.section])
{
- m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiC, strideResiC, coeffCurC + subTUOffset,
+ m_quant.invtransformNxN(curResiC, strideResiC, coeffCurC + subTUOffset,
log2TrSizeC, (TextType)chromaId, false, false, numSig[chromaId][tuIterator.section]);
// non-zero cost calculation for luma, same as luma - This is an approximation
@@ -3023,7 +3025,7 @@
m_entropyCoder.codeCoeffNxN(cu, tsCoeffY, absPartIdx, log2TrSize, TEXT_LUMA);
const uint32_t skipSingleBitsY = m_entropyCoder.getNumberOfWrittenBits();
- m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], tsResiY, trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
+ m_quant.invtransformNxN(tsResiY, trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
nonZeroDistY = primitives.cu[partSize].sse_ss(resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size, tsResiY, trSize);
@@ -3094,7 +3096,7 @@
m_entropyCoder.codeCoeffNxN(cu, tsCoeffC, absPartIdxC, log2TrSizeC, (TextType)chromaId);
singleBits[chromaId][tuIterator.section] = m_entropyCoder.getNumberOfWrittenBits();
- m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], tsResiC, trSizeC, tsCoeffC,
+ m_quant.invtransformNxN(tsResiC, trSizeC, tsCoeffC,
log2TrSizeC, (TextType)chromaId, false, true, numSigTSkipC);
uint32_t dist = primitives.cu[partSizeC].sse_ss(resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, tsResiC, trSizeC);
nonZeroDistC = m_rdCost.scaleChromaDist(chromaId, dist);
More information about the x265-devel mailing list