[x265] [PATCH] Quant: fix for decoding hash mismatch and non-deterministic output on multi-socket machines
ashok at multicorewareinc.com
ashok at multicorewareinc.com
Wed Sep 2 14:24:44 CEST 2015
# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1441182836 -19800
# Wed Sep 02 14:03:56 2015 +0530
# Node ID a0b361e07eb32c0b676437283273142fb77bd5fe
# Parent 86e9bd7dd19278fceef65fc93a06dc8746ec9daf
Quant: fix for decoding hash mismatch and non-deterministic output in multi-socket m/c
diff -r 86e9bd7dd192 -r a0b361e07eb3 source/common/quant.cpp
--- a/source/common/quant.cpp Tue Sep 01 17:06:05 2015 +0530
+++ b/source/common/quant.cpp Wed Sep 02 14:03:56 2015 +0530
@@ -204,7 +204,6 @@
m_resiDctCoeff = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE);
m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE);
- m_tqBypass = false;
return m_resiDctCoeff && m_fencShortBuf;
}
@@ -228,8 +227,6 @@
void Quant::setQPforQuant(const CUData& ctu, int qp)
{
- m_tqBypass = !!ctu.m_tqBypass[0];
-
m_nr = m_frameNr ? &m_frameNr[ctu.m_encData->m_frameEncoderID] : NULL;
m_qpParam[TEXT_LUMA].setQpParam(qp + QP_BD_OFFSET);
setChromaQP(qp + ctu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, ctu.m_chromaFormat);
@@ -404,7 +401,8 @@
coeff_t* coeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip)
{
const uint32_t sizeIdx = log2TrSize - 2;
- if (m_tqBypass)
+
+ if (cu.m_tqBypass[0])
{
X265_CHECK(log2TrSize >= 2 && log2TrSize <= 5, "Block size mistake!\n");
return primitives.cu[sizeIdx].copy_cnt(coeff, residual, resiStride);
@@ -484,11 +482,12 @@
}
}
-void Quant::invtransformNxN(int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
+void Quant::invtransformNxN(const CUData& cu, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig)
{
const uint32_t sizeIdx = log2TrSize - 2;
- if (m_tqBypass)
+
+ if (cu.m_tqBypass[0])
{
primitives.cu[sizeIdx].cpy1Dto2D_shl(residual, coeff, resiStride, 0);
return;
diff -r 86e9bd7dd192 -r a0b361e07eb3 source/common/quant.h
--- a/source/common/quant.h Tue Sep 01 17:06:05 2015 +0530
+++ b/source/common/quant.h Wed Sep 02 14:03:56 2015 +0530
@@ -95,7 +95,6 @@
NoiseReduction* m_nr;
NoiseReduction* m_frameNr; // Array of NR structures, one for each frameEncoder
- bool m_tqBypass;
Quant();
~Quant();
@@ -110,7 +109,7 @@
uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip);
- void invtransformNxN(int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
+ void invtransformNxN(const CUData& cu, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig);
/* Pattern decision for context derivation process of significant_coeff_flag */
diff -r 86e9bd7dd192 -r a0b361e07eb3 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Tue Sep 01 17:06:05 2015 +0530
+++ b/source/encoder/analysis.cpp Wed Sep 02 14:03:56 2015 +0530
@@ -209,24 +209,20 @@
return;
else if (md.bestMode->cu.isIntra(0))
{
- m_quant.m_tqBypass = true;
md.pred[PRED_LOSSLESS].initCosts();
md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
PartSize size = (PartSize)md.pred[PRED_LOSSLESS].cu.m_partSize[0];
uint8_t* modes = md.pred[PRED_LOSSLESS].cu.m_lumaIntraDir;
checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size, modes, NULL);
checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
- m_quant.m_tqBypass = false;
}
else
{
- m_quant.m_tqBypass = true;
md.pred[PRED_LOSSLESS].initCosts();
md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
md.pred[PRED_LOSSLESS].predYuv.copyFromYuv(md.bestMode->predYuv);
encodeResAndCalcRdInterCU(md.pred[PRED_LOSSLESS], cuGeom);
checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
- m_quant.m_tqBypass = false;
}
}
diff -r 86e9bd7dd192 -r a0b361e07eb3 source/encoder/search.cpp
--- a/source/encoder/search.cpp Tue Sep 01 17:06:05 2015 +0530
+++ b/source/encoder/search.cpp Wed Sep 02 14:03:56 2015 +0530
@@ -319,7 +319,7 @@
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
if (numSig)
{
- m_quant.invtransformNxN(residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
+ m_quant.invtransformNxN(cu, residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
primitives.cu[sizeIdx].add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
}
else
@@ -518,7 +518,7 @@
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, useTSkip);
if (numSig)
{
- m_quant.invtransformNxN(residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTSkip, numSig);
+ m_quant.invtransformNxN(cu, residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTSkip, numSig);
primitives.cu[sizeIdx].add_ps(tmpRecon, tmpReconStride, pred, residual, stride, stride);
}
else if (useTSkip)
@@ -670,7 +670,7 @@
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
if (numSig)
{
- m_quant.invtransformNxN(residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
+ m_quant.invtransformNxN(cu, residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
primitives.cu[sizeIdx].add_ps(picReconY, picStride, pred, residual, stride, stride);
cu.setCbfSubParts(1 << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
}
@@ -845,7 +845,7 @@
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
if (numSig)
{
- m_quant.invtransformNxN(residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
+ m_quant.invtransformNxN(cu, residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
primitives.cu[sizeIdxC].add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
@@ -946,7 +946,7 @@
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSizeC, ttype, absPartIdxC, useTSkip);
if (numSig)
{
- m_quant.invtransformNxN(residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
+ m_quant.invtransformNxN(cu, residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
primitives.cu[sizeIdxC].add_ps(recon, reconStride, pred, residual, stride, stride);
cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
@@ -1135,7 +1135,7 @@
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
if (numSig)
{
- m_quant.invtransformNxN(residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
+ m_quant.invtransformNxN(cu, residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
primitives.cu[sizeIdxC].add_ps(picReconC, picStride, pred, residual, stride, stride);
cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
@@ -1162,7 +1162,6 @@
cu.setPartSizeSubParts(partSize);
cu.setPredModeSubParts(MODE_INTRA);
- m_quant.m_tqBypass = !!cu.m_tqBypass[0];
uint32_t tuDepthRange[2];
cu.getIntraTUQtDepthRange(tuDepthRange, 0);
@@ -2683,7 +2682,7 @@
if (numSigY)
{
- m_quant.invtransformNxN(curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
+ m_quant.invtransformNxN(cu, curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
cu.setCbfSubParts(setCbf, TEXT_LUMA, absPartIdx, depth);
}
else
@@ -2716,7 +2715,7 @@
uint32_t numSigU = m_quant.transformNxN(cu, fencCb, fencYuv->m_csize, curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, absPartIdxC, false);
if (numSigU)
{
- m_quant.invtransformNxN(curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
+ m_quant.invtransformNxN(cu, curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
cu.setCbfPartRange(setCbf, TEXT_CHROMA_U, absPartIdxC, tuIterator.absPartIdxStep);
}
else
@@ -2730,7 +2729,7 @@
uint32_t numSigV = m_quant.transformNxN(cu, fencCr, fencYuv->m_csize, curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, absPartIdxC, false);
if (numSigV)
{
- m_quant.invtransformNxN(curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
+ m_quant.invtransformNxN(cu, curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
cu.setCbfPartRange(setCbf, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
}
else
@@ -2876,7 +2875,7 @@
if (cbfFlag[TEXT_LUMA][0])
{
- m_quant.invtransformNxN(curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSig[TEXT_LUMA][0]); //this is for inter mode only
+ m_quant.invtransformNxN(cu, curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSig[TEXT_LUMA][0]); //this is for inter mode only
// non-zero cost calculation for luma - This is an approximation
// finally we have to encode correct cbf after comparing with null cost
@@ -2973,7 +2972,7 @@
if (cbfFlag[chromaId][tuIterator.section])
{
- m_quant.invtransformNxN(curResiC, strideResiC, coeffCurC + subTUOffset,
+ m_quant.invtransformNxN(cu, curResiC, strideResiC, coeffCurC + subTUOffset,
log2TrSizeC, (TextType)chromaId, false, false, numSig[chromaId][tuIterator.section]);
// non-zero cost calculation for luma, same as luma - This is an approximation
@@ -3062,7 +3061,7 @@
m_entropyCoder.codeCoeffNxN(cu, m_tsCoeff, absPartIdx, log2TrSize, TEXT_LUMA);
const uint32_t skipSingleBitsY = m_entropyCoder.getNumberOfWrittenBits();
- m_quant.invtransformNxN(m_tsResidual, trSize, m_tsCoeff, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
+ m_quant.invtransformNxN(cu, m_tsResidual, trSize, m_tsCoeff, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
nonZeroDistY = primitives.cu[partSize].sse_ss(resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size, m_tsResidual, trSize);
@@ -3130,7 +3129,7 @@
m_entropyCoder.codeCoeffNxN(cu, m_tsCoeff, absPartIdxC, log2TrSizeC, (TextType)chromaId);
singleBits[chromaId][tuIterator.section] = m_entropyCoder.getNumberOfWrittenBits();
- m_quant.invtransformNxN(m_tsResidual, trSizeC, m_tsCoeff,
+ m_quant.invtransformNxN(cu, m_tsResidual, trSizeC, m_tsCoeff,
log2TrSizeC, (TextType)chromaId, false, true, numSigTSkipC);
uint32_t dist = primitives.cu[partSizeC].sse_ss(resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, m_tsResidual, trSizeC);
nonZeroDistC = m_rdCost.scaleChromaDist(chromaId, dist);
More information about the x265-devel mailing list