[x265] [PATCH 4 of 8] search & analysis: modify to support 400 color space
mahesh at multicorewareinc.com
Mahesh Pittala
Mon Dec 14 20:30:10 CET 2015
# HG changeset patch
# User Mahesh Pittala <mahesh at multicorewareinc.com>
# Date 1450017438 -19800
# Sun Dec 13 20:07:18 2015 +0530
# Node ID d7200ee7910d88b970a80c41da1f63ab4ce4166d
# Parent d01cd1fee4e30e2dd4ea90490e471417e5bf47d5
search & analysis: modify to support 400 color space
diff -r d01cd1fee4e3 -r d7200ee7910d source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Sun Dec 13 19:57:55 2015 +0530
+++ b/source/encoder/analysis.cpp Sun Dec 13 20:07:18 2015 +0530
@@ -689,7 +689,7 @@
if (m_param->rdLevel > 2)
{
/* RD selection between merge, inter, bidir and intra */
- if (!m_bChromaSa8d) /* When m_bChromaSa8d is enabled, chroma MC has already been done */
+ if (!m_bChromaSa8d && (m_csp != X265_CSP_I400)) /* When m_bChromaSa8d is enabled, chroma MC has already been done */
{
uint32_t numPU = bestInter->cu.getNumPartInter(0);
for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
@@ -1098,7 +1098,7 @@
if (m_param->rdLevel >= 3)
{
/* Calculate RD cost of best inter option */
- if (!m_bChromaSa8d) /* When m_bChromaSa8d is enabled, chroma MC has already been done */
+ if (!m_bChromaSa8d && (m_csp != X265_CSP_I400)) /* When m_bChromaSa8d is enabled, chroma MC has already been done */
{
uint32_t numPU = bestInter->cu.getNumPartInter(0);
for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
@@ -1173,10 +1173,13 @@
else if (md.bestMode->cu.isInter(0))
{
uint32_t numPU = md.bestMode->cu.getNumPartInter(0);
- for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
+ if (m_csp != X265_CSP_I400)
{
- PredictionUnit pu(md.bestMode->cu, cuGeom, puIdx);
- motionCompensation(md.bestMode->cu, pu, md.bestMode->predYuv, false, true);
+ for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
+ {
+ PredictionUnit pu(md.bestMode->cu, cuGeom, puIdx);
+ motionCompensation(md.bestMode->cu, pu, md.bestMode->predYuv, false, true);
+ }
}
if (m_param->rdLevel == 2)
encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
@@ -1187,7 +1190,6 @@
uint32_t tuDepthRange[2];
cu.getInterTUQtDepthRange(tuDepthRange, 0);
-
m_rqt[cuGeom.depth].tmpResiYuv.subtract(*md.bestMode->fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize);
residualTransformQuantInter(*md.bestMode, cuGeom, 0, 0, tuDepthRange);
if (cu.getQtRootCbf(0))
@@ -1213,8 +1215,11 @@
cu.getIntraTUQtDepthRange(tuDepthRange, 0);
residualTransformQuantIntra(*md.bestMode, cuGeom, 0, 0, tuDepthRange);
- getBestIntraModeChroma(*md.bestMode, cuGeom);
- residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
+ if (m_csp != X265_CSP_I400)
+ {
+ getBestIntraModeChroma(*md.bestMode, cuGeom);
+ residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
+ }
md.bestMode->reconYuv.copyFromPicYuv(reconPic, cu.m_cuAddr, cuGeom.absPartIdx); // TODO:
}
}
@@ -1701,12 +1706,11 @@
tempPred->cu.m_mv[1][0] = candMvField[i][1].mv;
tempPred->cu.m_refIdx[0][0] = (int8_t)candMvField[i][0].refIdx;
tempPred->cu.m_refIdx[1][0] = (int8_t)candMvField[i][1].refIdx;
-
- motionCompensation(tempPred->cu, pu, tempPred->predYuv, true, m_bChromaSa8d);
+ motionCompensation(tempPred->cu, pu, tempPred->predYuv, true, m_bChromaSa8d && (m_csp != X265_CSP_I400));
tempPred->sa8dBits = getTUBits(i, numMergeCand);
tempPred->distortion = primitives.cu[sizeIdx].sa8d(fencYuv->m_buf[0], fencYuv->m_size, tempPred->predYuv.m_buf[0], tempPred->predYuv.m_size);
- if (m_bChromaSa8d)
+ if (m_bChromaSa8d && (m_csp != X265_CSP_I400))
{
tempPred->distortion += primitives.chroma[m_csp].cu[sizeIdx].sa8d(fencYuv->m_buf[1], fencYuv->m_csize, tempPred->predYuv.m_buf[1], tempPred->predYuv.m_csize);
tempPred->distortion += primitives.chroma[m_csp].cu[sizeIdx].sa8d(fencYuv->m_buf[2], fencYuv->m_csize, tempPred->predYuv.m_buf[2], tempPred->predYuv.m_csize);
@@ -1725,7 +1729,7 @@
return;
/* calculate the motion compensation for chroma for the best mode selected */
- if (!m_bChromaSa8d) /* Chroma MC was done above */
+ if (!m_bChromaSa8d && (m_csp != X265_CSP_I400)) /* Chroma MC was done above */
motionCompensation(bestPred->cu, pu, bestPred->predYuv, false, true);
if (m_param->rdLevel)
@@ -1848,7 +1852,7 @@
tempPred->cu.m_refIdx[1][0] = (int8_t)candMvField[i][1].refIdx;
tempPred->cu.setPredModeSubParts(MODE_INTER); /* must be cleared between encode iterations */
- motionCompensation(tempPred->cu, pu, tempPred->predYuv, true, true);
+ motionCompensation(tempPred->cu, pu, tempPred->predYuv, true, m_csp != X265_CSP_I400);
uint8_t hasCbf = true;
bool swapped = false;
@@ -1935,15 +1939,14 @@
}
}
}
-
- predInterSearch(interMode, cuGeom, m_bChromaSa8d, refMask);
+ predInterSearch(interMode, cuGeom, m_bChromaSa8d && (m_csp != X265_CSP_I400), refMask);
/* predInterSearch sets interMode.sa8dBits */
const Yuv& fencYuv = *interMode.fencYuv;
Yuv& predYuv = interMode.predYuv;
int part = partitionFromLog2Size(cuGeom.log2CUSize);
interMode.distortion = primitives.cu[part].sa8d(fencYuv.m_buf[0], fencYuv.m_size, predYuv.m_buf[0], predYuv.m_size);
- if (m_bChromaSa8d)
+ if (m_bChromaSa8d && (m_csp != X265_CSP_I400))
{
interMode.distortion += primitives.chroma[m_csp].cu[part].sa8d(fencYuv.m_buf[1], fencYuv.m_csize, predYuv.m_buf[1], predYuv.m_csize);
interMode.distortion += primitives.chroma[m_csp].cu[part].sa8d(fencYuv.m_buf[2], fencYuv.m_csize, predYuv.m_buf[2], predYuv.m_csize);
@@ -1992,8 +1995,7 @@
}
}
}
-
- predInterSearch(interMode, cuGeom, true, refMask);
+ predInterSearch(interMode, cuGeom, m_csp != X265_CSP_I400, refMask);
/* predInterSearch sets interMode.sa8dBits, but this is ignored */
encodeResAndCalcRdInterCU(interMode, cuGeom);
@@ -2060,10 +2062,10 @@
cu.m_mvd[1][0] = bestME[1].mv - mvp1;
PredictionUnit pu(cu, cuGeom, 0);
- motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, m_bChromaSa8d);
+ motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, m_bChromaSa8d && (m_csp != X265_CSP_I400));
int sa8d = primitives.cu[partEnum].sa8d(fencYuv.m_buf[0], fencYuv.m_size, bidir2Nx2N.predYuv.m_buf[0], bidir2Nx2N.predYuv.m_size);
- if (m_bChromaSa8d)
+ if (m_bChromaSa8d && (m_csp != X265_CSP_I400))
{
/* Add in chroma distortion */
sa8d += primitives.chroma[m_csp].cu[partEnum].sa8d(fencYuv.m_buf[1], fencYuv.m_csize, bidir2Nx2N.predYuv.m_buf[1], bidir2Nx2N.predYuv.m_csize);
@@ -2094,16 +2096,16 @@
int zsa8d;
- if (m_bChromaSa8d)
+ if (m_bChromaSa8d && (m_csp != X265_CSP_I400))
{
cu.m_mv[0][0] = mvzero;
cu.m_mv[1][0] = mvzero;
motionCompensation(cu, pu, tmpPredYuv, true, true);
-
zsa8d = primitives.cu[partEnum].sa8d(fencYuv.m_buf[0], fencYuv.m_size, tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
zsa8d += primitives.chroma[m_csp].cu[partEnum].sa8d(fencYuv.m_buf[1], fencYuv.m_csize, tmpPredYuv.m_buf[1], tmpPredYuv.m_csize);
zsa8d += primitives.chroma[m_csp].cu[partEnum].sa8d(fencYuv.m_buf[2], fencYuv.m_csize, tmpPredYuv.m_buf[2], tmpPredYuv.m_csize);
+
}
else
{
@@ -2139,13 +2141,12 @@
cu.m_mvd[1][0] = mvzero - mvp1;
cu.m_mvpIdx[1][0] = (uint8_t)mvpIdx1;
- if (m_bChromaSa8d)
- /* real MC was already performed */
+ if (m_bChromaSa8d) /* real MC was already performed */
bidir2Nx2N.predYuv.copyFromYuv(tmpPredYuv);
else
- motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, true);
+ motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, m_csp != X265_CSP_I400);
}
- else if (m_bChromaSa8d)
+ else if (m_bChromaSa8d && (m_csp != X265_CSP_I400))
{
/* recover overwritten motion vectors */
cu.m_mv[0][0] = bestME[0].mv;
@@ -2174,7 +2175,7 @@
Mode *bestMode = m_modeDepth[cuGeom.depth].bestMode;
CUData& cu = bestMode->cu;
- cu.copyFromPic(ctu, cuGeom);
+ cu.copyFromPic(ctu, cuGeom, m_csp);
PicYuv& reconPic = *m_frame->m_reconPic;
@@ -2191,8 +2192,11 @@
cu.getIntraTUQtDepthRange(tuDepthRange, 0);
residualTransformQuantIntra(*bestMode, cuGeom, 0, 0, tuDepthRange);
- getBestIntraModeChroma(*bestMode, cuGeom);
- residualQTIntraChroma(*bestMode, cuGeom, 0, 0);
+ if (m_csp != X265_CSP_I400)
+ {
+ getBestIntraModeChroma(*bestMode, cuGeom);
+ residualQTIntraChroma(*bestMode, cuGeom, 0, 0);
+ }
}
else // if (cu.isInter(0))
{
@@ -2207,20 +2211,23 @@
/* at RD 0, the prediction pixels are accumulated into the top depth predYuv */
Yuv& predYuv = m_modeDepth[0].bestMode->predYuv;
pixel* predY = predYuv.getLumaAddr(absPartIdx);
- pixel* predU = predYuv.getCbAddr(absPartIdx);
- pixel* predV = predYuv.getCrAddr(absPartIdx);
primitives.cu[sizeIdx].sub_ps(resiYuv.m_buf[0], resiYuv.m_size,
fencYuv.m_buf[0], predY,
fencYuv.m_size, predYuv.m_size);
- primitives.chroma[m_csp].cu[sizeIdx].sub_ps(resiYuv.m_buf[1], resiYuv.m_csize,
+ if (m_csp != X265_CSP_I400)
+ {
+ pixel* predU = predYuv.getCbAddr(absPartIdx);
+ pixel* predV = predYuv.getCrAddr(absPartIdx);
+ primitives.chroma[m_csp].cu[sizeIdx].sub_ps(resiYuv.m_buf[1], resiYuv.m_csize,
fencYuv.m_buf[1], predU,
fencYuv.m_csize, predYuv.m_csize);
- primitives.chroma[m_csp].cu[sizeIdx].sub_ps(resiYuv.m_buf[2], resiYuv.m_csize,
+ primitives.chroma[m_csp].cu[sizeIdx].sub_ps(resiYuv.m_buf[2], resiYuv.m_csize,
fencYuv.m_buf[2], predV,
fencYuv.m_csize, predYuv.m_csize);
+ }
uint32_t tuDepthRange[2];
cu.getInterTUQtDepthRange(tuDepthRange, 0);
@@ -2239,20 +2246,24 @@
else
primitives.cu[sizeIdx].copy_pp(reconPic.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPic.m_stride,
predY, predYuv.m_size);
-
- if (cu.m_cbf[1][0])
- primitives.chroma[m_csp].cu[sizeIdx].add_ps(reconPic.getCbAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
+ if (m_csp != X265_CSP_I400)
+ {
+ pixel* predU = predYuv.getCbAddr(absPartIdx);
+ pixel* predV = predYuv.getCrAddr(absPartIdx);
+ if (cu.m_cbf[1][0])
+ primitives.chroma[m_csp].cu[sizeIdx].add_ps(reconPic.getCbAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
predU, resiYuv.m_buf[1], predYuv.m_csize, resiYuv.m_csize);
- else
- primitives.chroma[m_csp].cu[sizeIdx].copy_pp(reconPic.getCbAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
+ else
+ primitives.chroma[m_csp].cu[sizeIdx].copy_pp(reconPic.getCbAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
predU, predYuv.m_csize);
- if (cu.m_cbf[2][0])
- primitives.chroma[m_csp].cu[sizeIdx].add_ps(reconPic.getCrAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
+ if (cu.m_cbf[2][0])
+ primitives.chroma[m_csp].cu[sizeIdx].add_ps(reconPic.getCrAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
predV, resiYuv.m_buf[2], predYuv.m_csize, resiYuv.m_csize);
- else
- primitives.chroma[m_csp].cu[sizeIdx].copy_pp(reconPic.getCrAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
+ else
+ primitives.chroma[m_csp].cu[sizeIdx].copy_pp(reconPic.getCrAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
predV, predYuv.m_csize);
+ }
}
cu.updatePic(cuGeom.depth);
diff -r d01cd1fee4e3 -r d7200ee7910d source/encoder/search.cpp
--- a/source/encoder/search.cpp Sun Dec 13 19:57:55 2015 +0530
+++ b/source/encoder/search.cpp Sun Dec 13 20:07:18 2015 +0530
@@ -98,13 +98,27 @@
* the coeffRQT and reconQtYuv are allocated to the max CU size at every depth. The parts
* which are reconstructed at each depth are valid. At the end, the transform depth table
* is walked and the coeff and recon at the correct depths are collected */
- for (uint32_t i = 0; i <= m_numLayers; i++)
+
+ if (param.internalCsp != X265_CSP_I400)
{
- CHECKED_MALLOC(m_rqt[i].coeffRQT[0], coeff_t, sizeL + sizeC * 2);
- m_rqt[i].coeffRQT[1] = m_rqt[i].coeffRQT[0] + sizeL;
- m_rqt[i].coeffRQT[2] = m_rqt[i].coeffRQT[0] + sizeL + sizeC;
- ok &= m_rqt[i].reconQtYuv.create(g_maxCUSize, param.internalCsp);
- ok &= m_rqt[i].resiQtYuv.create(g_maxCUSize, param.internalCsp);
+ for (uint32_t i = 0; i <= m_numLayers; i++)
+ {
+ CHECKED_MALLOC(m_rqt[i].coeffRQT[0], coeff_t, sizeL + sizeC * 2);
+ m_rqt[i].coeffRQT[1] = m_rqt[i].coeffRQT[0] + sizeL;
+ m_rqt[i].coeffRQT[2] = m_rqt[i].coeffRQT[0] + sizeL + sizeC;
+ ok &= m_rqt[i].reconQtYuv.create(g_maxCUSize, param.internalCsp);
+ ok &= m_rqt[i].resiQtYuv.create(g_maxCUSize, param.internalCsp);
+ }
+ }
+ else
+ {
+ for (uint32_t i = 0; i <= m_numLayers; i++)
+ {
+ CHECKED_MALLOC(m_rqt[i].coeffRQT[0], coeff_t, sizeL);
+ m_rqt[i].coeffRQT[1] = m_rqt[i].coeffRQT[2] = NULL;
+ ok &= m_rqt[i].reconQtYuv.create(g_maxCUSize, param.internalCsp);
+ ok &= m_rqt[i].resiQtYuv.create(g_maxCUSize, param.internalCsp);
+ }
}
/* the rest of these buffers are indexed per-depth */
@@ -117,12 +131,22 @@
ok &= m_rqt[i].bidirPredYuv[1].create(cuSize, param.internalCsp);
}
- CHECKED_MALLOC(m_qtTempCbf[0], uint8_t, numPartitions * 3);
- m_qtTempCbf[1] = m_qtTempCbf[0] + numPartitions;
- m_qtTempCbf[2] = m_qtTempCbf[0] + numPartitions * 2;
- CHECKED_MALLOC(m_qtTempTransformSkipFlag[0], uint8_t, numPartitions * 3);
- m_qtTempTransformSkipFlag[1] = m_qtTempTransformSkipFlag[0] + numPartitions;
- m_qtTempTransformSkipFlag[2] = m_qtTempTransformSkipFlag[0] + numPartitions * 2;
+ if (param.internalCsp != X265_CSP_I400)
+ {
+ CHECKED_MALLOC(m_qtTempCbf[0], uint8_t, numPartitions * 3);
+ m_qtTempCbf[1] = m_qtTempCbf[0] + numPartitions;
+ m_qtTempCbf[2] = m_qtTempCbf[0] + numPartitions * 2;
+ CHECKED_MALLOC(m_qtTempTransformSkipFlag[0], uint8_t, numPartitions * 3);
+ m_qtTempTransformSkipFlag[1] = m_qtTempTransformSkipFlag[0] + numPartitions;
+ m_qtTempTransformSkipFlag[2] = m_qtTempTransformSkipFlag[0] + numPartitions * 2;
+ }
+ else
+ {
+ CHECKED_MALLOC(m_qtTempCbf[0], uint8_t, numPartitions);
+ m_qtTempCbf[1] = m_qtTempCbf[2] = NULL;
+ CHECKED_MALLOC(m_qtTempTransformSkipFlag[0], uint8_t, numPartitions);
+ m_qtTempTransformSkipFlag[1] = m_qtTempTransformSkipFlag[2] = NULL;
+ }
CHECKED_MALLOC(m_intraPred, pixel, (32 * 32) * (33 + 3));
m_fencScaled = m_intraPred + 32 * 32;
@@ -1166,8 +1190,13 @@
intraMode.initCosts();
intraMode.lumaDistortion += estIntraPredQT(intraMode, cuGeom, tuDepthRange);
- intraMode.chromaDistortion += estIntraPredChromaQT(intraMode, cuGeom);
- intraMode.distortion += intraMode.lumaDistortion + intraMode.chromaDistortion;
+ if (m_csp != X265_CSP_I400)
+ {
+ intraMode.chromaDistortion += estIntraPredChromaQT(intraMode, cuGeom);
+ intraMode.distortion += intraMode.lumaDistortion + intraMode.chromaDistortion;
+ }
+ else
+ intraMode.distortion += intraMode.lumaDistortion;
m_entropyCoder.resetBits();
if (m_slice->m_pps->bTransquantBypassEnabled)
@@ -1386,8 +1415,13 @@
extractIntraResultQT(cu, *reconYuv, 0, 0);
intraMode.lumaDistortion = icosts.distortion;
- intraMode.chromaDistortion = estIntraPredChromaQT(intraMode, cuGeom);
- intraMode.distortion = intraMode.lumaDistortion + intraMode.chromaDistortion;
+ if (m_csp != X265_CSP_I400)
+ {
+ intraMode.chromaDistortion = estIntraPredChromaQT(intraMode, cuGeom);
+ intraMode.distortion = intraMode.lumaDistortion + intraMode.chromaDistortion;
+ }
+ else
+ intraMode.distortion = intraMode.lumaDistortion;
m_entropyCoder.resetBits();
if (m_slice->m_pps->bTransquantBypassEnabled)
@@ -2492,11 +2526,14 @@
// Luma
int part = partitionFromLog2Size(cu.m_log2CUSize[0]);
interMode.lumaDistortion = primitives.cu[part].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
+ interMode.distortion = interMode.lumaDistortion;
// Chroma
- interMode.chromaDistortion = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
- interMode.chromaDistortion += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
- interMode.distortion = interMode.lumaDistortion + interMode.chromaDistortion;
-
+ if (m_csp != X265_CSP_I400)
+ {
+ interMode.chromaDistortion = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
+ interMode.chromaDistortion += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
+ interMode.distortion += interMode.chromaDistortion;
+ }
m_entropyCoder.load(m_rqt[depth].cur);
m_entropyCoder.resetBits();
if (m_slice->m_pps->bTransquantBypassEnabled)
@@ -2546,8 +2583,11 @@
if (!tqBypass)
{
sse_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
- cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
- cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
+ if (m_csp != X265_CSP_I400)
+ {
+ cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
+ cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
+ }
/* Consider the RD cost of not signaling any residual */
m_entropyCoder.load(m_rqt[depth].cur);
@@ -2620,15 +2660,19 @@
// update with clipped distortion and cost (qp estimation loop uses unclipped values)
sse_t bestLumaDist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
- sse_t bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
- bestChromaDist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
+ interMode.distortion = bestLumaDist;
+ if (m_csp != X265_CSP_I400)
+ {
+ sse_t bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
+ bestChromaDist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
+ interMode.chromaDistortion = bestChromaDist;
+ interMode.distortion += bestChromaDist;
+ }
if (m_rdCost.m_psyRd)
interMode.psyEnergy = m_rdCost.psyCost(sizeIdx, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
interMode.resEnergy = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
interMode.totalBits = bits;
interMode.lumaDistortion = bestLumaDist;
- interMode.chromaDistortion = bestChromaDist;
- interMode.distortion = bestLumaDist + bestChromaDist;
interMode.coeffBits = coeffBits;
interMode.mvBits = mvBits;
updateModeCost(interMode);
@@ -2649,14 +2693,15 @@
{
// code full block
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
- bool bCodeChroma = true;
+ uint32_t codeChroma = (m_csp != X265_CSP_I400) ? 1 : 0;
+
uint32_t tuDepthC = tuDepth;
if (log2TrSizeC < 2)
{
X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
log2TrSizeC = 2;
tuDepthC--;
- bCodeChroma = !(absPartIdx & 3);
+ codeChroma &= !(absPartIdx & 3);
}
uint32_t absPartIdxStep = cuGeom.numPartitions >> tuDepthC * 2;
@@ -2690,7 +2735,7 @@
cu.setCbfSubParts(0, TEXT_LUMA, absPartIdx, depth);
}
- if (bCodeChroma)
+ if (codeChroma)
{
uint32_t sizeIdxC = log2TrSizeC - 2;
uint32_t strideResiC = resiYuv.m_csize;
@@ -2756,14 +2801,20 @@
{
residualTransformQuantInter(mode, cuGeom, qPartIdx, tuDepth + 1, depthRange);
ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1);
- ucbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);
- vcbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);
+ if (m_csp != X265_CSP_I400)
+ {
+ ucbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);
+ vcbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);
+ }
}
for (uint32_t i = 0; i < 4 * qNumParts; ++i)
{
cu.m_cbf[0][absPartIdx + i] |= ycbf << tuDepth;
- cu.m_cbf[1][absPartIdx + i] |= ucbf << tuDepth;
- cu.m_cbf[2][absPartIdx + i] |= vcbf << tuDepth;
+ if (m_csp != X265_CSP_I400)
+ {
+ cu.m_cbf[1][absPartIdx + i] |= ucbf << tuDepth;
+ cu.m_cbf[2][absPartIdx + i] |= vcbf << tuDepth;
+ }
}
}
}
@@ -2794,14 +2845,14 @@
X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must be set\n");
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
- bool bCodeChroma = true;
+ uint32_t codeChroma = (m_csp != X265_CSP_I400) ? 1 : 0;
uint32_t tuDepthC = tuDepth;
if (log2TrSizeC < 2)
{
X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
log2TrSizeC = 2;
tuDepthC--;
- bCodeChroma = !(absPartIdx & 3);
+ codeChroma &= !(absPartIdx & 3);
}
// code full block
@@ -2827,14 +2878,14 @@
if (bCheckFull)
{
uint32_t trSizeC = 1 << log2TrSizeC;
- int partSize = partitionFromLog2Size(log2TrSize);
+ int partSize = partitionFromLog2Size(log2TrSize);
int partSizeC = partitionFromLog2Size(log2TrSizeC);
const uint32_t qtLayer = log2TrSize - 2;
uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);
coeff_t* coeffCurY = m_rqt[qtLayer].coeffRQT[0] + coeffOffsetY;
- bool checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && !cu.m_tqBypass[0];
- bool checkTransformSkipY = checkTransformSkip && log2TrSize <= MAX_LOG2_TS_SIZE;
+ bool checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && !cu.m_tqBypass[0];
+ bool checkTransformSkipY = checkTransformSkip && log2TrSize <= MAX_LOG2_TS_SIZE;
bool checkTransformSkipC = checkTransformSkip && log2TrSizeC <= MAX_LOG2_TS_SIZE;
cu.setTUDepthSubParts(tuDepth, absPartIdx, depth);
@@ -2844,7 +2895,7 @@
m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac, log2TrSize, true);
const pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
- int16_t* resi = resiYuv.getLumaAddr(absPartIdx);
+ int16_t* resi = resiYuv.getLumaAddr(absPartIdx);
numSig[TEXT_LUMA][0] = m_quant.transformNxN(cu, fenc, fencYuv->m_size, resi, resiYuv.m_size, coeffCurY, log2TrSize, TEXT_LUMA, absPartIdx, false);
cbfFlag[TEXT_LUMA][0] = !!numSig[TEXT_LUMA][0];
@@ -2865,7 +2916,7 @@
if (m_rdCost.m_psyRd)
zeroPsyEnergyY = m_rdCost.psyCost(partSize, fenc, fencYuv->m_size, mode.predYuv.getLumaAddr(absPartIdx), mode.predYuv.m_size);
- int16_t* curResiY = m_rqt[qtLayer].resiQtYuv.getLumaAddr(absPartIdx);
+ int16_t* curResiY = m_rqt[qtLayer].resiQtYuv.getLumaAddr(absPartIdx);
uint32_t strideResiY = m_rqt[qtLayer].resiQtYuv.m_size;
if (cbfFlag[TEXT_LUMA][0])
@@ -2874,7 +2925,7 @@
// non-zero cost calculation for luma - This is an approximation
// finally we have to encode correct cbf after comparing with null cost
- pixel* curReconY = m_rqt[qtLayer].reconQtYuv.getLumaAddr(absPartIdx);
+ pixel* curReconY = m_rqt[qtLayer].reconQtYuv.getLumaAddr(absPartIdx);
uint32_t strideReconY = m_rqt[qtLayer].reconQtYuv.m_size;
primitives.cu[partSize].add_ps(curReconY, strideReconY, mode.predYuv.getLumaAddr(absPartIdx), curResiY, mode.predYuv.m_size, strideResiY);
@@ -2883,7 +2934,7 @@
uint32_t nonZeroPsyEnergyY = 0; uint64_t singleCostY = 0;
if (m_rdCost.m_psyRd)
{
- nonZeroPsyEnergyY = m_rdCost.psyCost(partSize, fenc, fencYuv->m_size, curReconY, strideReconY);
+ nonZeroPsyEnergyY = m_rdCost.psyCost(partSize, fenc, fencYuv->m_size, curReconY, strideReconY);
singleCostY = m_rdCost.calcPsyRdCost(nonZeroDistY, nzCbfBitsY + singleBits[TEXT_LUMA][0], nonZeroPsyEnergyY);
}
else
@@ -2908,7 +2959,7 @@
primitives.cu[partSize].blockfill_s(curResiY, strideResiY, 0);
#if CHECKED_BUILD || _DEBUG
uint32_t numCoeffY = 1 << (log2TrSize << 1);
- memset(coeffCurY, 0, sizeof(coeff_t) * numCoeffY);
+ memset(coeffCurY, 0, sizeof(coeff_t)* numCoeffY);
#endif
if (checkTransformSkipY)
minCost[TEXT_LUMA][0] = nullCostY;
@@ -2936,7 +2987,7 @@
cu.setCbfSubParts(cbfFlag[TEXT_LUMA][0] << tuDepth, TEXT_LUMA, absPartIdx, depth);
- if (bCodeChroma)
+ if (codeChroma)
{
uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
uint32_t strideResiC = m_rqt[qtLayer].resiQtYuv.m_csize;
@@ -3099,7 +3150,7 @@
cu.setCbfSubParts(cbfFlag[TEXT_LUMA][0] << tuDepth, TEXT_LUMA, absPartIdx, depth);
}
- if (bCodeChroma && checkTransformSkipC)
+ if (codeChroma && checkTransformSkipC)
{
sse_t nonZeroDistC = 0;
uint32_t nonZeroPsyEnergyC = 0;
@@ -3180,7 +3231,7 @@
m_entropyCoder.resetBits();
//Encode cbf flags
- if (bCodeChroma)
+ if (codeChroma)
{
if (!splitIntoSubTUs)
{
@@ -3254,14 +3305,20 @@
{
estimateResidualQT(mode, cuGeom, qPartIdx, tuDepth + 1, resiYuv, splitCost, depthRange);
ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1);
- ucbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);
- vcbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);
+ if (m_csp != X265_CSP_I400)
+ {
+ ucbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1);
+ vcbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1);
+ }
}
for (uint32_t i = 0; i < 4 * qNumParts; ++i)
{
cu.m_cbf[0][absPartIdx + i] |= ycbf << tuDepth;
- cu.m_cbf[1][absPartIdx + i] |= ucbf << tuDepth;
- cu.m_cbf[2][absPartIdx + i] |= vcbf << tuDepth;
+ if (m_csp != X265_CSP_I400)
+ {
+ cu.m_cbf[1][absPartIdx + i] |= ucbf << tuDepth;
+ cu.m_cbf[2][absPartIdx + i] |= vcbf << tuDepth;
+ }
}
// Here we were encoding cbfs and coefficients for splitted blocks. Since I have collected coefficient bits
@@ -3295,7 +3352,7 @@
}
cu.setTransformSkipSubParts(bestTransformMode[TEXT_LUMA][0], TEXT_LUMA, absPartIdx, depth);
- if (bCodeChroma)
+ if (codeChroma)
{
if (!splitIntoSubTUs)
{
@@ -3318,7 +3375,7 @@
cu.setTUDepthSubParts(tuDepth, absPartIdx, depth);
cu.setCbfSubParts(cbfFlag[TEXT_LUMA][0] << tuDepth, TEXT_LUMA, absPartIdx, depth);
- if (bCodeChroma)
+ if (codeChroma)
{
if (!splitIntoSubTUs)
{
@@ -3350,18 +3407,20 @@
const bool bSubdiv = tuDepth < cu.m_tuDepth[absPartIdx];
uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
-
- if (!(log2TrSize - m_hChromaShift < 2))
+ if (m_csp != X265_CSP_I400)
{
- if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1))
- m_entropyCoder.codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, tuDepth, !bSubdiv);
- if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1))
- m_entropyCoder.codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, tuDepth, !bSubdiv);
- }
- else
- {
- X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1), "chroma CBF not matching\n");
- X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1), "chroma CBF not matching\n");
+ if (!(log2TrSize - m_hChromaShift < 2))
+ {
+ if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1))
+ m_entropyCoder.codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, tuDepth, !bSubdiv);
+ if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1))
+ m_entropyCoder.codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, tuDepth, !bSubdiv);
+ }
+ else
+ {
+ X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1), "chroma CBF not matching\n");
+ X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1), "chroma CBF not matching\n");
+ }
}
if (!bSubdiv)
@@ -3391,14 +3450,14 @@
const uint32_t qtLayer = log2TrSize - 2;
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
- bool bCodeChroma = true;
+ uint32_t codeChroma = (m_csp != X265_CSP_I400) ? 1 : 0;
uint32_t tuDepthC = tuDepth;
if (log2TrSizeC < 2)
{
X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, "invalid tuDepth\n");
log2TrSizeC = 2;
tuDepthC--;
- bCodeChroma = !(absPartIdx & 3);
+ codeChroma &= !(absPartIdx & 3);
}
m_rqt[qtLayer].resiQtYuv.copyPartToPartLuma(resiYuv, absPartIdx, log2TrSize);
@@ -3409,7 +3468,7 @@
coeff_t* coeffDstY = cu.m_trCoeff[0] + coeffOffsetY;
memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
- if (bCodeChroma)
+ if (codeChroma)
{
m_rqt[qtLayer].resiQtYuv.copyPartToPartChroma(resiYuv, absPartIdx, log2TrSizeC + m_hChromaShift);
More information about the x265-devel mailing list