[x265] [PATCH 3 of 8] cudata: allocate memory pool for each cu to support 400 color space
mahesh at multicorewareinc.com
mahesh at multicorewareinc.com
Mon Dec 14 20:30:09 CET 2015
# HG changeset patch
# User Mahesh Pittala <mahesh at multicorewareinc.com>
# Date 1450016875 -19800
# Sun Dec 13 19:57:55 2015 +0530
# Node ID d01cd1fee4e30e2dd4ea90490e471417e5bf47d5
# Parent bc03a968117e5f3c242dd0f198c8281c6216587b
cudata: allocate memory pool for each cu to support 400 color space
diff -r bc03a968117e -r d01cd1fee4e3 source/common/cudata.cpp
--- a/source/common/cudata.cpp Sun Dec 13 19:53:20 2015 +0530
+++ b/source/common/cudata.cpp Sun Dec 13 19:57:55 2015 +0530
@@ -193,44 +193,82 @@
break;
}
- /* Each CU's data is layed out sequentially within the charMemBlock */
- uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance;
+ if (csp == X265_CSP_I400)
+ {
+ /* Each CU's data is laid out sequentially within the charMemBlock */
+ uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * (BytesPerPartition - 4)) * instance;
- m_qp = (int8_t*)charBuf; charBuf += m_numPartitions;
- m_log2CUSize = charBuf; charBuf += m_numPartitions;
- m_lumaIntraDir = charBuf; charBuf += m_numPartitions;
- m_chromaIntraDir = charBuf; charBuf += m_numPartitions;
- m_tqBypass = charBuf; charBuf += m_numPartitions;
- m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
- m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
- m_cuDepth = charBuf; charBuf += m_numPartitions;
- m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
- m_partSize = charBuf; charBuf += m_numPartitions;
- m_mergeFlag = charBuf; charBuf += m_numPartitions;
- m_interDir = charBuf; charBuf += m_numPartitions;
- m_mvpIdx[0] = charBuf; charBuf += m_numPartitions;
- m_mvpIdx[1] = charBuf; charBuf += m_numPartitions;
- m_tuDepth = charBuf; charBuf += m_numPartitions;
- m_transformSkip[0] = charBuf; charBuf += m_numPartitions;
- m_transformSkip[1] = charBuf; charBuf += m_numPartitions;
- m_transformSkip[2] = charBuf; charBuf += m_numPartitions;
- m_cbf[0] = charBuf; charBuf += m_numPartitions;
- m_cbf[1] = charBuf; charBuf += m_numPartitions;
- m_cbf[2] = charBuf; charBuf += m_numPartitions;
+ m_qp = (int8_t*)charBuf; charBuf += m_numPartitions;
+ m_log2CUSize = charBuf; charBuf += m_numPartitions;
+ m_lumaIntraDir = charBuf; charBuf += m_numPartitions;
+ m_tqBypass = charBuf; charBuf += m_numPartitions;
+ m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
+ m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
+ m_cuDepth = charBuf; charBuf += m_numPartitions;
+ m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
+ m_partSize = charBuf; charBuf += m_numPartitions;
+ m_mergeFlag = charBuf; charBuf += m_numPartitions;
+ m_interDir = charBuf; charBuf += m_numPartitions;
+ m_mvpIdx[0] = charBuf; charBuf += m_numPartitions;
+ m_mvpIdx[1] = charBuf; charBuf += m_numPartitions;
+ m_tuDepth = charBuf; charBuf += m_numPartitions;
+ m_transformSkip[0] = charBuf; charBuf += m_numPartitions;
+ m_cbf[0] = charBuf; charBuf += m_numPartitions;
+ m_chromaIntraDir = charBuf; charBuf += m_numPartitions;
- X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n");
+ X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * (BytesPerPartition - 4)) * (instance + 1), "CU data layout is broken\n"); //BytesPerPartition
- m_mv[0] = dataPool.mvMemBlock + (instance * 4) * m_numPartitions;
- m_mv[1] = m_mv[0] + m_numPartitions;
- m_mvd[0] = m_mv[1] + m_numPartitions;
- m_mvd[1] = m_mvd[0] + m_numPartitions;
+ m_mv[0] = dataPool.mvMemBlock + (instance * 4) * m_numPartitions;
+ m_mv[1] = m_mv[0] + m_numPartitions;
+ m_mvd[0] = m_mv[1] + m_numPartitions;
+ m_mvd[1] = m_mvd[0] + m_numPartitions;
- uint32_t cuSize = g_maxCUSize >> depth;
- uint32_t sizeL = cuSize * cuSize;
- uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
- m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL + sizeC * 2);
- m_trCoeff[1] = m_trCoeff[0] + sizeL;
- m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC;
+ uint32_t cuSize = g_maxCUSize >> depth;
+ m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (cuSize * cuSize);
+ m_trCoeff[1] = m_trCoeff[2] = 0;
+ m_transformSkip[1] = m_transformSkip[2] = m_cbf[1] = m_cbf[2] = 0;
+ }
+ else
+ {
+ /* Each CU's data is laid out sequentially within the charMemBlock */
+ uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance;
+
+ m_qp = (int8_t*)charBuf; charBuf += m_numPartitions;
+ m_log2CUSize = charBuf; charBuf += m_numPartitions;
+ m_lumaIntraDir = charBuf; charBuf += m_numPartitions;
+ m_tqBypass = charBuf; charBuf += m_numPartitions;
+ m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
+ m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
+ m_cuDepth = charBuf; charBuf += m_numPartitions;
+ m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
+ m_partSize = charBuf; charBuf += m_numPartitions;
+ m_mergeFlag = charBuf; charBuf += m_numPartitions;
+ m_interDir = charBuf; charBuf += m_numPartitions;
+ m_mvpIdx[0] = charBuf; charBuf += m_numPartitions;
+ m_mvpIdx[1] = charBuf; charBuf += m_numPartitions;
+ m_tuDepth = charBuf; charBuf += m_numPartitions;
+ m_transformSkip[0] = charBuf; charBuf += m_numPartitions;
+ m_transformSkip[1] = charBuf; charBuf += m_numPartitions;
+ m_transformSkip[2] = charBuf; charBuf += m_numPartitions;
+ m_cbf[0] = charBuf; charBuf += m_numPartitions;
+ m_cbf[1] = charBuf; charBuf += m_numPartitions;
+ m_cbf[2] = charBuf; charBuf += m_numPartitions;
+ m_chromaIntraDir = charBuf; charBuf += m_numPartitions;
+
+ X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n");
+
+ m_mv[0] = dataPool.mvMemBlock + (instance * 4) * m_numPartitions;
+ m_mv[1] = m_mv[0] + m_numPartitions;
+ m_mvd[0] = m_mv[1] + m_numPartitions;
+ m_mvd[1] = m_mvd[0] + m_numPartitions;
+
+ uint32_t cuSize = g_maxCUSize >> depth;
+ uint32_t sizeL = cuSize * cuSize;
+ uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift); // block chroma part
+ m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL + sizeC * 2);
+ m_trCoeff[1] = m_trCoeff[0] + sizeL;
+ m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC;
+ }
}
void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp)
@@ -258,7 +296,7 @@
X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n");
/* initialize the remaining CU data in one memset */
- memset(m_cuDepth, 0, (BytesPerPartition - 7) * m_numPartitions);
+ memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? BytesPerPartition - 11 : BytesPerPartition - 7) * m_numPartitions);
uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL;
@@ -293,7 +331,7 @@
m_partSet(m_cuDepth, (uint8_t)cuGeom.depth);
/* initialize the remaining CU data in one memset */
- memset(m_predMode, 0, (BytesPerPartition - 8) * m_numPartitions);
+ memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ? BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions);
}
/* Copy the results of a sub-part (split) CU to the parent CU */
@@ -317,13 +355,9 @@
m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]);
m_subPartCopy(m_mvpIdx[1] + offset, subCU.m_mvpIdx[1]);
m_subPartCopy(m_tuDepth + offset, subCU.m_tuDepth);
+
m_subPartCopy(m_transformSkip[0] + offset, subCU.m_transformSkip[0]);
- m_subPartCopy(m_transformSkip[1] + offset, subCU.m_transformSkip[1]);
- m_subPartCopy(m_transformSkip[2] + offset, subCU.m_transformSkip[2]);
m_subPartCopy(m_cbf[0] + offset, subCU.m_cbf[0]);
- m_subPartCopy(m_cbf[1] + offset, subCU.m_cbf[1]);
- m_subPartCopy(m_cbf[2] + offset, subCU.m_cbf[2]);
- m_subPartCopy(m_chromaIntraDir + offset, subCU.m_chromaIntraDir);
memcpy(m_mv[0] + offset, subCU.m_mv[0], childGeom.numPartitions * sizeof(MV));
memcpy(m_mv[1] + offset, subCU.m_mv[1], childGeom.numPartitions * sizeof(MV));
@@ -332,12 +366,21 @@
uint32_t tmp = 1 << ((g_maxLog2CUSize - childGeom.depth) * 2);
uint32_t tmp2 = subPartIdx * tmp;
- memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t) * tmp);
+ memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t)* tmp);
- uint32_t tmpC = tmp >> (m_hChromaShift + m_vChromaShift);
- uint32_t tmpC2 = tmp2 >> (m_hChromaShift + m_vChromaShift);
- memcpy(m_trCoeff[1] + tmpC2, subCU.m_trCoeff[1], sizeof(coeff_t) * tmpC);
- memcpy(m_trCoeff[2] + tmpC2, subCU.m_trCoeff[2], sizeof(coeff_t) * tmpC);
+ if (subCU.m_chromaFormat != X265_CSP_I400)
+ {
+ m_subPartCopy(m_transformSkip[1] + offset, subCU.m_transformSkip[1]);
+ m_subPartCopy(m_transformSkip[2] + offset, subCU.m_transformSkip[2]);
+ m_subPartCopy(m_cbf[1] + offset, subCU.m_cbf[1]);
+ m_subPartCopy(m_cbf[2] + offset, subCU.m_cbf[2]);
+ m_subPartCopy(m_chromaIntraDir + offset, subCU.m_chromaIntraDir);
+
+ uint32_t tmpC = tmp >> (m_hChromaShift + m_vChromaShift);
+ uint32_t tmpC2 = tmp2 >> (m_hChromaShift + m_vChromaShift);
+ memcpy(m_trCoeff[1] + tmpC2, subCU.m_trCoeff[1], sizeof(coeff_t) * tmpC);
+ memcpy(m_trCoeff[2] + tmpC2, subCU.m_trCoeff[2], sizeof(coeff_t) * tmpC);
+ }
}
/* If a sub-CU part is not present (off the edge of the picture) its depth and
@@ -374,17 +417,20 @@
/* force TQBypass to true */
m_partSet(m_tqBypass, true);
- m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
-
/* clear residual coding flags */
m_partSet(m_predMode, cu.m_predMode[0] & (MODE_INTRA | MODE_INTER));
m_partSet(m_tuDepth, 0);
+ m_partSet(m_cbf[0], 0);
m_partSet(m_transformSkip[0], 0);
- m_partSet(m_transformSkip[1], 0);
- m_partSet(m_transformSkip[2], 0);
- m_partSet(m_cbf[0], 0);
- m_partSet(m_cbf[1], 0);
- m_partSet(m_cbf[2], 0);
+
+ if (cu.m_chromaFormat != X265_CSP_I400)
+ {
+ m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
+ m_partSet(m_cbf[1], 0);
+ m_partSet(m_cbf[2], 0);
+ m_partSet(m_transformSkip[1], 0);
+ m_partSet(m_transformSkip[2], 0);
+ }
}
/* Copy completed predicted CU to CTU in picture */
@@ -407,30 +453,34 @@
m_partCopy(ctu.m_mvpIdx[1] + m_absIdxInCTU, m_mvpIdx[1]);
m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
- m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
- m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
- m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
- m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
- m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
- memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV));
- memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV));
+ memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV));
+ memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV));
memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV));
memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV));
uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2);
uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
- memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
+ memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY);
- uint32_t tmpC = tmpY >> (m_hChromaShift + m_vChromaShift);
- uint32_t tmpC2 = tmpY2 >> (m_hChromaShift + m_vChromaShift);
- memcpy(ctu.m_trCoeff[1] + tmpC2, m_trCoeff[1], sizeof(coeff_t) * tmpC);
- memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC);
+ if (ctu.m_chromaFormat != X265_CSP_I400)
+ {
+ m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
+ m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
+ m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
+ m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
+ m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
+
+ uint32_t tmpC = tmpY >> (m_hChromaShift + m_vChromaShift);
+ uint32_t tmpC2 = tmpY2 >> (m_hChromaShift + m_vChromaShift);
+ memcpy(ctu.m_trCoeff[1] + tmpC2, m_trCoeff[1], sizeof(coeff_t) * tmpC);
+ memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC);
+ }
}
/* The reverse of copyToPic, called only by encodeResidue */
-void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom)
+void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom, int csp)
{
m_encData = ctu.m_encData;
m_slice = ctu.m_slice;
@@ -456,19 +506,23 @@
m_partCopy(m_mvpIdx[1], ctu.m_mvpIdx[1] + m_absIdxInCTU);
m_partCopy(m_chromaIntraDir, ctu.m_chromaIntraDir + m_absIdxInCTU);
- memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
- memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
+ memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
+ memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
/* clear residual coding flags */
m_partSet(m_tuDepth, 0);
m_partSet(m_transformSkip[0], 0);
- m_partSet(m_transformSkip[1], 0);
- m_partSet(m_transformSkip[2], 0);
m_partSet(m_cbf[0], 0);
- m_partSet(m_cbf[1], 0);
- m_partSet(m_cbf[2], 0);
+
+ if (csp != X265_CSP_I400)
+ {
+ m_partSet(m_transformSkip[1], 0);
+ m_partSet(m_transformSkip[2], 0);
+ m_partSet(m_cbf[1], 0);
+ m_partSet(m_cbf[2], 0);
+ }
}
/* Only called by encodeResidue, these fields can be modified during inter/intra coding */
@@ -478,22 +532,28 @@
m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
- m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
- m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
- m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
- m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
- m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2);
uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
- memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
- tmpY >>= m_hChromaShift + m_vChromaShift;
- tmpY2 >>= m_hChromaShift + m_vChromaShift;
- memcpy(ctu.m_trCoeff[1] + tmpY2, m_trCoeff[1], sizeof(coeff_t) * tmpY);
- memcpy(ctu.m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY);
+ memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY);
+
+ if (ctu.m_chromaFormat != X265_CSP_I400)
+ {
+ m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
+ m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
+
+ m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
+ m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
+ m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
+
+ tmpY >>= m_hChromaShift + m_vChromaShift;
+ tmpY2 >>= m_hChromaShift + m_vChromaShift;
+ memcpy(ctu.m_trCoeff[1] + tmpY2, m_trCoeff[1], sizeof(coeff_t) * tmpY);
+ memcpy(ctu.m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY);
+ }
}
const CUData* CUData::getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const
diff -r bc03a968117e -r d01cd1fee4e3 source/common/cudata.h
--- a/source/common/cudata.h Sun Dec 13 19:53:20 2015 +0530
+++ b/source/common/cudata.h Sun Dec 13 19:57:55 2015 +0530
@@ -222,12 +222,12 @@
void copyToPic(uint32_t depth) const;
/* RD-0 methods called only from encodeResidue */
- void copyFromPic(const CUData& ctu, const CUGeom& cuGeom);
+ void copyFromPic(const CUData& ctu, const CUGeom& cuGeom, int csp);
void updatePic(uint32_t depth) const;
void setPartSizeSubParts(PartSize size) { m_partSet(m_partSize, (uint8_t)size); }
void setPredModeSubParts(PredMode mode) { m_partSet(m_predMode, (uint8_t)mode); }
- void clearCbf() { m_partSet(m_cbf[0], 0); m_partSet(m_cbf[1], 0); m_partSet(m_cbf[2], 0); }
+ void clearCbf() { m_partSet(m_cbf[0], 0); if (m_chromaFormat != X265_CSP_I400) { m_partSet(m_cbf[1], 0); m_partSet(m_cbf[2], 0);} }
/* these functions all take depth as an absolute depth from CTU, it is used to calculate the number of parts to copy */
void setQPSubParts(int8_t qp, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth]((uint8_t*)m_qp + absPartIdx, (uint8_t)qp); }
@@ -246,7 +246,7 @@
void setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx);
uint8_t getCbf(uint32_t absPartIdx, TextType ttype, uint32_t tuDepth) const { return (m_cbf[ttype][absPartIdx] >> tuDepth) & 0x1; }
- uint8_t getQtRootCbf(uint32_t absPartIdx) const { return m_cbf[0][absPartIdx] || m_cbf[1][absPartIdx] || m_cbf[2][absPartIdx]; }
+ uint8_t getQtRootCbf(uint32_t absPartIdx) const { if (m_chromaFormat == X265_CSP_I400) return m_cbf[0][absPartIdx] || 0; else { return m_cbf[0][absPartIdx] || m_cbf[1][absPartIdx] || m_cbf[2][absPartIdx];} }
int8_t getRefQP(uint32_t currAbsIdxInCTU) const;
uint32_t getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField (*candMvField)[2], uint8_t* candDir) const;
void clipMv(MV& outMV) const;
@@ -339,8 +339,15 @@
uint32_t numPartition = NUM_4x4_PARTITIONS >> (depth * 2);
uint32_t cuSize = g_maxCUSize >> depth;
uint32_t sizeL = cuSize * cuSize;
- uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp));
- CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * 2) * numInstances);
+ if (csp == X265_CSP_I400)
+ {
+ CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL) * numInstances);
+ }
+ else
+ {
+ uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp));
+ CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * 2) * numInstances);
+ }
CHECKED_MALLOC(charMemBlock, uint8_t, numPartition * numInstances * CUData::BytesPerPartition);
CHECKED_MALLOC(mvMemBlock, MV, numPartition * 4 * numInstances);
return true;
More information about the x265-devel
mailing list