[x265] [PATCH 3 of 8] cudata: allocate memory pool for each cu to support 400 color space

Mahesh Pittala mahesh at multicorewareinc.com
Mon Dec 14 20:30:09 CET 2015


# HG changeset patch
# User Mahesh Pittala <mahesh at multicorewareinc.com>
# Date 1450016875 -19800
#      Sun Dec 13 19:57:55 2015 +0530
# Node ID d01cd1fee4e30e2dd4ea90490e471417e5bf47d5
# Parent  bc03a968117e5f3c242dd0f198c8281c6216587b
cudata: allocate memory pool for each cu to support 400 color space

diff -r bc03a968117e -r d01cd1fee4e3 source/common/cudata.cpp
--- a/source/common/cudata.cpp	Sun Dec 13 19:53:20 2015 +0530
+++ b/source/common/cudata.cpp	Sun Dec 13 19:57:55 2015 +0530
@@ -193,44 +193,82 @@
         break;
     }
 
-    /* Each CU's data is layed out sequentially within the charMemBlock */
-    uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance;
+    if (csp == X265_CSP_I400)
+    {
+        /* Each CU's data is laid out sequentially within the charMemBlock */
+        uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * (BytesPerPartition - 4)) * instance;
 
-    m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
-    m_log2CUSize         = charBuf; charBuf += m_numPartitions;
-    m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
-    m_chromaIntraDir     = charBuf; charBuf += m_numPartitions;
-    m_tqBypass           = charBuf; charBuf += m_numPartitions;
-    m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
-    m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
-    m_cuDepth            = charBuf; charBuf += m_numPartitions;
-    m_predMode           = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
-    m_partSize           = charBuf; charBuf += m_numPartitions;
-    m_mergeFlag          = charBuf; charBuf += m_numPartitions;
-    m_interDir           = charBuf; charBuf += m_numPartitions;
-    m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
-    m_mvpIdx[1]          = charBuf; charBuf += m_numPartitions;
-    m_tuDepth            = charBuf; charBuf += m_numPartitions;
-    m_transformSkip[0]   = charBuf; charBuf += m_numPartitions;
-    m_transformSkip[1]   = charBuf; charBuf += m_numPartitions;
-    m_transformSkip[2]   = charBuf; charBuf += m_numPartitions;
-    m_cbf[0]             = charBuf; charBuf += m_numPartitions;
-    m_cbf[1]             = charBuf; charBuf += m_numPartitions;
-    m_cbf[2]             = charBuf; charBuf += m_numPartitions;
+        m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
+        m_log2CUSize         = charBuf; charBuf += m_numPartitions;
+        m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
+        m_tqBypass           = charBuf; charBuf += m_numPartitions;
+        m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
+        m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
+        m_cuDepth            = charBuf; charBuf += m_numPartitions;
+        m_predMode           = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
+        m_partSize           = charBuf; charBuf += m_numPartitions;
+        m_mergeFlag          = charBuf; charBuf += m_numPartitions;
+        m_interDir           = charBuf; charBuf += m_numPartitions;
+        m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
+        m_mvpIdx[1]          = charBuf; charBuf += m_numPartitions;
+        m_tuDepth            = charBuf; charBuf += m_numPartitions;
+        m_transformSkip[0]   = charBuf; charBuf += m_numPartitions;
+        m_cbf[0]             = charBuf; charBuf += m_numPartitions;
+        m_chromaIntraDir     = charBuf; charBuf += m_numPartitions;
 
-    X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n");
+        X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * (BytesPerPartition - 4)) * (instance + 1), "CU data layout is broken\n"); //BytesPerPartition
 
-    m_mv[0]  = dataPool.mvMemBlock + (instance * 4) * m_numPartitions;
-    m_mv[1]  = m_mv[0] +  m_numPartitions;
-    m_mvd[0] = m_mv[1] +  m_numPartitions;
-    m_mvd[1] = m_mvd[0] + m_numPartitions;
+        m_mv[0]  = dataPool.mvMemBlock + (instance * 4) * m_numPartitions;
+        m_mv[1]  = m_mv[0] +  m_numPartitions;
+        m_mvd[0] = m_mv[1] +  m_numPartitions;
+        m_mvd[1] = m_mvd[0] + m_numPartitions;
 
-    uint32_t cuSize = g_maxCUSize >> depth;
-    uint32_t sizeL = cuSize * cuSize;
-    uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
-    m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL + sizeC * 2);
-    m_trCoeff[1] = m_trCoeff[0] + sizeL;
-    m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC;
+        uint32_t cuSize = g_maxCUSize >> depth;
+        m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (cuSize * cuSize);
+        m_trCoeff[1] = m_trCoeff[2] = 0;
+        m_transformSkip[1] = m_transformSkip[2] = m_cbf[1] = m_cbf[2] = 0;
+    }
+    else
+    {
+        /* Each CU's data is laid out sequentially within the charMemBlock */
+        uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance;
+
+        m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
+        m_log2CUSize         = charBuf; charBuf += m_numPartitions;
+        m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
+        m_tqBypass           = charBuf; charBuf += m_numPartitions;
+        m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
+        m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
+        m_cuDepth            = charBuf; charBuf += m_numPartitions;
+        m_predMode           = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
+        m_partSize           = charBuf; charBuf += m_numPartitions;
+        m_mergeFlag          = charBuf; charBuf += m_numPartitions;
+        m_interDir           = charBuf; charBuf += m_numPartitions;
+        m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
+        m_mvpIdx[1]          = charBuf; charBuf += m_numPartitions;
+        m_tuDepth            = charBuf; charBuf += m_numPartitions;
+        m_transformSkip[0]   = charBuf; charBuf += m_numPartitions;
+        m_transformSkip[1]   = charBuf; charBuf += m_numPartitions;
+        m_transformSkip[2]   = charBuf; charBuf += m_numPartitions;
+        m_cbf[0]             = charBuf; charBuf += m_numPartitions;
+        m_cbf[1]             = charBuf; charBuf += m_numPartitions;
+        m_cbf[2]             = charBuf; charBuf += m_numPartitions;
+        m_chromaIntraDir     = charBuf; charBuf += m_numPartitions;
+
+        X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n");
+
+        m_mv[0]  = dataPool.mvMemBlock + (instance * 4) * m_numPartitions;
+        m_mv[1]  = m_mv[0] +  m_numPartitions;
+        m_mvd[0] = m_mv[1] +  m_numPartitions;
+        m_mvd[1] = m_mvd[0] + m_numPartitions;
+
+        uint32_t cuSize = g_maxCUSize >> depth;
+        uint32_t sizeL = cuSize * cuSize;
+        uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift); // block chroma part
+        m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL + sizeC * 2);
+        m_trCoeff[1] = m_trCoeff[0] + sizeL;
+        m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC;
+    }
 }
 
 void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp)
@@ -258,7 +296,7 @@
     X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n");
 
     /* initialize the remaining CU data in one memset */
-    memset(m_cuDepth, 0, (BytesPerPartition - 7) * m_numPartitions);
+    memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? BytesPerPartition - 11 : BytesPerPartition - 7) * m_numPartitions);
 
     uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
     m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL;
@@ -293,7 +331,7 @@
     m_partSet(m_cuDepth,      (uint8_t)cuGeom.depth);
 
     /* initialize the remaining CU data in one memset */
-    memset(m_predMode, 0, (BytesPerPartition - 8) * m_numPartitions);
+    memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ? BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions);
 }
 
 /* Copy the results of a sub-part (split) CU to the parent CU */
@@ -317,13 +355,9 @@
     m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]);
     m_subPartCopy(m_mvpIdx[1] + offset, subCU.m_mvpIdx[1]);
     m_subPartCopy(m_tuDepth + offset, subCU.m_tuDepth);
+
     m_subPartCopy(m_transformSkip[0] + offset, subCU.m_transformSkip[0]);
-    m_subPartCopy(m_transformSkip[1] + offset, subCU.m_transformSkip[1]);
-    m_subPartCopy(m_transformSkip[2] + offset, subCU.m_transformSkip[2]);
     m_subPartCopy(m_cbf[0] + offset, subCU.m_cbf[0]);
-    m_subPartCopy(m_cbf[1] + offset, subCU.m_cbf[1]);
-    m_subPartCopy(m_cbf[2] + offset, subCU.m_cbf[2]);
-    m_subPartCopy(m_chromaIntraDir + offset, subCU.m_chromaIntraDir);
 
     memcpy(m_mv[0] + offset, subCU.m_mv[0], childGeom.numPartitions * sizeof(MV));
     memcpy(m_mv[1] + offset, subCU.m_mv[1], childGeom.numPartitions * sizeof(MV));
@@ -332,12 +366,21 @@
 
     uint32_t tmp = 1 << ((g_maxLog2CUSize - childGeom.depth) * 2);
     uint32_t tmp2 = subPartIdx * tmp;
-    memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t) * tmp);
+    memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t)* tmp);
 
-    uint32_t tmpC = tmp >> (m_hChromaShift + m_vChromaShift);
-    uint32_t tmpC2 = tmp2 >> (m_hChromaShift + m_vChromaShift);
-    memcpy(m_trCoeff[1] + tmpC2, subCU.m_trCoeff[1], sizeof(coeff_t) * tmpC);
-    memcpy(m_trCoeff[2] + tmpC2, subCU.m_trCoeff[2], sizeof(coeff_t) * tmpC);
+    if (subCU.m_chromaFormat != X265_CSP_I400)
+    {
+        m_subPartCopy(m_transformSkip[1] + offset, subCU.m_transformSkip[1]);
+        m_subPartCopy(m_transformSkip[2] + offset, subCU.m_transformSkip[2]);
+        m_subPartCopy(m_cbf[1] + offset, subCU.m_cbf[1]);
+        m_subPartCopy(m_cbf[2] + offset, subCU.m_cbf[2]);
+        m_subPartCopy(m_chromaIntraDir + offset, subCU.m_chromaIntraDir);
+
+        uint32_t tmpC = tmp >> (m_hChromaShift + m_vChromaShift);
+        uint32_t tmpC2 = tmp2 >> (m_hChromaShift + m_vChromaShift);
+        memcpy(m_trCoeff[1] + tmpC2, subCU.m_trCoeff[1], sizeof(coeff_t) * tmpC);
+        memcpy(m_trCoeff[2] + tmpC2, subCU.m_trCoeff[2], sizeof(coeff_t) * tmpC);
+    }
 }
 
 /* If a sub-CU part is not present (off the edge of the picture) its depth and
@@ -374,17 +417,20 @@
     /* force TQBypass to true */
     m_partSet(m_tqBypass, true);
 
-    m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
-
     /* clear residual coding flags */
     m_partSet(m_predMode, cu.m_predMode[0] & (MODE_INTRA | MODE_INTER));
     m_partSet(m_tuDepth, 0);
+    m_partSet(m_cbf[0], 0);
     m_partSet(m_transformSkip[0], 0);
-    m_partSet(m_transformSkip[1], 0);
-    m_partSet(m_transformSkip[2], 0);
-    m_partSet(m_cbf[0], 0);
-    m_partSet(m_cbf[1], 0);
-    m_partSet(m_cbf[2], 0);
+
+    if (cu.m_chromaFormat != X265_CSP_I400)
+    {
+        m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
+        m_partSet(m_cbf[1], 0);
+        m_partSet(m_cbf[2], 0);
+        m_partSet(m_transformSkip[1], 0);
+        m_partSet(m_transformSkip[2], 0);
+    }
 }
 
 /* Copy completed predicted CU to CTU in picture */
@@ -407,30 +453,34 @@
     m_partCopy(ctu.m_mvpIdx[1] + m_absIdxInCTU, m_mvpIdx[1]);
     m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
     m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
-    m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
-    m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
     m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
-    m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
-    m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
-    m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
 
-    memcpy(ctu.m_mv[0] + m_absIdxInCTU,  m_mv[0],  m_numPartitions * sizeof(MV));
-    memcpy(ctu.m_mv[1] + m_absIdxInCTU,  m_mv[1],  m_numPartitions * sizeof(MV));
+    memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV));
+    memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV));
     memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV));
     memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV));
 
     uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2);
     uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
-    memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
+    memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY);
 
-    uint32_t tmpC = tmpY >> (m_hChromaShift + m_vChromaShift);
-    uint32_t tmpC2 = tmpY2 >> (m_hChromaShift + m_vChromaShift);
-    memcpy(ctu.m_trCoeff[1] + tmpC2, m_trCoeff[1], sizeof(coeff_t) * tmpC);
-    memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC);
+    if (ctu.m_chromaFormat != X265_CSP_I400)
+    {
+        m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
+        m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
+        m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
+        m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
+        m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
+
+        uint32_t tmpC = tmpY >> (m_hChromaShift + m_vChromaShift);
+        uint32_t tmpC2 = tmpY2 >> (m_hChromaShift + m_vChromaShift);
+        memcpy(ctu.m_trCoeff[1] + tmpC2, m_trCoeff[1], sizeof(coeff_t) * tmpC);
+        memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC);
+    }
 }
 
 /* The reverse of copyToPic, called only by encodeResidue */
-void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom)
+void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom, int csp)
 {
     m_encData       = ctu.m_encData;
     m_slice         = ctu.m_slice;
@@ -456,19 +506,23 @@
     m_partCopy(m_mvpIdx[1],    ctu.m_mvpIdx[1] + m_absIdxInCTU);
     m_partCopy(m_chromaIntraDir, ctu.m_chromaIntraDir + m_absIdxInCTU);
 
-    memcpy(m_mv[0],  ctu.m_mv[0] + m_absIdxInCTU,  m_numPartitions * sizeof(MV));
-    memcpy(m_mv[1],  ctu.m_mv[1] + m_absIdxInCTU,  m_numPartitions * sizeof(MV));
+    memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
+    memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
     memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
     memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
 
     /* clear residual coding flags */
     m_partSet(m_tuDepth, 0);
     m_partSet(m_transformSkip[0], 0);
-    m_partSet(m_transformSkip[1], 0);
-    m_partSet(m_transformSkip[2], 0);
     m_partSet(m_cbf[0], 0);
-    m_partSet(m_cbf[1], 0);
-    m_partSet(m_cbf[2], 0);
+
+    if (csp != X265_CSP_I400)
+    {        
+        m_partSet(m_transformSkip[1], 0);
+        m_partSet(m_transformSkip[2], 0);
+        m_partSet(m_cbf[1], 0);
+        m_partSet(m_cbf[2], 0);
+    }
 }
 
 /* Only called by encodeResidue, these fields can be modified during inter/intra coding */
@@ -478,22 +532,28 @@
 
     m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
     m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
-    m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
-    m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
     m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
     m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
     m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
-    m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
-    m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
-    m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
 
     uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2);
     uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
-    memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
-    tmpY  >>= m_hChromaShift + m_vChromaShift;
-    tmpY2 >>= m_hChromaShift + m_vChromaShift;
-    memcpy(ctu.m_trCoeff[1] + tmpY2, m_trCoeff[1], sizeof(coeff_t) * tmpY);
-    memcpy(ctu.m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY);
+    memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY);
+
+    if (ctu.m_chromaFormat != X265_CSP_I400)
+    {
+        m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
+        m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
+
+        m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
+        m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
+        m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
+
+        tmpY  >>= m_hChromaShift + m_vChromaShift;
+        tmpY2 >>= m_hChromaShift + m_vChromaShift;
+        memcpy(ctu.m_trCoeff[1] + tmpY2, m_trCoeff[1], sizeof(coeff_t) * tmpY);
+        memcpy(ctu.m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY);
+    }
 }
 
 const CUData* CUData::getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const
diff -r bc03a968117e -r d01cd1fee4e3 source/common/cudata.h
--- a/source/common/cudata.h	Sun Dec 13 19:53:20 2015 +0530
+++ b/source/common/cudata.h	Sun Dec 13 19:57:55 2015 +0530
@@ -222,12 +222,12 @@
     void     copyToPic(uint32_t depth) const;
 
     /* RD-0 methods called only from encodeResidue */
-    void     copyFromPic(const CUData& ctu, const CUGeom& cuGeom);
+    void     copyFromPic(const CUData& ctu, const CUGeom& cuGeom, int csp);
     void     updatePic(uint32_t depth) const;
 
     void     setPartSizeSubParts(PartSize size)    { m_partSet(m_partSize, (uint8_t)size); }
     void     setPredModeSubParts(PredMode mode)    { m_partSet(m_predMode, (uint8_t)mode); }
-    void     clearCbf()                            { m_partSet(m_cbf[0], 0); m_partSet(m_cbf[1], 0); m_partSet(m_cbf[2], 0); }
+    void     clearCbf()                            { m_partSet(m_cbf[0], 0); if (m_chromaFormat != X265_CSP_I400) { m_partSet(m_cbf[1], 0); m_partSet(m_cbf[2], 0);} }
 
     /* these functions all take depth as an absolute depth from CTU, it is used to calculate the number of parts to copy */
     void     setQPSubParts(int8_t qp, uint32_t absPartIdx, uint32_t depth)                    { s_partSet[depth]((uint8_t*)m_qp + absPartIdx, (uint8_t)qp); }
@@ -246,7 +246,7 @@
     void     setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx);
 
     uint8_t  getCbf(uint32_t absPartIdx, TextType ttype, uint32_t tuDepth) const { return (m_cbf[ttype][absPartIdx] >> tuDepth) & 0x1; }
-    uint8_t  getQtRootCbf(uint32_t absPartIdx) const                             { return m_cbf[0][absPartIdx] || m_cbf[1][absPartIdx] || m_cbf[2][absPartIdx]; }
+    uint8_t  getQtRootCbf(uint32_t absPartIdx) const                             { if (m_chromaFormat == X265_CSP_I400) return m_cbf[0][absPartIdx] || 0; else { return m_cbf[0][absPartIdx] || m_cbf[1][absPartIdx] || m_cbf[2][absPartIdx];} }
     int8_t   getRefQP(uint32_t currAbsIdxInCTU) const;
     uint32_t getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField (*candMvField)[2], uint8_t* candDir) const;
     void     clipMv(MV& outMV) const;
@@ -339,8 +339,15 @@
         uint32_t numPartition = NUM_4x4_PARTITIONS >> (depth * 2);
         uint32_t cuSize = g_maxCUSize >> depth;
         uint32_t sizeL = cuSize * cuSize;
-        uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp));
-        CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * 2) * numInstances);
+        if (csp == X265_CSP_I400)
+        {
+            CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL) * numInstances);
+        }
+        else
+        {            
+            uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp));
+            CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * 2) * numInstances);
+        }
         CHECKED_MALLOC(charMemBlock, uint8_t, numPartition * numInstances * CUData::BytesPerPartition);
         CHECKED_MALLOC(mvMemBlock, MV, numPartition * 4 * numInstances);
         return true;


More information about the x265-devel mailing list