[x265] refine block-size-related code, use the log2 domain more widely.
Satoshi Nakagawa
nakagawa424 at oki.com
Mon Jul 14 07:53:48 CEST 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1405317034 -32400
# Mon Jul 14 14:50:34 2014 +0900
# Node ID fa683df9621ef79cacdf98d53d966b4bf90c6e88
# Parent 6055baa75085cd074c62ab7c52357cac64d10a7e
refine block-size-related code, use the log2 domain more widely.
diff -r 6055baa75085 -r fa683df9621e source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -71,7 +71,7 @@
m_baseQp = 0;
m_DataCUMemPool.qpMemBlock = NULL;
m_DataCUMemPool.depthMemBlock = NULL;
- m_DataCUMemPool.cuSizeMemBlock = NULL;
+ m_DataCUMemPool.log2CUSizeMemBlock = NULL;
m_DataCUMemPool.skipFlagMemBlock = NULL;
m_DataCUMemPool.partSizeMemBlock = NULL;
m_DataCUMemPool.predModeMemBlock = NULL;
@@ -101,7 +101,7 @@
CHECKED_MALLOC(m_DataCUMemPool.qpMemBlock, char, numPartition * numBlocks);
CHECKED_MALLOC(m_DataCUMemPool.depthMemBlock, uint8_t, numPartition * numBlocks);
- CHECKED_MALLOC(m_DataCUMemPool.cuSizeMemBlock, uint8_t, numPartition * numBlocks);
+ CHECKED_MALLOC(m_DataCUMemPool.log2CUSizeMemBlock, uint8_t, numPartition * numBlocks);
CHECKED_MALLOC(m_DataCUMemPool.skipFlagMemBlock, bool, numPartition * numBlocks);
CHECKED_MALLOC(m_DataCUMemPool.partSizeMemBlock, char, numPartition * numBlocks);
CHECKED_MALLOC(m_DataCUMemPool.predModeMemBlock, char, numPartition * numBlocks);
@@ -153,7 +153,7 @@
m_qp = cu->m_DataCUMemPool.qpMemBlock + index * numPartition;
m_depth = cu->m_DataCUMemPool.depthMemBlock + index * numPartition;
- m_cuSize = cu->m_DataCUMemPool.cuSizeMemBlock + index * numPartition;
+ m_log2CUSize = cu->m_DataCUMemPool.log2CUSizeMemBlock + index * numPartition;
m_skipFlag = cu->m_DataCUMemPool.skipFlagMemBlock + index * numPartition;
m_partSizes = cu->m_DataCUMemPool.partSizeMemBlock + index * numPartition;
m_predModes = cu->m_DataCUMemPool.predModeMemBlock + index * numPartition;
@@ -204,10 +204,10 @@
m_DataCUMemPool.depthMemBlock = NULL;
}
- if (m_DataCUMemPool.cuSizeMemBlock)
+ if (m_DataCUMemPool.log2CUSizeMemBlock)
{
- X265_FREE(m_DataCUMemPool.cuSizeMemBlock);
- m_DataCUMemPool.cuSizeMemBlock = NULL;
+ X265_FREE(m_DataCUMemPool.log2CUSizeMemBlock);
+ m_DataCUMemPool.log2CUSizeMemBlock = NULL;
}
if (m_DataCUMemPool.cbfMemBlock)
@@ -318,8 +318,8 @@
m_pic = pic;
m_slice = pic->getSlice();
m_cuAddr = cuAddr;
- m_cuPelX = (cuAddr % pic->getFrameWidthInCU()) * g_maxCUSize;
- m_cuPelY = (cuAddr / pic->getFrameWidthInCU()) * g_maxCUSize;
+ m_cuPelX = (cuAddr % pic->getFrameWidthInCU()) << g_maxLog2CUSize;
+ m_cuPelY = (cuAddr / pic->getFrameWidthInCU()) << g_maxLog2CUSize;
m_absIdxInLCU = 0;
m_psyEnergy = 0;
m_totalPsyCost = MAX_INT64;
@@ -349,7 +349,7 @@
memset(m_transformSkip[0], 0, m_numPartitions * sizeof(*m_transformSkip[0]));
memset(m_transformSkip[1], 0, m_numPartitions * sizeof(*m_transformSkip[1]));
memset(m_transformSkip[2], 0, m_numPartitions * sizeof(*m_transformSkip[2]));
- memset(m_cuSize, g_maxCUSize, m_numPartitions * sizeof(*m_cuSize));
+ memset(m_log2CUSize, g_maxLog2CUSize, m_numPartitions * sizeof(*m_log2CUSize));
memset(m_bMergeFlags, false, m_numPartitions * sizeof(*m_bMergeFlags));
memset(m_lumaIntraDir, DC_IDX, m_numPartitions * sizeof(*m_lumaIntraDir));
memset(m_chromaIntraDir, 0, m_numPartitions * sizeof(*m_chromaIntraDir));
@@ -365,8 +365,8 @@
if (getSlice()->getPPS()->getTransquantBypassEnableFlag())
{
- uint32_t y_tmp = g_maxCUSize * g_maxCUSize;
- uint32_t c_tmp = g_maxCUSize * g_maxCUSize >> (m_hChromaShift + m_vChromaShift);
+ uint32_t y_tmp = 1 << (g_maxLog2CUSize * 2);
+ uint32_t c_tmp = 1 << (g_maxLog2CUSize * 2 - m_hChromaShift - m_vChromaShift);
memset(m_tqBypassOrigYuv[0], 0, sizeof(pixel) * y_tmp);
memset(m_tqBypassOrigYuv[1], 0, sizeof(pixel) * c_tmp);
memset(m_tqBypassOrigYuv[2], 0, sizeof(pixel) * c_tmp);
@@ -399,7 +399,7 @@
void TComDataCU::initSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, int qp)
{
X265_CHECK(partUnitIdx < 4, "part unit should be less than 4\n");
- uint8_t cuSize = g_maxCUSize >> depth;
+ uint8_t log2CUSize = g_maxLog2CUSize - depth;
uint32_t partOffset = (cu->getTotalNumPart() >> 2) * partUnitIdx;
m_pic = cu->getPic();
@@ -407,8 +407,8 @@
m_cuAddr = cu->getAddr();
m_absIdxInLCU = cu->getZorderIdxInCU() + partOffset;
- m_cuPelX = cu->getCUPelX() + cuSize * (partUnitIdx & 1);
- m_cuPelY = cu->getCUPelY() + cuSize * (partUnitIdx >> 1);
+ m_cuPelX = cu->getCUPelX() + ((partUnitIdx & 1) << log2CUSize);
+ m_cuPelY = cu->getCUPelY() + ((partUnitIdx >> 1) << log2CUSize);
m_psyEnergy = 0;
m_totalPsyCost = MAX_INT64;
@@ -441,7 +441,7 @@
memset(m_cbf[1], 0, sizeInChar);
memset(m_cbf[2], 0, sizeInChar);
memset(m_depth, depth, sizeInChar);
- memset(m_cuSize, cuSize, sizeInChar);
+ memset(m_log2CUSize, log2CUSize, sizeInChar);
memset(m_partSizes, SIZE_NONE, sizeInChar);
memset(m_predModes, MODE_NONE, sizeInChar);
memset(m_skipFlag, false, sizeInBool);
@@ -473,8 +473,8 @@
m_cuAddr = cu->getAddr();
m_absIdxInLCU = cu->getZorderIdxInCU() + partOffset;
- m_cuPelX = cu->getCUPelX() + (g_maxCUSize >> depth) * (partUnitIdx & 1);
- m_cuPelY = cu->getCUPelY() + (g_maxCUSize >> depth) * (partUnitIdx >> 1);
+ m_cuPelX = cu->getCUPelX() + ((partUnitIdx & 1) << (g_maxLog2CUSize - depth));
+ m_cuPelY = cu->getCUPelY() + ((partUnitIdx >> 1) << (g_maxLog2CUSize - depth));
m_psyEnergy = 0;
m_totalPsyCost = MAX_INT64;
@@ -497,7 +497,7 @@
memcpy(m_lumaIntraDir, otherCU->getLumaIntraDir() + m_absIdxInLCU, sizeInChar);
memcpy(m_depth, otherCU->getDepth() + m_absIdxInLCU, sizeInChar);
- memcpy(m_cuSize, otherCU->getCUSize() + m_absIdxInLCU, sizeInChar);
+ memcpy(m_log2CUSize, otherCU->getLog2CUSize() + m_absIdxInLCU, sizeInChar);
}
// --------------------------------------------------------------------------------------------------------------------
@@ -544,7 +544,7 @@
memcpy(m_cbf[2] + offset, cu->getCbf(TEXT_CHROMA_V), sizeInChar);
memcpy(m_depth + offset, cu->getDepth(), sizeInChar);
- memcpy(m_cuSize + offset, cu->getCUSize(), sizeInChar);
+ memcpy(m_log2CUSize + offset, cu->getLog2CUSize(), sizeInChar);
memcpy(m_mvpIdx[0] + offset, cu->getMVPIdx(REF_PIC_LIST_0), sizeInChar);
memcpy(m_mvpIdx[1] + offset, cu->getMVPIdx(REF_PIC_LIST_1), sizeInChar);
@@ -557,7 +557,7 @@
m_cuMvField[0].copyFrom(cu->getCUMvField(REF_PIC_LIST_0), cu->getTotalNumPart(), offset);
m_cuMvField[1].copyFrom(cu->getCUMvField(REF_PIC_LIST_1), cu->getTotalNumPart(), offset);
- uint32_t tmp = g_maxCUSize * g_maxCUSize >> (depth << 1);
+ uint32_t tmp = 1 << ((g_maxLog2CUSize - depth) * 2);
uint32_t tmp2 = partUnitIdx * tmp;
memcpy(m_trCoeff[0] + tmp2, cu->getCoeffY(), sizeof(coeff_t) * tmp);
@@ -613,7 +613,7 @@
memcpy(cu->getCbf(TEXT_CHROMA_V) + m_absIdxInLCU, m_cbf[2], sizeInChar);
memcpy(cu->getDepth() + m_absIdxInLCU, m_depth, sizeInChar);
- memcpy(cu->getCUSize() + m_absIdxInLCU, m_cuSize, sizeInChar);
+ memcpy(cu->getLog2CUSize() + m_absIdxInLCU, m_log2CUSize, sizeInChar);
memcpy(cu->getMVPIdx(REF_PIC_LIST_0) + m_absIdxInLCU, m_mvpIdx[0], sizeInChar);
memcpy(cu->getMVPIdx(REF_PIC_LIST_1) + m_absIdxInLCU, m_mvpIdx[1], sizeInChar);
@@ -621,7 +621,7 @@
m_cuMvField[0].copyTo(cu->getCUMvField(REF_PIC_LIST_0), m_absIdxInLCU);
m_cuMvField[1].copyTo(cu->getCUMvField(REF_PIC_LIST_1), m_absIdxInLCU);
- uint32_t tmpY = (g_maxCUSize * g_maxCUSize) >> (depth << 1);
+ uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2);
uint32_t tmpY2 = m_absIdxInLCU << m_pic->getLog2UnitSize() * 2;
memcpy(cu->getCoeffY() + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
@@ -632,7 +632,7 @@
if (getSlice()->getPPS()->getTransquantBypassEnableFlag())
{
- uint32_t tmp = (g_maxCUSize * g_maxCUSize) >> (depth << 1);
+ uint32_t tmp = 1 << ((g_maxLog2CUSize - depth) * 2);
uint32_t tmp2 = m_absIdxInLCU << m_pic->getLog2UnitSize() * 2;
memcpy(cu->getLumaOrigYuv() + tmp2, m_tqBypassOrigYuv[0], sizeof(pixel) * tmp);
@@ -659,7 +659,7 @@
memcpy(cu->getCbf(TEXT_CHROMA_U) + m_absIdxInLCU, m_cbf[1], sizeInChar);
memcpy(cu->getCbf(TEXT_CHROMA_V) + m_absIdxInLCU, m_cbf[2], sizeInChar);
- uint32_t tmpY = (g_maxCUSize * g_maxCUSize) >> (depth << 1);
+ uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2);
uint32_t tmpY2 = m_absIdxInLCU << m_pic->getLog2UnitSize() * 2;
memcpy(cu->getCoeffY() + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
tmpY >>= m_hChromaShift + m_vChromaShift;
@@ -705,14 +705,14 @@
memcpy(cu->getCbf(TEXT_CHROMA_V) + partOffset, m_cbf[2], sizeInChar);
memcpy(cu->getDepth() + partOffset, m_depth, sizeInChar);
- memcpy(cu->getCUSize() + partOffset, m_cuSize, sizeInChar);
+ memcpy(cu->getLog2CUSize() + partOffset, m_log2CUSize, sizeInChar);
memcpy(cu->getMVPIdx(REF_PIC_LIST_0) + partOffset, m_mvpIdx[0], sizeInChar);
memcpy(cu->getMVPIdx(REF_PIC_LIST_1) + partOffset, m_mvpIdx[1], sizeInChar);
m_cuMvField[0].copyTo(cu->getCUMvField(REF_PIC_LIST_0), m_absIdxInLCU, partStart, qNumPart);
m_cuMvField[1].copyTo(cu->getCUMvField(REF_PIC_LIST_1), m_absIdxInLCU, partStart, qNumPart);
- uint32_t tmpY = (g_maxCUSize * g_maxCUSize) >> ((depth + partDepth) << 1);
+ uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth - partDepth) * 2);
uint32_t tmpY2 = partOffset << m_pic->getLog2UnitSize() * 2;
memcpy(cu->getCoeffY() + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
@@ -737,11 +737,11 @@
TComDataCU* TComDataCU::getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx)
{
uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
- uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU];
uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
if (!RasterAddress::isZeroCol(absPartIdx, numPartInCUSize))
{
+ uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU];
lPartUnitIdx = g_rasterToZscan[absPartIdx - 1];
if (RasterAddress::isEqualCol(absPartIdx, absZorderCUIdx, numPartInCUSize))
{
@@ -761,11 +761,11 @@
TComDataCU* TComDataCU::getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx, bool planarAtLCUBoundary)
{
uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
- uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU];
uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
if (!RasterAddress::isZeroRow(absPartIdx, numPartInCUSize))
{
+ uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU];
aPartUnitIdx = g_rasterToZscan[absPartIdx - numPartInCUSize];
if (RasterAddress::isEqualRow(absPartIdx, absZorderCUIdx, numPartInCUSize))
{
@@ -788,13 +788,13 @@
TComDataCU* TComDataCU::getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx)
{
uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
- uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU];
uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
if (!RasterAddress::isZeroCol(absPartIdx, numPartInCUSize))
{
if (!RasterAddress::isZeroRow(absPartIdx, numPartInCUSize))
{
+ uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU];
alPartUnitIdx = g_rasterToZscan[absPartIdx - numPartInCUSize - 1];
if (RasterAddress::isEqualRowOrCol(absPartIdx, absZorderCUIdx, numPartInCUSize))
{
@@ -823,7 +823,6 @@
TComDataCU* TComDataCU::getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx)
{
uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx];
- uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (m_cuSize[0] >> m_pic->getLog2UnitSize()) - 1;
uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdxRT] + m_pic->getUnitSize()) >= m_slice->getSPS()->getPicWidthInLumaSamples())
@@ -837,6 +836,7 @@
{
if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - numPartInCUSize + 1])
{
+ uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize())) - 1;
arPartUnitIdx = g_rasterToZscan[absPartIdxRT - numPartInCUSize + 1];
if (RasterAddress::isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, numPartInCUSize))
{
@@ -866,20 +866,21 @@
TComDataCU* TComDataCU::getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx)
{
uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
- uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInLCU] + ((m_cuSize[0] >> m_pic->getLog2UnitSize()) - 1) * m_pic->getNumPartInCUSize();
- uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdxLB] + m_pic->getUnitSize()) >= m_slice->getSPS()->getPicHeightInLumaSamples())
{
return NULL;
}
+ uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
+
if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 1, numPartInCUSize))
{
if (!RasterAddress::isZeroCol(absPartIdxLB, numPartInCUSize))
{
if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + numPartInCUSize - 1])
{
+ uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInLCU] + ((1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize())) - 1) * m_pic->getNumPartInCUSize();
blPartUnitIdx = g_rasterToZscan[absPartIdxLB + numPartInCUSize - 1];
if (RasterAddress::isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, numPartInCUSize))
{
@@ -903,21 +904,22 @@
TComDataCU* TComDataCU::getPUBelowLeftAdi(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset)
{
uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
- uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInLCU] + ((m_cuSize[0] >> m_pic->getLog2UnitSize()) - 1) * m_pic->getNumPartInCUSize();
- uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
- if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdxLB] + (partUnitOffset << m_pic->getPicSym()->getLog2UnitSize())) >=
+ if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdxLB] + (partUnitOffset << m_pic->getLog2UnitSize())) >=
m_slice->getSPS()->getPicHeightInLumaSamples())
{
return NULL;
}
+ uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
+
if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - partUnitOffset, numPartInCUSize))
{
if (!RasterAddress::isZeroCol(absPartIdxLB, numPartInCUSize))
{
if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + partUnitOffset * numPartInCUSize - 1])
{
+ uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInLCU] + ((1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize())) - 1) * m_pic->getNumPartInCUSize();
blPartUnitIdx = g_rasterToZscan[absPartIdxLB + partUnitOffset * numPartInCUSize - 1];
if (RasterAddress::isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, numPartInCUSize))
{
@@ -945,21 +947,22 @@
TComDataCU* TComDataCU::getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset)
{
uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx];
- uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (m_cuSize[0] >> m_pic->getLog2UnitSize()) - 1;
- uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
- if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdxRT] + (partUnitOffset << m_pic->getPicSym()->getLog2UnitSize())) >=
+ if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdxRT] + (partUnitOffset << m_pic->getLog2UnitSize())) >=
m_slice->getSPS()->getPicWidthInLumaSamples())
{
return NULL;
}
+ uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
+
if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - partUnitOffset, numPartInCUSize))
{
if (!RasterAddress::isZeroRow(absPartIdxRT, numPartInCUSize))
{
if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - numPartInCUSize + partUnitOffset])
{
+ uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize())) - 1;
arPartUnitIdx = g_rasterToZscan[absPartIdxRT - numPartInCUSize + partUnitOffset];
if (RasterAddress::isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, numPartInCUSize))
{
@@ -1207,7 +1210,7 @@
uint32_t TComDataCU::getQuadtreeTULog2MinSizeInCU(uint32_t absPartIdx)
{
- uint32_t log2CUSize = g_convertToBit[getCUSize(absPartIdx)] + 2;
+ uint32_t log2CUSize = getLog2CUSize(absPartIdx);
PartSize partSize = getPartitionSize(absPartIdx);
uint32_t quadtreeTUMaxDepth = getPredictionMode(absPartIdx) == MODE_INTRA ? m_slice->getSPS()->getQuadtreeTUMaxDepthIntra() : m_slice->getSPS()->getQuadtreeTUMaxDepthInter();
int intraSplitFlag = (getPredictionMode(absPartIdx) == MODE_INTRA && partSize == SIZE_NxN) ? 1 : 0;
@@ -1528,7 +1531,7 @@
void TComDataCU::getPartIndexAndSize(uint32_t partIdx, uint32_t& outPartAddr, int& outWidth, int& outHeight)
{
- int cuSize = getCUSize(0);
+ int cuSize = 1 << getLog2CUSize(0);
switch (m_partSizes[0])
{
@@ -1592,7 +1595,7 @@
void TComDataCU::deriveLeftRightTopIdxGeneral(uint32_t absPartIdx, uint32_t partIdx, uint32_t& outPartIdxLT, uint32_t& outPartIdxRT)
{
outPartIdxLT = m_absIdxInLCU + absPartIdx;
- uint32_t cuSize = m_cuSize[absPartIdx];
+ uint32_t cuSize = 1 << m_log2CUSize[absPartIdx];
uint32_t puWidth = 0;
switch (m_partSizes[absPartIdx])
@@ -1647,7 +1650,7 @@
void TComDataCU::deriveLeftBottomIdxGeneral(uint32_t absPartIdx, uint32_t partIdx, uint32_t& outPartIdxLB)
{
- uint32_t cuSize = m_cuSize[absPartIdx];
+ uint32_t cuSize = 1 << m_log2CUSize[absPartIdx];
uint32_t puHeight = 0;
switch (m_partSizes[absPartIdx])
@@ -1703,7 +1706,7 @@
void TComDataCU::deriveLeftRightTopIdx(uint32_t partIdx, uint32_t& ruiPartIdxLT, uint32_t& ruiPartIdxRT)
{
ruiPartIdxLT = m_absIdxInLCU;
- ruiPartIdxRT = g_rasterToZscan[g_zscanToRaster[ruiPartIdxLT] + (m_cuSize[0] >> m_pic->getLog2UnitSize()) - 1];
+ ruiPartIdxRT = g_rasterToZscan[g_zscanToRaster[ruiPartIdxLT] + (1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize())) - 1];
switch (m_partSizes[0])
{
@@ -1744,7 +1747,7 @@
void TComDataCU::deriveLeftBottomIdx(uint32_t partIdx, uint32_t& outPartIdxLB)
{
- outPartIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInLCU] + (((m_cuSize[0] >> m_pic->getLog2UnitSize()) >> 1) - 1) * m_pic->getNumPartInCUSize()];
+ outPartIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInLCU] + ((1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize() - 1)) - 1) * m_pic->getNumPartInCUSize()];
switch (m_partSizes[0])
{
@@ -1785,8 +1788,9 @@
*/
void TComDataCU::deriveRightBottomIdx(uint32_t partIdx, uint32_t& outPartIdxRB)
{
- outPartIdxRB = g_rasterToZscan[g_zscanToRaster[m_absIdxInLCU] + (((m_cuSize[0] >> m_pic->getLog2UnitSize()) >> 1) - 1) *
- m_pic->getNumPartInCUSize() + (m_cuSize[0] >> m_pic->getLog2UnitSize()) - 1];
+ outPartIdxRB = g_rasterToZscan[g_zscanToRaster[m_absIdxInLCU] +
+ ((1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize() - 1)) - 1) * m_pic->getNumPartInCUSize() +
+ (1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize())) - 1];
switch (m_partSizes[0])
{
@@ -1822,7 +1826,7 @@
void TComDataCU::deriveLeftRightTopIdxAdi(uint32_t& outPartIdxLT, uint32_t& outPartIdxRT, uint32_t partOffset, uint32_t partDepth)
{
- uint32_t numPartInWidth = m_cuSize[0] >> (m_pic->getLog2UnitSize() + partDepth);
+ uint32_t numPartInWidth = 1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize() - partDepth);
outPartIdxLT = m_absIdxInLCU + partOffset;
outPartIdxRT = g_rasterToZscan[g_zscanToRaster[outPartIdxLT] + numPartInWidth - 1];
@@ -2196,7 +2200,7 @@
{
uint32_t col = m_cuPelX;
uint32_t row = m_cuPelY;
- uint32_t cuSize = getCUSize(0);
+ uint32_t cuSize = 1 << getLog2CUSize(0);
switch (m_partSizes[0])
{
@@ -2425,7 +2429,7 @@
bool TComDataCU::isBipredRestriction()
{
- return getCUSize(0) == 8 && getPartitionSize(0) != SIZE_2Nx2N;
+ return getLog2CUSize(0) == 3 && getPartitionSize(0) != SIZE_2Nx2N;
}
void TComDataCU::clipMv(MV& outMV)
@@ -2744,8 +2748,8 @@
outPartIdxCenter = m_absIdxInLCU + partAddr; // partition origin.
outPartIdxCenter = g_rasterToZscan[g_zscanToRaster[outPartIdxCenter]
- + (partHeight >> m_pic->getLog2UnitSize()) / 2 * m_pic->getNumPartInCUSize()
- + (partWidth >> m_pic->getLog2UnitSize()) / 2];
+ + (partHeight >> (m_pic->getLog2UnitSize() + 1)) * m_pic->getNumPartInCUSize()
+ + (partWidth >> (m_pic->getLog2UnitSize() + 1))];
}
uint32_t TComDataCU::getCoefScanIdx(uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma, bool bIsIntra)
diff -r 6055baa75085 -r fa683df9621e source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h Mon Jul 14 10:53:01 2014 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.h Mon Jul 14 14:50:34 2014 +0900
@@ -76,7 +76,7 @@
{
char* qpMemBlock;
uint8_t* depthMemBlock;
- uint8_t* cuSizeMemBlock;
+ uint8_t* log2CUSizeMemBlock;
bool* skipFlagMemBlock;
char* partSizeMemBlock;
char* predModeMemBlock;
@@ -119,7 +119,7 @@
uint32_t m_cuPelX; ///< CU position in a pixel (X)
uint32_t m_cuPelY; ///< CU position in a pixel (Y)
uint32_t m_numPartitions; ///< total number of minimum partitions in a CU
- uint8_t* m_cuSize; ///< array of cu width/height
+ uint8_t* m_log2CUSize; ///< array of log2 of cu width/height
uint8_t* m_depth; ///< array of depths
int m_chromaFormat;
int m_hChromaShift;
@@ -271,9 +271,9 @@
void setPredModeSubParts(PredMode eMode, uint32_t absPartIdx, uint32_t depth);
- uint8_t* getCUSize() { return m_cuSize; }
+ uint8_t* getLog2CUSize() { return m_log2CUSize; }
- uint8_t getCUSize(uint32_t idx) { return m_cuSize[idx]; }
+ uint8_t getLog2CUSize(uint32_t idx) const { return m_log2CUSize[idx]; }
char* getQP() { return m_qp; }
diff -r 6055baa75085 -r fa683df9621e source/Lib/TLibCommon/TComLoopFilter.cpp
--- a/source/Lib/TLibCommon/TComLoopFilter.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/Lib/TLibCommon/TComLoopFilter.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -139,9 +139,9 @@
for (uint32_t partIdx = absZOrderIdx; partIdx < absZOrderIdx + curNumParts; partIdx++)
{
uint32_t bsCheck;
- if ((g_maxCUSize >> g_maxCUDepth) == 4)
+ if (g_log2UnitSize == 2)
{
- bsCheck = (dir == EDGE_VER && partIdx % 2 == 0) || (dir == EDGE_HOR && (partIdx - ((partIdx >> 2) << 2)) / 2 == 0);
+ bsCheck = (dir == EDGE_VER && (partIdx & 1) == 0) || (dir == EDGE_HOR && (partIdx & 2) == 0);
}
else
{
@@ -154,16 +154,16 @@
}
}
- uint32_t pelsInPart = g_maxCUSize >> g_maxCUDepth;
- uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK / pelsInPart ? DEBLOCK_SMALLEST_BLOCK / pelsInPart : 1;
+ uint32_t log2UnitSize = g_log2UnitSize;
+ uint32_t partIdxIncr = (DEBLOCK_SMALLEST_BLOCK >> log2UnitSize) ? (DEBLOCK_SMALLEST_BLOCK >> log2UnitSize) : 1;
uint32_t sizeInPU = pic->getNumPartInCUSize() >> (depth);
uint32_t shiftFactor = (dir == EDGE_VER) ? cu->getHorzChromaShift() : cu->getVertChromaShift();
- const bool bAlwaysDoChroma = (cu->getChromaFormat() == CHROMA_444);
+ const bool bAlwaysDoChroma = (cu->getChromaFormat() == CHROMA_444 || (1 << log2UnitSize) > DEBLOCK_SMALLEST_BLOCK);
for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr)
{
xEdgeFilterLuma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
- if (bAlwaysDoChroma || (pelsInPart > DEBLOCK_SMALLEST_BLOCK) || (e % ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) / pelsInPart)) == 0)
+ if (bAlwaysDoChroma || (e % ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> log2UnitSize)) == 0)
{
xEdgeFilterChroma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
}
@@ -178,7 +178,6 @@
}
const uint32_t numElem = widthInBaseUnits;
X265_CHECK(numElem > 0, "numElem edge filter check\n");
- X265_CHECK(widthInBaseUnits > 0, "widthInBaseUnits edge filter check\n");
for (uint32_t i = 0; i < numElem; i++)
{
const uint32_t bsidx = xCalcBsIdx(cu, scanIdx, dir, edgeIdx, i);
@@ -205,9 +204,7 @@
return;
}
- int trWidth = cu->getCUSize(absZOrderIdx) >> cu->getTransformIdx(absZOrderIdx);
-
- uint32_t widthInBaseUnits = trWidth / (g_maxCUSize >> g_maxCUDepth);
+ uint32_t widthInBaseUnits = 1 << (cu->getLog2CUSize(absZOrderIdx) - cu->getTransformIdx(absZOrderIdx) - g_log2UnitSize);
xSetEdgefilterMultiple(cu, absTUPartIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits);
}
@@ -451,7 +448,8 @@
int qpQ = 0;
uint32_t numParts = cu->getPic()->getNumPartInCUSize() >> depth;
- uint32_t pelsInPart = g_maxCUSize >> g_maxCUDepth;
+ uint32_t log2UnitSize = g_log2UnitSize;
+ uint32_t blocksInPart = (log2UnitSize - 2) > 0 ? 1 << (log2UnitSize - 2) : 1;
uint32_t bsAbsIdx = 0, bs = 0;
int offset, srcStep;
@@ -468,17 +466,18 @@
{
offset = 1;
srcStep = stride;
- tmpsrc += edge * pelsInPart;
+ tmpsrc += (edge << log2UnitSize);
}
else // (dir == EDGE_HOR)
{
offset = stride;
srcStep = 1;
- tmpsrc += edge * pelsInPart * stride;
+ tmpsrc += (edge << log2UnitSize) * stride;
}
for (uint32_t idx = 0; idx < numParts; idx++)
{
+ uint32_t partOffset = idx << log2UnitSize;
bsAbsIdx = xCalcBsIdx(cu, absZOrderIdx, dir, edge, idx);
bs = blockingStrength[bsAbsIdx];
if (bs)
@@ -507,13 +506,12 @@
int sideThreshold = (beta + (beta >> 1)) >> 3;
int thrCut = tc * 10;
- uint32_t blocksInPart = pelsInPart / 4 ? pelsInPart / 4 : 1;
for (uint32_t blkIdx = 0; blkIdx < blocksInPart; blkIdx++)
{
- int dp0 = xCalcDP(tmpsrc + srcStep * (idx * pelsInPart + blkIdx * 4 + 0), offset);
- int dq0 = xCalcDQ(tmpsrc + srcStep * (idx * pelsInPart + blkIdx * 4 + 0), offset);
- int dp3 = xCalcDP(tmpsrc + srcStep * (idx * pelsInPart + blkIdx * 4 + 3), offset);
- int dq3 = xCalcDQ(tmpsrc + srcStep * (idx * pelsInPart + blkIdx * 4 + 3), offset);
+ int dp0 = xCalcDP(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0), offset);
+ int dq0 = xCalcDQ(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0), offset);
+ int dp3 = xCalcDP(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3), offset);
+ int dq3 = xCalcDQ(tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3), offset);
int d0 = dp0 + dq0;
int d3 = dp3 + dq3;
@@ -533,12 +531,12 @@
bool bFilterP = (dp < sideThreshold);
bool bFilterQ = (dq < sideThreshold);
- bool sw = xUseStrongFiltering(offset, 2 * d0, beta, tc, tmpsrc + srcStep * (idx * pelsInPart + blkIdx * 4 + 0))
- && xUseStrongFiltering(offset, 2 * d3, beta, tc, tmpsrc + srcStep * (idx * pelsInPart + blkIdx * 4 + 3));
+ bool sw = xUseStrongFiltering(offset, 2 * d0, beta, tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 0))
+ && xUseStrongFiltering(offset, 2 * d3, beta, tc, tmpsrc + srcStep * (partOffset + blkIdx * 4 + 3));
for (int i = 0; i < DEBLOCK_SMALLEST_BLOCK / 2; i++)
{
- xPelFilterLuma(tmpsrc + srcStep * (idx * pelsInPart + blkIdx * 4 + i), offset, tc, sw, bPartPNoFilter, bPartQNoFilter, thrCut, bFilterP, bFilterQ);
+ xPelFilterLuma(tmpsrc + srcStep * (partOffset + blkIdx * 4 + i), offset, tc, sw, bPartPNoFilter, bPartQNoFilter, thrCut, bFilterP, bFilterQ);
}
}
}
@@ -555,8 +553,10 @@
int qp = 0;
int qpP = 0;
int qpQ = 0;
- uint32_t pelsInPartChromaH = g_maxCUSize >> (g_maxCUDepth + cu->getHorzChromaShift());
- uint32_t pelsInPartChromaV = g_maxCUSize >> (g_maxCUDepth + cu->getVertChromaShift());
+ uint32_t log2UnitSizeH = g_log2UnitSize - cu->getHorzChromaShift();
+ uint32_t log2UnitSizeV = g_log2UnitSize - cu->getVertChromaShift();
+ uint32_t unitSizeChromaH = 1 << log2UnitSizeH;
+ uint32_t unitSizeChromaV = 1 << log2UnitSizeV;
int offset, srcStep;
const uint32_t lcuWidthInBaseUnits = cu->getPic()->getNumPartInCUSize();
@@ -573,9 +573,9 @@
uint32_t edgeNumInLCUVert = g_zscanToRaster[absZOrderIdx] % lcuWidthInBaseUnits + edge;
uint32_t edgeNumInLCUHor = g_zscanToRaster[absZOrderIdx] / lcuWidthInBaseUnits + edge;
- if ((pelsInPartChromaH < DEBLOCK_SMALLEST_BLOCK) && (pelsInPartChromaV < DEBLOCK_SMALLEST_BLOCK) &&
- (((edgeNumInLCUVert % (DEBLOCK_SMALLEST_BLOCK / pelsInPartChromaH)) && (dir == 0)) ||
- ((edgeNumInLCUHor % (DEBLOCK_SMALLEST_BLOCK / pelsInPartChromaV)) && dir)))
+ if ((unitSizeChromaH < DEBLOCK_SMALLEST_BLOCK) && (unitSizeChromaV < DEBLOCK_SMALLEST_BLOCK) &&
+ (((edgeNumInLCUVert % (DEBLOCK_SMALLEST_BLOCK >> log2UnitSizeH)) && (dir == 0)) ||
+ ((edgeNumInLCUHor % (DEBLOCK_SMALLEST_BLOCK >> log2UnitSizeV)) && dir)))
{
return;
}
@@ -593,17 +593,17 @@
{
offset = 1;
srcStep = stride;
- tmpSrcCb += edge * pelsInPartChromaH;
- tmpSrcCr += edge * pelsInPartChromaH;
- loopLength = pelsInPartChromaV;
+ tmpSrcCb += (edge << log2UnitSizeH);
+ tmpSrcCr += (edge << log2UnitSizeH);
+ loopLength = unitSizeChromaV;
}
else // (dir == EDGE_HOR)
{
offset = stride;
srcStep = 1;
- tmpSrcCb += edge * stride * pelsInPartChromaV;
- tmpSrcCr += edge * stride * pelsInPartChromaV;
- loopLength = pelsInPartChromaH;
+ tmpSrcCb += edge * stride << log2UnitSizeV;
+ tmpSrcCr += edge * stride << log2UnitSizeV;
+ loopLength = unitSizeChromaH;
}
for (uint32_t idx = 0; idx < numParts; idx++)
diff -r 6055baa75085 -r fa683df9621e source/Lib/TLibCommon/TComPattern.cpp
--- a/source/Lib/TLibCommon/TComPattern.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/Lib/TLibCommon/TComPattern.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -68,7 +68,7 @@
fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);
- bool bUseFilteredPredictions = (dirMode == ALL_IDX || TComPrediction::filteringIntraReferenceSamples(dirMode, tuSize));
+ bool bUseFilteredPredictions = (dirMode == ALL_IDX || TComPrediction::filteringIntraReferenceSamples(dirMode, intraNeighbors.log2TrSize));
if (bUseFilteredPredictions && 8 <= tuSize && tuSize <= 32)
{
@@ -104,7 +104,7 @@
if (bilinearLeft && bilinearAbove)
{
- int shift = g_convertToBit[tuSize] + 3; // log2(tuSize2)
+ int shift = intraNeighbors.log2TrSize + 1;
filterBufN[0] = filterBuf[0];
filterBufN[tuSize2] = filterBuf[tuSize2];
filterBufN[bufSize - 1] = filterBuf[bufSize - 1];
@@ -183,16 +183,15 @@
void TComPattern::initIntraNeighbors(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, TextType cType, IntraNeighbors *intraNeighbors)
{
- uint32_t tuSize = cu->getCUSize(0) >> partDepth;
- int baseUnitSize = g_maxCUSize >> g_maxCUDepth;
- int unitWidth = baseUnitSize;
- int unitHeight = baseUnitSize;
+ uint32_t log2TrSize = cu->getLog2CUSize(0) - partDepth;
+ int log2UnitWidth = g_log2UnitSize;
+ int log2UnitHeight = g_log2UnitSize;
if (cType != TEXT_LUMA)
{
- tuSize >>= cu->getHorzChromaShift();
- unitWidth >>= cu->getHorzChromaShift();
- unitHeight >>= cu->getVertChromaShift();
+ log2TrSize -= cu->getHorzChromaShift();
+ log2UnitWidth -= cu->getHorzChromaShift();
+ log2UnitHeight -= cu->getVertChromaShift();
}
int numIntraNeighbor = 0;
@@ -202,11 +201,12 @@
cu->deriveLeftRightTopIdxAdi(partIdxLT, partIdxRT, zOrderIdxInPart, partDepth);
- int partIdxStride = cu->getPic()->getNumPartInCUSize();
- int tuHeightInUnits = tuSize / unitHeight;
- int tuWidthInUnits = tuSize / unitWidth;
+ uint32_t tuSize = 1 << log2TrSize;
+ int tuWidthInUnits = tuSize >> log2UnitWidth;
+ int tuHeightInUnits = tuSize >> log2UnitHeight;
int aboveUnits = tuWidthInUnits << 1;
int leftUnits = tuHeightInUnits << 1;
+ int partIdxStride = cu->getPic()->getNumPartInCUSize();
partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) * partIdxStride)];
if (!cu->getSlice()->getPPS()->getConstrainedIntraPred())
@@ -231,9 +231,10 @@
intraNeighbors->totalUnits = aboveUnits + leftUnits + 1;
intraNeighbors->aboveUnits = aboveUnits;
intraNeighbors->leftUnits = leftUnits;
+ intraNeighbors->unitWidth = 1 << log2UnitWidth;
+ intraNeighbors->unitHeight = 1 << log2UnitHeight;
intraNeighbors->tuSize = tuSize;
- intraNeighbors->unitWidth = unitWidth;
- intraNeighbors->unitHeight = unitHeight;
+ intraNeighbors->log2TrSize = log2TrSize;
}
void TComPattern::fillReferenceSamples(pixel* roiOrigin, int picStride, pixel* adiTemp, const IntraNeighbors& intraNeighbors)
diff -r 6055baa75085 -r fa683df9621e source/Lib/TLibCommon/TComPattern.h
--- a/source/Lib/TLibCommon/TComPattern.h Mon Jul 14 10:53:01 2014 +0530
+++ b/source/Lib/TLibCommon/TComPattern.h Mon Jul 14 14:50:34 2014 +0900
@@ -59,9 +59,10 @@
int totalUnits;
int aboveUnits;
int leftUnits;
- int tuSize;
int unitWidth;
int unitHeight;
+ int tuSize;
+ uint32_t log2TrSize;
bool bNeighborFlags[4 * MAX_NUM_SPU_W + 1];
};
diff -r 6055baa75085 -r fa683df9621e source/Lib/TLibCommon/TComPicSym.cpp
--- a/source/Lib/TLibCommon/TComPicSym.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/Lib/TLibCommon/TComPicSym.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -66,13 +66,13 @@
m_saoParam = NULL;
m_numPartitions = 1 << (g_maxCUDepth << 1);
- m_unitSize = g_maxCUSize >> g_maxCUDepth;
- m_log2UnitSize = g_convertToBit[m_unitSize] + 2;
+ m_log2UnitSize = g_log2UnitSize;
+ m_unitSize = 1 << m_log2UnitSize;
m_numPartInCUSize = g_maxCUSize >> m_log2UnitSize;
- m_widthInCU = (param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize;
- m_heightInCU = (param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
+ m_widthInCU = (param->sourceWidth + g_maxCUSize - 1) >> g_maxLog2CUSize;
+ m_heightInCU = (param->sourceHeight + g_maxCUSize - 1) >> g_maxLog2CUSize;
m_numCUsInFrame = m_widthInCU * m_heightInCU;
@@ -84,7 +84,7 @@
bool tqBypass = param->bCULossless || param->bLossless;
for (i = 0; i < m_numCUsInFrame; i++)
{
- uint32_t sizeL = g_maxCUSize * g_maxCUSize;
+ uint32_t sizeL = 1 << (g_maxLog2CUSize * 2);
uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(param->internalCsp) + CHROMA_V_SHIFT(param->internalCsp));
if (!m_cuData[i].initialize(m_numPartitions, sizeL, sizeC, 1, tqBypass))
return false;
diff -r 6055baa75085 -r fa683df9621e source/Lib/TLibCommon/TComPrediction.cpp
--- a/source/Lib/TLibCommon/TComPrediction.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/Lib/TLibCommon/TComPrediction.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -113,29 +113,28 @@
// Public member functions
// ====================================================================================================================
-bool TComPrediction::filteringIntraReferenceSamples(uint32_t dirMode, uint32_t tuSize)
+bool TComPrediction::filteringIntraReferenceSamples(uint32_t dirMode, uint32_t log2TrSize)
{
bool bFilter;
- if (dirMode == DC_IDX || tuSize <= 4)
+ if (dirMode == DC_IDX || log2TrSize <= 2)
{
bFilter = false; // no smoothing for DC
}
else
{
int diff = std::min<int>(abs((int)dirMode - HOR_IDX), abs((int)dirMode - VER_IDX));
- uint32_t sizeIdx = g_convertToBit[tuSize];
+ uint32_t sizeIdx = log2TrSize - 2;
bFilter = diff > intraFilterThreshold[sizeIdx];
}
return bFilter;
}
-void TComPrediction::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, int tuSize)
+void TComPrediction::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSize)
{
- X265_CHECK(tuSize >= 4 && tuSize <= 64, "intra block size is out of range\n");
- int sizeIdx = g_convertToBit[tuSize];
- bool bUseFilteredPredictions = TComPrediction::filteringIntraReferenceSamples(dirMode, tuSize);
+ int tuSize = 1 << log2TrSize;
+ bool bUseFilteredPredictions = filteringIntraReferenceSamples(dirMode, log2TrSize);
pixel *refLft, *refAbv;
refLft = m_refLeft + tuSize - 1;
@@ -147,31 +146,23 @@
refAbv = m_refAboveFlt + tuSize - 1;
}
- bool bFilter = tuSize <= 16 && dirMode != PLANAR_IDX;
+ bool bFilter = log2TrSize <= 4 && dirMode != PLANAR_IDX;
+ int sizeIdx = log2TrSize - 2;
+ X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");
primitives.intra_pred[sizeIdx][dirMode](dst, stride, refLft, refAbv, dirMode, bFilter);
}
// Angular chroma
-void TComPrediction::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, int tuSize, int chFmt)
+void TComPrediction::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSizeC, int chFmt)
{
- int sizeIdx = g_convertToBit[tuSize];
+ int tuSize = 1 << log2TrSizeC;
uint32_t tuSize2 = tuSize << 1;
// Create the prediction
pixel refAbv[3 * MAX_CU_SIZE];
pixel refLft[3 * MAX_CU_SIZE];
- bool bUseFilteredPredictions = true;
-
- if (chFmt != CHROMA_444)
- {
- bUseFilteredPredictions = false;
- }
- else
- {
- X265_CHECK(tuSize >= 4 && tuSize < 128, "intra prediction size is out of range\n");
- bUseFilteredPredictions = TComPrediction::filteringIntraReferenceSamples(dirMode, tuSize);
- }
+ bool bUseFilteredPredictions = (chFmt == CHROMA_444 && filteringIntraReferenceSamples(dirMode, log2TrSizeC));
if (bUseFilteredPredictions)
{
@@ -222,6 +213,8 @@
}
}
+ int sizeIdx = log2TrSizeC - 2;
+ X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");
primitives.intra_pred[sizeIdx][dirMode](dst, stride, refLft + tuSize - 1, refAbv + tuSize - 1, dirMode, 0);
}
diff -r 6055baa75085 -r fa683df9621e source/Lib/TLibCommon/TComPrediction.h
--- a/source/Lib/TLibCommon/TComPrediction.h Mon Jul 14 10:53:01 2014 +0530
+++ b/source/Lib/TLibCommon/TComPrediction.h Mon Jul 14 14:50:34 2014 +0900
@@ -103,9 +103,9 @@
void motionCompensation(TComDataCU* cu, TComYuv* predYuv, int picList = REF_PIC_LIST_X, int partIdx = -1, bool bLuma = true, bool bChroma = true);
// Angular Intra
- void predIntraLumaAng(uint32_t dirMode, pixel* pred, intptr_t stride, int tuSize);
- void predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* pred, intptr_t stride, int tuSize, int chFmt);
- static bool filteringIntraReferenceSamples(uint32_t dirMode, uint32_t tuSize);
+ void predIntraLumaAng(uint32_t dirMode, pixel* pred, intptr_t stride, uint32_t log2TrSize);
+ void predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* pred, intptr_t stride, uint32_t log2TrSizeC, int chFmt);
+ static bool filteringIntraReferenceSamples(uint32_t dirMode, uint32_t log2TrSize);
};
}
//! \}
diff -r 6055baa75085 -r fa683df9621e source/Lib/TLibCommon/TComRom.cpp
--- a/source/Lib/TLibCommon/TComRom.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/Lib/TLibCommon/TComRom.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -43,7 +43,7 @@
//! \ingroup TLibCommon
//! \{
// scanning order table
-uint16_t* g_scanOrder[SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_CU_DEPTH];
+uint16_t* g_scanOrder[SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_LOG2_TR_SIZE + 1];
class ScanGenerator
{
@@ -192,7 +192,7 @@
}
// initialise scan orders
- for (uint32_t log2BlockSize = 0; log2BlockSize < MAX_CU_DEPTH; log2BlockSize++)
+ for (uint32_t log2BlockSize = 0; log2BlockSize <= MAX_LOG2_TR_SIZE; log2BlockSize++)
{
const uint32_t blockWidth = 1 << log2BlockSize;
const uint32_t blockHeight = 1 << log2BlockSize;
@@ -259,7 +259,7 @@
{
for (uint32_t scanOrderIndex = 0; scanOrderIndex < SCAN_NUMBER_OF_TYPES; scanOrderIndex++)
{
- for (uint32_t log2BlockSize = 0; log2BlockSize < MAX_CU_DEPTH; log2BlockSize++)
+ for (uint32_t log2BlockSize = 0; log2BlockSize <= MAX_LOG2_TR_SIZE; log2BlockSize++)
{
X265_FREE(g_scanOrder[groupTypeIndex][scanOrderIndex][log2BlockSize]);
}
@@ -271,9 +271,11 @@
// Data structure related table & variable
// ====================================================================================================================
+uint32_t g_maxLog2CUSize = MAX_LOG2_CU_SIZE;
uint32_t g_maxCUSize = MAX_CU_SIZE;
-uint32_t g_maxCUDepth = MAX_CU_DEPTH;
-uint32_t g_addCUDepth = 0;
+uint32_t g_maxCUDepth = MAX_FULL_DEPTH;
+uint32_t g_addCUDepth = 1;
+uint32_t g_log2UnitSize = 2;
uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
uint32_t g_rasterToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
diff -r 6055baa75085 -r fa683df9621e source/Lib/TLibCommon/TComRom.h
--- a/source/Lib/TLibCommon/TComRom.h Mon Jul 14 10:53:01 2014 +0530
+++ b/source/Lib/TLibCommon/TComRom.h Mon Jul 14 14:50:34 2014 +0900
@@ -50,13 +50,20 @@
// Macros
// ====================================================================================================================
-#define MAX_CU_DEPTH 6 // log2(LCUSize)
-#define MAX_CU_SIZE (1 << (MAX_CU_DEPTH)) // maximum allowable size of CU
+#define MAX_CU_DEPTH 4 // maximum CU depth
+#define MAX_FULL_DEPTH 5 // maximum full depth
+#define MAX_LOG2_CU_SIZE 6 // log2(LCUSize)
+#define MAX_CU_SIZE (1 << MAX_LOG2_CU_SIZE) // maximum allowable size of CU
#define MIN_PU_SIZE 4
#define MIN_TU_SIZE 4
#define MAX_NUM_SPU_W (MAX_CU_SIZE / MIN_PU_SIZE) // maximum number of SPU in horizontal line
#define ADI_BUF_STRIDE (2 * MAX_CU_SIZE + 1 + 15) // alignment to 16 bytes
+#define MAX_LOG2_TR_SIZE 5
+#define MAX_LOG2_TS_SIZE 2 // TODO: RExt
+#define MAX_TR_SIZE (1 << MAX_LOG2_TR_SIZE)
+#define MAX_TS_SIZE (1 << MAX_LOG2_TS_SIZE)
+
// ====================================================================================================================
// Initialize / destroy functions
// ====================================================================================================================
@@ -75,7 +82,7 @@
// flexible conversion from relative to absolute index
extern uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
extern uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
-extern uint16_t* g_scanOrder[SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_CU_DEPTH];
+extern uint16_t* g_scanOrder[SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_LOG2_TR_SIZE + 1];
void initZscanToRaster(int maxDepth, int depth, uint32_t startVal, uint32_t*& curIdx);
void initRasterToZscan(uint32_t maxCUSize, uint32_t maxCUDepth);
@@ -86,12 +93,11 @@
void initRasterToPelXY(uint32_t maxCUSize, uint32_t maxCUDepth);
// global variable (LCU width/height, max. CU depth)
+extern uint32_t g_maxLog2CUSize;
extern uint32_t g_maxCUSize;
extern uint32_t g_maxCUDepth;
extern uint32_t g_addCUDepth;
-
-#define LOG2_MAX_TS_SIZE 2 // TODO: RExt
-#define MAX_TS_SIZE (1 << LOG2_MAX_TS_SIZE)
+extern uint32_t g_log2UnitSize;
extern const uint32_t g_puOffset[8];
diff -r 6055baa75085 -r fa683df9621e source/Lib/TLibCommon/TComSlice.h
--- a/source/Lib/TLibCommon/TComSlice.h Mon Jul 14 10:53:01 2014 +0530
+++ b/source/Lib/TLibCommon/TComSlice.h Mon Jul 14 14:50:34 2014 +0900
@@ -954,7 +954,7 @@
// AMP accuracy
int getAMPAcc(uint32_t depth) const { return m_iAMPAcc[depth]; }
- void setAMPAcc(uint32_t depth, int iAccu) { X265_CHECK(depth < g_maxCUDepth, "AMP Acc depth\n"); m_iAMPAcc[depth] = iAccu; }
+ void setAMPAcc(uint32_t depth, int iAccu) { X265_CHECK(depth < MAX_CU_DEPTH, "AMP Acc depth\n"); m_iAMPAcc[depth] = iAccu; }
// Bit-depth
int getBitDepthY() const { return m_bitDepthY; }
diff -r 6055baa75085 -r fa683df9621e source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -1365,7 +1365,7 @@
*/
void TComTrQuant::setErrScaleCoeff(uint32_t list, uint32_t size, uint32_t qp)
{
- uint32_t log2TrSize = g_convertToBit[g_scalingListSizeX[size]] + 2;
+ uint32_t log2TrSize = size + 2;
int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; // Represents scaling through forward transform
uint32_t i, maxNumCoeff = g_scalingListSize[size];
diff -r 6055baa75085 -r fa683df9621e source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncCu.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -102,6 +102,8 @@
*/
bool TEncCu::create(uint8_t totalDepth, uint32_t maxWidth)
{
+ X265_CHECK(totalDepth <= MAX_CU_DEPTH, "invalid totalDepth\n");
+
m_totalDepth = totalDepth;
m_bestPredYuv = new TComYuv*[totalDepth];
@@ -494,7 +496,7 @@
bTestMergeAMP_Ver = true;
}
- if (outBestCU->getCUSize(0) == 64)
+ if (outBestCU->getLog2CUSize(0) == 6)
{
bTestAMP_Hor = false;
bTestAMP_Ver = false;
@@ -527,10 +529,11 @@
// copy partition YUV from depth 0 CTU cache
m_origYuv[0]->copyPartToYuv(m_origYuv[depth], outBestCU->getZorderIdxInCU());
- uint32_t cuSize = outTempCU->getCUSize(0);
+ uint32_t log2CUSize = outTempCU->getLog2CUSize(0);
TComSlice* slice = outTempCU->getSlice();
if (!bInsidePicture)
{
+ uint32_t cuSize = 1 << log2CUSize;
uint32_t lpelx = outBestCU->getCUPelX();
uint32_t tpely = outBestCU->getCUPelY();
uint32_t rpelx = lpelx + cuSize;
@@ -546,7 +549,7 @@
if (depth == g_maxCUDepth - g_addCUDepth)
{
- if (cuSize > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
+ if (log2CUSize > slice->getSPS()->getQuadtreeTULog2MinSize())
xCheckRDCostIntra(outBestCU, outTempCU, SIZE_NxN);
}
@@ -678,10 +681,11 @@
bool doNotBlockPu = true;
bool earlyDetectionSkipMode = false;
- uint32_t cuSize = outTempCU->getCUSize(0);
+ uint32_t log2CUSize = outTempCU->getLog2CUSize(0);
TComSlice* slice = outTempCU->getSlice();
if (!bInsidePicture)
{
+ uint32_t cuSize = 1 << log2CUSize;
uint32_t lpelx = outBestCU->getCUPelX();
uint32_t tpely = outBestCU->getCUPelY();
uint32_t rpelx = lpelx + cuSize;
@@ -725,7 +729,7 @@
if (slice->getSliceType() != I_SLICE)
{
// 2Nx2N, NxN
- if (!(cuSize == 8))
+ if (!(log2CUSize == 3))
{
if (depth == g_maxCUDepth - g_addCUDepth && doNotBlockPu)
{
@@ -842,7 +846,7 @@
if (depth == g_maxCUDepth - g_addCUDepth)
{
- if (cuSize > (1 << slice->getSPS()->getQuadtreeTULog2MinSize()))
+ if (log2CUSize > slice->getSPS()->getQuadtreeTULog2MinSize())
{
xCheckRDCostIntraInInter(outBestCU, outTempCU, SIZE_NxN);
outTempCU->initEstData();
@@ -984,7 +988,7 @@
uint32_t posy = (externalAddress / pic->getFrameWidthInCU()) * g_maxCUSize + g_rasterToPelY[g_zscanToRaster[internalAddress]];
uint32_t width = slice->getSPS()->getPicWidthInLumaSamples();
uint32_t height = slice->getSPS()->getPicHeightInLumaSamples();
- uint32_t cuSize = cu->getCUSize(absPartIdx);
+ uint32_t cuSize = 1 << cu->getLog2CUSize(absPartIdx);
while (posx >= width || posy >= height)
{
@@ -1110,7 +1114,7 @@
m_sbacCoder->codePredInfo(cu, absPartIdx);
// Encode Coefficients, allow codeCoeff() to modify m_bEncodeDQP
- m_sbacCoder->codeCoeff(cu, absPartIdx, depth, cu->getCUSize(absPartIdx), m_bEncodeDQP);
+ m_sbacCoder->codeCoeff(cu, absPartIdx, depth, m_bEncodeDQP);
// --- write terminating bit ---
finishCU(cu, absPartIdx, depth);
@@ -1275,15 +1279,15 @@
outTempCU->m_mvBits = m_sbacCoder->getNumberOfWrittenBits();
// Encode Coefficients
- bool bEncodeDQP = m_bEncodeDQP;
- m_sbacCoder->codeCoeff(outTempCU, 0, depth, outTempCU->getCUSize(0), bEncodeDQP);
+ bool bCodeDQP = m_bEncodeDQP;
+ m_sbacCoder->codeCoeff(outTempCU, 0, depth, bCodeDQP);
m_sbacCoder->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
outTempCU->m_totalBits = m_sbacCoder->getNumberOfWrittenBits();
outTempCU->m_coeffBits = outTempCU->m_totalBits - outTempCU->m_mvBits;
if (m_rdCost.psyRdEnabled())
{
- int part = g_convertToBit[outTempCU->getCUSize(0)];
+ int part = outTempCU->getLog2CUSize(0) - 2;
outTempCU->m_psyEnergy = m_rdCost.psyCost(part, m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
m_tmpRecoYuv[depth]->getLumaAddr(), m_tmpRecoYuv[depth]->getStride());
outTempCU->m_totalPsyCost = m_rdCost.calcPsyRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits, outTempCU->m_psyEnergy);
@@ -1325,14 +1329,14 @@
// Encode Coefficients
bool bCodeDQP = m_bEncodeDQP;
- m_sbacCoder->codeCoeff(outTempCU, 0, depth, outTempCU->getCUSize(0), bCodeDQP);
+ m_sbacCoder->codeCoeff(outTempCU, 0, depth, bCodeDQP);
m_sbacCoder->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
outTempCU->m_totalBits = m_sbacCoder->getNumberOfWrittenBits();
outTempCU->m_coeffBits = outTempCU->m_totalBits - outTempCU->m_mvBits;
if (m_rdCost.psyRdEnabled())
{
- int part = g_convertToBit[outTempCU->getCUSize(0)];
+ int part = outTempCU->getLog2CUSize(0) - 2;
outTempCU->m_psyEnergy = m_rdCost.psyCost(part, m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
m_tmpRecoYuv[depth]->getLumaAddr(), m_tmpRecoYuv[depth]->getStride());
outTempCU->m_totalPsyCost = m_rdCost.calcPsyRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits, outTempCU->m_psyEnergy);
@@ -1397,8 +1401,8 @@
*/
void TEncCu::xFillOrigYUVBuffer(TComDataCU* cu, TComYuv* fencYuv)
{
- uint32_t width = cu->getCUSize(0);
- uint32_t height = cu->getCUSize(0);
+ uint32_t width = 1 << cu->getLog2CUSize(0);
+ uint32_t height = 1 << cu->getLog2CUSize(0);
pixel* srcY = fencYuv->getLumaAddr();
pixel* dstY = cu->getLumaOrigYuv();
diff -r 6055baa75085 -r fa683df9621e source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -98,7 +98,7 @@
m_qtTempCoeff[1] = m_qtTempCoeff[0] + m_numLayers;
m_qtTempCoeff[2] = m_qtTempCoeff[0] + m_numLayers * 2;
m_qtTempShortYuv = new ShortYuv[m_numLayers];
- uint32_t sizeL = g_maxCUSize * g_maxCUSize;
+ uint32_t sizeL = 1 << (g_maxLog2CUSize * 2);
uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
for (uint32_t i = 0; i < m_numLayers; ++i)
{
@@ -145,7 +145,7 @@
uint32_t fullDepth = cu->getDepth(0) + trDepth;
uint32_t trMode = cu->getTransformIdx(absPartIdx);
uint32_t subdiv = (trMode > trDepth ? 1 : 0);
- uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
if (cu->getPredictionMode(0) == MODE_INTRA && cu->getPartitionSize(0) == SIZE_NxN && trDepth == 0)
{
@@ -189,7 +189,7 @@
uint32_t fullDepth = cu->getDepth(0) + trDepth;
uint32_t trMode = cu->getTransformIdx(absPartIdx);
uint32_t subdiv = (trMode > trDepth ? 1 : 0);
- uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
int chFmt = cu->getChromaFormat();
if ((log2TrSize > 2) && !(chFmt == CHROMA_444))
@@ -236,7 +236,7 @@
return;
}
- uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
uint32_t log2UnitSize = cu->getPic()->getLog2UnitSize();
uint32_t coeffOffset = absPartIdx << (log2UnitSize * 2);
@@ -263,7 +263,7 @@
return;
}
- uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
uint32_t log2UnitSize = cu->getPic()->getLog2UnitSize();
@@ -371,9 +371,10 @@
uint32_t TEncSearch::xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep)
{
+ int cuSize = 1 << cu->getLog2CUSize(absPartIdx);
m_sbacCoder->resetBits();
xEncIntraHeaderChroma(cu, absPartIdx);
- xEncSubdivCbfQTChroma(cu, trDepth, absPartIdx, absPartIdxStep, cu->getCUSize(absPartIdx), cu->getCUSize(absPartIdx));
+ xEncSubdivCbfQTChroma(cu, trDepth, absPartIdx, absPartIdxStep, cuSize, cuSize);
xEncCoeffQTChroma(cu, trDepth, absPartIdx, TEXT_CHROMA_U);
xEncCoeffQTChroma(cu, trDepth, absPartIdx, TEXT_CHROMA_V);
return m_sbacCoder->getNumberOfWrittenBits();
@@ -410,7 +411,6 @@
uint32_t& cbf,
uint32_t& outDist)
{
- uint32_t tuSize = 1 << log2TrSize;
uint32_t stride = fencYuv->getStride();
pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
pixel* pred = predYuv->getLumaAddr(absPartIdx);
@@ -420,19 +420,22 @@
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();
bool useTransformSkip = !!cu->getTransformSkip(absPartIdx, TEXT_LUMA);
- int part = partitionFromSize(tuSize);
+ int part = partitionFromLog2Size(log2TrSize);
int sizeIdx = log2TrSize - 2;
//===== get residual signal =====
+#if CHECKED_BUILD || _DEBUG
+ uint32_t tuSize = 1 << log2TrSize;
X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment check fail\n");
X265_CHECK(!((intptr_t)pred & (tuSize - 1)), "pred alignment check fail\n");
X265_CHECK(!((intptr_t)residual & (tuSize - 1)), "residual alignment check fail\n");
+#endif
primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
//===== transform and quantization =====
//--- init rate estimation arrays for RDOQ ---
if (m_bEnableRDOQ)
- m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, tuSize, TEXT_LUMA);
+ m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, log2TrSize, TEXT_LUMA);
//--- transform and quantization ---
int chFmt = cu->getChromaFormat();
@@ -455,7 +458,7 @@
else
{
#if CHECKED_BUILD || _DEBUG
- memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
+ memset(coeff, 0, sizeof(coeff_t) << log2TrSize * 2);
#endif
//===== reconstruction =====
primitives.square_copy_ps[sizeIdx](reconQt, reconQtStride, pred, stride);
@@ -479,7 +482,6 @@
uint32_t log2TrSizeC)
{
TextType ttype = (TextType)chromaId;
- uint32_t tuSize = 1 << log2TrSizeC;
uint32_t stride = fencYuv->getCStride();
pixel* fenc = fencYuv->getChromaAddr(chromaId, absPartIdx);
pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdx);
@@ -489,19 +491,22 @@
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
bool useTransformSkipC = !!cu->getTransformSkip(absPartIdx, ttype);
- int part = partitionFromSize(tuSize);
+ int part = partitionFromLog2Size(log2TrSizeC);
int sizeIdxC = log2TrSizeC - 2;
//===== get residual signal =====
+#if CHECKED_BUILD || _DEBUG
+ uint32_t tuSize = 1 << log2TrSizeC;
X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment check fail\n");
X265_CHECK(!((intptr_t)pred & (tuSize - 1)), "pred alignment check fail\n");
X265_CHECK(!((intptr_t)residual & (tuSize - 1)), "residual alignment check fail\n");
+#endif
primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);
//===== transform and quantization =====
//--- init rate estimation arrays for RDOQ ---
if (m_bEnableRDOQ)
- m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, tuSize, TEXT_CHROMA);
+ m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, log2TrSizeC, TEXT_CHROMA);
//--- transform and quantization ---
int chFmt = cu->getChromaFormat();
@@ -531,7 +536,7 @@
else
{
#if CHECKED_BUILD || _DEBUG
- memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
+ memset(coeff, 0, sizeof(coeff_t) << log2TrSizeC * 2);
#endif
//===== reconstruction =====
primitives.square_copy_ps[sizeIdxC](reconQt, reconQtStride, pred, stride);
@@ -558,7 +563,7 @@
uint64_t& rdCost)
{
uint32_t fullDepth = cu->getDepth(0) + trDepth;
- uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
bool bCheckFull = (log2TrSize <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
bool bCheckSplit = (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
@@ -592,7 +597,7 @@
uint32_t tuSize = 1 << log2TrSize;
bool checkTransformSkip = (cu->getSlice()->getPPS()->getUseTransformSkip() &&
- log2TrSize <= LOG2_MAX_TS_SIZE &&
+ log2TrSize <= MAX_LOG2_TS_SIZE &&
!cu->getCUTransquantBypass(0));
if (checkTransformSkip)
{
@@ -611,7 +616,7 @@
TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode);
//===== get prediction signal =====
- predIntraLumaAng(lumaPredMode, pred, stride, tuSize);
+ predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);
cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
@@ -821,7 +826,7 @@
TComYuv* reconYuv)
{
uint32_t fullDepth = cu->getDepth(0) + trDepth;
- uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
bool bCheckFull = (log2TrSize <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize());
bool bCheckSplit = (log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx));
@@ -838,7 +843,6 @@
//----- code luma block with given intra prediction mode and store Cbf-----
uint32_t lumaPredMode = cu->getLumaIntraDir(absPartIdx);
- uint32_t tuSize = 1 << log2TrSize;
int chFmt = cu->getChromaFormat();
uint32_t stride = fencYuv->getStride();
pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
@@ -857,14 +861,17 @@
//===== init availability pattern =====
TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode);
//===== get prediction signal =====
- predIntraLumaAng(lumaPredMode, pred, stride, tuSize);
+ predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);
cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
//===== get residual signal =====
+#if CHECKED_BUILD || _DEBUG
+ uint32_t tuSize = 1 << log2TrSize;
X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment failure\n");
X265_CHECK(!((intptr_t)pred & (tuSize - 1)), "pred alignment failure\n");
X265_CHECK(!((intptr_t)residual & (tuSize - 1)), "residual alignment failure\n");
+#endif
int sizeIdx = log2TrSize - 2;
primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
@@ -875,7 +882,7 @@
//--- set coded block flag ---
cu->setCbfSubParts((numSig ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
- int part = partitionFromSize(tuSize);
+ int part = partitionFromLog2Size(log2TrSize);
if (numSig)
{
@@ -889,7 +896,7 @@
else
{
#if CHECKED_BUILD || _DEBUG
- memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
+ memset(coeff, 0, sizeof(coeff_t) << log2TrSize * 2);
#endif
// Generate Recon
@@ -924,7 +931,7 @@
if (trMode == trDepth)
{
- uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
//===== copy transform coefficients =====
@@ -934,7 +941,7 @@
::memcpy(coeffDestY, coeffSrcY, sizeof(coeff_t) << (log2TrSize * 2));
//===== copy reconstruction =====
- m_qtTempShortYuv[qtLayer].copyPartToPartLuma(reconYuv, absPartIdx, 1 << log2TrSize);
+ m_qtTempShortYuv[qtLayer].copyPartToPartLuma(reconYuv, absPartIdx, log2TrSize);
}
else
{
@@ -972,7 +979,7 @@
{
uint32_t depth = cu->getDepth(0);
uint32_t fullDepth = depth + trDepth;
- uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
uint32_t trDepthC = trDepth;
if ((log2TrSize == 2) && !(cu->getChromaFormat() == CHROMA_444))
@@ -1018,7 +1025,7 @@
if (trMode == trDepth)
{
int chFmt = cu->getChromaFormat();
- uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
uint32_t trDepthC = trDepth;
@@ -1033,19 +1040,20 @@
return;
}
+ uint32_t log2UnitSize = cu->getPic()->getLog2UnitSize();
uint32_t tuSize = 1 << log2TrSizeC;
uint32_t stride = fencYuv->getCStride();
const bool splitIntoSubTUs = (chFmt == CHROMA_422);
bool checkTransformSkip = (cu->getSlice()->getPPS()->getUseTransformSkip() &&
- log2TrSizeC <= LOG2_MAX_TS_SIZE &&
+ log2TrSizeC <= MAX_LOG2_TS_SIZE &&
!cu->getCUTransquantBypass(0));
uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
if (m_param->bEnableTSkipFast)
{
- checkTransformSkip &= ((cu->getCUSize(0) >> trDepth) <= 4);
+ checkTransformSkip &= (log2TrSize <= MAX_LOG2_TS_SIZE);
if (checkTransformSkip)
{
int nbLumaSkip = 0;
@@ -1079,14 +1087,14 @@
}
chromaPredMode = (chFmt == CHROMA_422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
//===== get prediction signal =====
- predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, tuSize, chFmt);
+ predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, log2TrSizeC, chFmt);
uint32_t singleCbfC = 0;
uint32_t singlePsyEnergyTmp = 0;
int16_t* reconQt = m_qtTempShortYuv[qtLayer].getChromaAddr(chromaId, absPartIdxC);
uint32_t reconQtStride = m_qtTempShortYuv[qtLayer].m_cwidth;
- uint32_t coeffOffsetC = absPartIdxC << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
+ uint32_t coeffOffsetC = absPartIdxC << (log2UnitSize * 2 - (m_hChromaShift + m_vChromaShift));
coeff_t* coeffC = m_qtTempCoeff[chromaId][qtLayer] + coeffOffsetC;
if (checkTransformSkip)
@@ -1222,11 +1230,10 @@
if (trMode == trDepth)
{
int chFmt = cu->getChromaFormat();
- uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
- bool bChromaSame = false;
if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
{
X265_CHECK(trDepth > 0, "invalid trDepth\n");
@@ -1235,7 +1242,6 @@
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
if ((absPartIdx & (qpdiv - 1)) != 0)
return;
- bChromaSame = true;
}
//===== copy transform coefficients =====
@@ -1251,7 +1257,7 @@
::memcpy(coeffDstV, coeffSrcV, sizeof(coeff_t) * numCoeffC);
//===== copy reconstruction =====
- m_qtTempShortYuv[qtLayer].copyPartToPartChroma(reconYuv, absPartIdx, 1 << log2TrSize, (bChromaSame && (chFmt != CHROMA_422)));
+ m_qtTempShortYuv[qtLayer].copyPartToPartChroma(reconYuv, absPartIdx, log2TrSizeC + m_hChromaShift);
}
else
{
@@ -1275,9 +1281,8 @@
if (trMode == trDepth)
{
int chFmt = cu->getChromaFormat();
- uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
+ uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
- uint32_t origTrDepth = trDepth;
uint32_t trDepthC = trDepth;
if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
{
@@ -1290,11 +1295,12 @@
return;
}
+ uint32_t log2UnitSize = cu->getPic()->getLog2UnitSize();
uint32_t tuSize = 1 << log2TrSizeC;
uint32_t stride = fencYuv->getCStride();
const bool splitIntoSubTUs = (chFmt == CHROMA_422);
int sizeIdxC = log2TrSizeC - 2;
- int part = partitionFromSize(tuSize);
+ int part = partitionFromLog2Size(log2TrSizeC);
for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
{
@@ -1310,7 +1316,7 @@
pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdxC);
int16_t* residual = resiYuv->getChromaAddr(chromaId, absPartIdxC);
pixel* recon = reconYuv->getChromaAddr(chromaId, absPartIdxC);
- uint32_t coeffOffsetC = absPartIdxC << (cu->getPic()->getLog2UnitSize() * 2 - (m_hChromaShift + m_vChromaShift));
+ uint32_t coeffOffsetC = absPartIdxC << (log2UnitSize * 2 - (m_hChromaShift + m_vChromaShift));
coeff_t* coeff = cu->getCoeff(ttype) + coeffOffsetC;
uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
pixel* reconIPred = cu->getPic()->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
@@ -1333,7 +1339,7 @@
pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
//===== get prediction signal =====
- predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, tuSize, chFmt);
+ predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, log2TrSizeC, chFmt);
//===== get residual signal =====
X265_CHECK(!((intptr_t)fenc & (tuSize - 1)), "fenc alignment failure\n");
@@ -1351,7 +1357,7 @@
uint32_t numSig = m_trQuant.transformNxN(cu, residual, stride, coeff, log2TrSizeC, ttype, absPartIdxC, useTransformSkipC);
//--- set coded block flag ---
- cu->setCbfPartRange((((numSig > 0) ? 1 : 0) << origTrDepth), ttype, absPartIdxC, tuIterator.absPartIdxStep);
+ cu->setCbfPartRange((((numSig > 0) ? 1 : 0) << trDepth), ttype, absPartIdxC, tuIterator.absPartIdxStep);
if (numSig)
{
@@ -1406,13 +1412,14 @@
uint32_t depth = cu->getDepth(0);
uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
uint32_t numPU = 1 << (2 * initTrDepth);
- uint32_t tuSize = cu->getCUSize(0) >> initTrDepth;
+ uint32_t log2TrSize = cu->getLog2CUSize(0) - initTrDepth;
+ uint32_t tuSize = 1 << log2TrSize;
uint32_t qNumParts = cu->getTotalNumPart() >> 2;
uint32_t qPartNum = cu->getPic()->getNumPartInCU() >> ((depth + initTrDepth) << 1);
uint32_t overallDistY = 0;
uint32_t candNum;
uint64_t candCostList[FAST_UDI_MAX_RDMODE_NUM];
- uint32_t sizeIdx = g_convertToBit[tuSize]; // log2(tuSize) - 2
+ uint32_t sizeIdx = log2TrSize - 2;
static const uint8_t intraModeNumFast[] = { 8, 8, 3, 3, 3 }; // 4x4, 8x8, 16x16, 32x32, 64x64
//===== loop over partitions =====
@@ -1474,7 +1481,7 @@
scaleTuSize = 32;
scaleStride = 32;
costShift = 2;
- sizeIdx = 5 - 2; // g_convertToBit[scaleTuSize];
+ sizeIdx = 5 - 2; // log2(scaleTuSize) - 2
// Filtered and Unfiltered refAbove and refLeft pointing to above and left.
above = aboveScale;
@@ -1628,12 +1635,11 @@
if (pu != numPU - 1)
{
uint32_t zorder = cu->getZorderIdxInCU() + partOffset;
- int part = partitionFromSize(tuSize);
pixel* dst = cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
uint32_t dststride = cu->getPic()->getPicYuvRec()->getStride();
pixel* src = reconYuv->getLumaAddr(partOffset);
uint32_t srcstride = reconYuv->getStride();
- primitives.luma_copy_pp[part](dst, dststride, src, srcstride);
+ primitives.square_copy_pp[log2TrSize - 2](dst, dststride, src, srcstride);
}
//=== update PU data ====
@@ -1671,7 +1677,8 @@
uint32_t maxMode = NUM_CHROMA_MODE;
uint32_t modeList[NUM_CHROMA_MODE];
- uint32_t tuSize = cu->getCUSize(0) >> (trDepth + m_hChromaShift);
+ uint32_t log2TrSizeC = cu->getLog2CUSize(0) - trDepth - m_hChromaShift;
+ uint32_t tuSize = 1 << log2TrSizeC;
int chFmt = cu->getChromaFormat();
uint32_t stride = fencYuv->getCStride();
int scaleTuSize = tuSize;
@@ -1681,8 +1688,9 @@
{
scaleTuSize = 32;
costShift = 2;
+ log2TrSizeC = 5;
}
- int sizeIdx = g_convertToBit[scaleTuSize];
+ int sizeIdx = log2TrSizeC - 2;
pixelcmp_t sa8d = primitives.sa8d[sizeIdx];
TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, 1);
@@ -1700,10 +1708,10 @@
{
pixel* fenc = fencYuv->getChromaAddr(chromaId, absPartIdx);
pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdx);
- pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
+ pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId, scaleTuSize, m_predBuf);
//===== get prediction signal =====
- predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, scaleTuSize, chFmt);
+ predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, log2TrSizeC, chFmt);
cost += sa8d(fenc, stride, pred, stride) << costShift;
}
@@ -1726,11 +1734,11 @@
{
uint32_t depth = cu->getDepth(0);
uint32_t initTrDepth = (cu->getPartitionSize(0) != SIZE_2Nx2N) && (cu->getChromaFormat() == CHROMA_444 ? 1 : 0);
- uint32_t tuSize = cu->getCUSize(0) >> initTrDepth;
+ uint32_t log2TrSize = cu->getLog2CUSize(0) - initTrDepth;
uint32_t absPartIdx = (cu->getPic()->getNumPartInCU() >> (depth << 1));
int chFmt = cu->getChromaFormat();
- int part = partitionFromSize(tuSize);
+ int part = partitionFromLog2Size(log2TrSize);
TURecurse tuIterator((initTrDepth == 0) ? DONT_SPLIT : QUAD_SPLIT, absPartIdx, 0);
@@ -1843,7 +1851,7 @@
{
assert(cu->getPartitionSize(0) != SIZE_2Nx2N);
- if (cu->getCUSize(0) <= 8 && cu->getSlice()->getPPS()->getLog2ParallelMergeLevelMinus2())
+ if (cu->getLog2CUSize(0) <= 3 && cu->getSlice()->getPPS()->getLog2ParallelMergeLevelMinus2())
{
if (puIdx == 0)
{
@@ -1948,7 +1956,7 @@
merge.height = roiHeight;
mrgCost = xMergeEstimation(cu, partIdx, merge);
- if (bMergeOnly && cu->getCUSize(0) > 8)
+ if (bMergeOnly && cu->getLog2CUSize(0) > 3)
{
if (mrgCost == MAX_UINT)
{
@@ -2311,7 +2319,8 @@
uint32_t bits = 0, bestBits = 0;
uint32_t distortion = 0, bestDist = 0;
- uint32_t cuSize = cu->getCUSize(0);
+ uint32_t log2CUSize = cu->getLog2CUSize(0);
+ uint32_t cuSize = 1 << log2CUSize;
uint8_t depth = cu->getDepth(0);
// No residual coding : SKIP mode
@@ -2321,7 +2330,7 @@
predYuv->copyToPartYuv(outReconYuv, 0);
// Luma
- int part = partitionFromSize(cuSize);
+ int part = partitionFromLog2Size(log2CUSize);
distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
// Chroma
part = partitionFromSizes(cuSize >> m_hChromaShift, cuSize >> m_vChromaShift);
@@ -2341,7 +2350,7 @@
cu->m_totalDistortion = distortion;
if (m_rdCost.psyRdEnabled())
{
- int size = g_convertToBit[cuSize];
+ int size = log2CUSize - 2;
cu->m_psyEnergy = m_rdCost.psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
outReconYuv->getLumaAddr(), outReconYuv->getStride());
cu->m_totalPsyCost = m_rdCost.calcPsyRdCost(cu->m_totalDistortion, cu->m_totalBits, cu->m_psyEnergy);
@@ -2356,7 +2365,7 @@
return;
}
- outResiYuv->subtract(fencYuv, predYuv, cuSize);
+ outResiYuv->subtract(fencYuv, predYuv, log2CUSize);
// Residual coding.
bool bIsTQBypassEnable = false, bIsLosslessMode = false;
@@ -2394,7 +2403,7 @@
if (m_rdCost.psyRdEnabled())
{
// need to check whether zero distortion is similar to psyenergy of fenc
- int size = g_convertToBit[cuSize];
+ int size = log2CUSize - 2;
zeroPsyEnergyY = m_rdCost.psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(), (pixel*)RDCost::zeroPel, 0);
zeroCost = m_rdCost.calcPsyRdCost(zeroDistortion, zeroResiBits, zeroPsyEnergyY);
}
@@ -2451,14 +2460,14 @@
predYuv->copyToPartYuv(outReconYuv, 0);
// update with clipped distortion and cost (qp estimation loop uses unclipped values)
- int part = partitionFromSize(cuSize);
+ int part = partitionFromLog2Size(log2CUSize);
bestDist = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
part = partitionFromSizes(cuSize >> m_hChromaShift, cuSize >> m_vChromaShift);
bestDist += m_rdCost.scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));
bestDist += m_rdCost.scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));
if (m_rdCost.psyRdEnabled())
{
- int size = g_convertToBit[cuSize];
+ int size = log2CUSize - 2;
cu->m_psyEnergy = m_rdCost.psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
outReconYuv->getLumaAddr(), outReconYuv->getStride());
cu->m_totalPsyCost = m_rdCost.calcPsyRdCost(bestDist, bestBits, cu->m_psyEnergy);
@@ -2484,9 +2493,9 @@
if (cu->getPredictionMode(0) == MODE_INTER)
{
residualTransformQuantInter(cu, 0, resiYuv, cu->getDepth(0), true);
- uint32_t width = cu->getCUSize(0);
+ uint32_t cuSize = 1 << cu->getLog2CUSize(0);
if (cu->getQtRootCbf(0))
- reconYuv->addClip(predYuv, resiYuv, width);
+ reconYuv->addClip(predYuv, resiYuv, cuSize);
else
{
predYuv->copyToPartYuv(reconYuv, 0);
@@ -2507,7 +2516,7 @@
{
X265_CHECK(cu->getDepth(0) == cu->getDepth(absPartIdx), "invalid depth\n");
const uint32_t trMode = depth - cu->getDepth(0);
- const uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
+ const uint32_t log2TrSize = g_maxLog2CUSize - depth;
const uint32_t setCbf = 1 << trMode;
int chFmt = cu->getChromaFormat();
@@ -2664,7 +2673,7 @@
{
X265_CHECK(cu->getDepth(0) == cu->getDepth(absPartIdx), "depth not matching\n");
const uint32_t trMode = depth - cu->getDepth(0);
- const uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
+ const uint32_t log2TrSize = g_maxLog2CUSize - depth;
const uint32_t subTUDepth = trMode + 1;
const uint32_t setCbf = 1 << trMode;
int chFmt = cu->getChromaFormat();
@@ -2726,13 +2735,13 @@
cu->setTrIdxSubParts(depth - cu->getDepth(0), absPartIdx, depth);
bool checkTransformSkip = cu->getSlice()->getPPS()->getUseTransformSkip() && !cu->getCUTransquantBypass(0);
- bool checkTransformSkipY = checkTransformSkip && log2TrSize <= LOG2_MAX_TS_SIZE;
- bool checkTransformSkipUV = checkTransformSkip && log2TrSizeC <= LOG2_MAX_TS_SIZE;
+ bool checkTransformSkipY = checkTransformSkip && log2TrSize <= MAX_LOG2_TS_SIZE;
+ bool checkTransformSkipUV = checkTransformSkip && log2TrSizeC <= MAX_LOG2_TS_SIZE;
cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
if (m_bEnableRDOQ && curuseRDOQ)
- m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, trSize, TEXT_LUMA);
+ m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, log2TrSize, TEXT_LUMA);
m_trQuant.setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
numSigY = m_trQuant.transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, coeffCurY,
@@ -2761,7 +2770,7 @@
cu->setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
if (m_bEnableRDOQ && curuseRDOQ)
- m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, trSizeC, TEXT_CHROMA);
+ m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, log2TrSizeC, TEXT_CHROMA);
//Cb transform
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
@@ -2804,13 +2813,13 @@
minCost[TEXT_CHROMA_V][subTUIndex] = MAX_INT64;
}
- int partSize = partitionFromSize(trSize);
+ int partSize = partitionFromLog2Size(log2TrSize);
uint32_t distY = primitives.sse_sp[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, (pixel*)RDCost::zeroPel, 0);
uint32_t psyEnergyY = 0;
if (m_rdCost.psyRdEnabled())
{
// need to check whether zero distortion is similar to psyenergy of fenc
- int size = g_convertToBit[trSize];
+ int size = log2TrSize - 2;
psyEnergyY = m_rdCost.psyCost(size, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(), (pixel*)RDCost::zeroPel, 0);
}
int16_t *curResiY = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
@@ -2904,7 +2913,7 @@
{
TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
- int partSizeC = partitionFromSize(trSizeC);
+ int partSizeC = partitionFromLog2Size(log2TrSizeC);
do
{
@@ -3103,7 +3112,7 @@
cu->setTransformSkipSubParts(1, TEXT_LUMA, absPartIdx, depth);
if (m_bEnableRDOQ)
- m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, trSize, TEXT_LUMA);
+ m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, log2TrSize, TEXT_LUMA);
m_trQuant.setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
uint32_t numSigTSkipY = m_trQuant.transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, tsCoeffY,
@@ -3167,7 +3176,7 @@
TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
- int partSizeC = partitionFromSize(trSizeC);
+ int partSizeC = partitionFromLog2Size(log2TrSizeC);
do
{
@@ -3186,7 +3195,7 @@
cu->setTransformSkipPartRange(1, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
if (m_bEnableRDOQ)
- m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, trSizeC, TEXT_CHROMA);
+ m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, log2TrSizeC, TEXT_CHROMA);
int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
m_trQuant.setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
@@ -3510,7 +3519,7 @@
const uint32_t curTrMode = depth - cu->getDepth(0);
const uint32_t trMode = cu->getTransformIdx(absPartIdx);
const bool bSubdiv = curTrMode != trMode;
- const uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
+ const uint32_t log2TrSize = g_maxLog2CUSize - depth;
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
int chFmt = cu->getChromaFormat();
const bool splitIntoSubTUs = (chFmt == CHROMA_422);
@@ -3626,12 +3635,11 @@
if (curTrMode == trMode)
{
int chFmt = cu->getChromaFormat();
- const uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
+ const uint32_t log2TrSize = g_maxLog2CUSize - depth;
const uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
bool bCodeChroma = true;
- bool bChromaSame = false;
uint32_t trModeC = trMode;
if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
{
@@ -3639,16 +3647,14 @@
trModeC--;
uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trModeC) << 1);
bCodeChroma = ((absPartIdx & (qpdiv - 1)) == 0);
- bChromaSame = true;
}
if (bSpatial)
{
- uint32_t trSize = 1 << log2TrSize;
- m_qtTempShortYuv[qtLayer].copyPartToPartLuma(resiYuv, absPartIdx, trSize);
+ m_qtTempShortYuv[qtLayer].copyPartToPartLuma(resiYuv, absPartIdx, log2TrSize);
if (bCodeChroma)
- m_qtTempShortYuv[qtLayer].copyPartToPartChroma(resiYuv, absPartIdx, trSize, (bChromaSame && (chFmt != CHROMA_422)));
+ m_qtTempShortYuv[qtLayer].copyPartToPartChroma(resiYuv, absPartIdx, log2TrSizeC + m_hChromaShift);
}
else
{
@@ -3761,7 +3767,7 @@
m_sbacCoder->codePredInfo(cu, 0);
bool bDummy = false;
cu->m_mvBits = m_sbacCoder->getNumberOfWrittenBits();
- m_sbacCoder->codeCoeff(cu, 0, cu->getDepth(0), cu->getCUSize(0), bDummy);
+ m_sbacCoder->codeCoeff(cu, 0, cu->getDepth(0), bDummy);
int totalBits = m_sbacCoder->getNumberOfWrittenBits();
cu->m_coeffBits = totalBits - cu->m_mvBits;
return totalBits;
diff -r 6055baa75085 -r fa683df9621e source/common/frame.cpp
--- a/source/common/frame.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/common/frame.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -73,8 +73,8 @@
bool isVbv = param->rc.vbvBufferSize > 0 && param->rc.vbvMaxBitrate > 0;
if (ok && (isVbv || param->rc.aqMode))
{
- int numCols = (param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize;
- int numRows = (param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
+ int numCols = (param->sourceWidth + g_maxCUSize - 1) >> g_maxLog2CUSize;
+ int numRows = (param->sourceHeight + g_maxCUSize - 1) >> g_maxLog2CUSize;
if (param->rc.aqMode)
CHECKED_MALLOC(m_qpaAq, double, numRows);
@@ -126,8 +126,8 @@
void Frame::reinit(x265_param *param)
{
- int numCols = (param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize;
- int numRows = (param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
+ int numCols = (param->sourceWidth + g_maxCUSize - 1) >> g_maxLog2CUSize;
+ int numRows = (param->sourceHeight + g_maxCUSize - 1) >> g_maxLog2CUSize;
if (param->rc.vbvBufferSize > 0 && param->rc.vbvMaxBitrate > 0)
{
memset(m_rowDiagQp, 0, numRows * sizeof(double));
diff -r 6055baa75085 -r fa683df9621e source/common/intrapred.cpp
--- a/source/common/intrapred.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/common/intrapred.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -99,8 +99,8 @@
}
}
-template<int width>
-void planad_pred_c(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int /*dirMode*/, int /*bFilter*/)
+template<int log2Size>
+void planar_pred_c(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int /*dirMode*/, int /*bFilter*/)
{
above += 1;
left += 1;
@@ -110,10 +110,10 @@
int32_t leftColumn[MAX_CU_SIZE + 1], topRow[MAX_CU_SIZE + 1];
// CHECK_ME: dynamic range is 9 bits or 15 bits(I assume max input bit_depth is 14 bits)
int16_t bottomRow[MAX_CU_SIZE], rightColumn[MAX_CU_SIZE];
- int blkSize = width;
- int offset2D = width;
- int shift1D = g_convertToBit[width] + 2;
- int shift2D = shift1D + 1;
+ const int blkSize = 1 << log2Size;
+ const int offset2D = blkSize;
+ const int shift1D = log2Size;
+ const int shift2D = shift1D + 1;
// Get left and above reference column and row
for (k = 0; k < blkSize + 1; k++)
@@ -257,14 +257,16 @@
}
}
-template<int size>
+template<int log2Size>
void all_angs_pred_c(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, int bLuma)
{
+ const int size = 1 << log2Size;
+ const int sizeIdx = log2Size - 2;
for (int mode = 2; mode <= 34; mode++)
{
- pixel *left = (IntraFilterType[(int)g_convertToBit[size]][mode] ? left1 : left0);
- pixel *above = (IntraFilterType[(int)g_convertToBit[size]][mode] ? above1 : above0);
- pixel *out = dest + (mode - 2) * (size * size);
+ pixel *left = (IntraFilterType[sizeIdx][mode] ? left1 : left0);
+ pixel *above = (IntraFilterType[sizeIdx][mode] ? above1 : above0);
+ pixel *out = dest + ((mode - 2) << (log2Size * 2));
intra_pred_ang_c<size>(out, size, left, above, mode, bLuma);
@@ -293,10 +295,10 @@
void Setup_C_IPredPrimitives(EncoderPrimitives& p)
{
- p.intra_pred[BLOCK_4x4][0] = planad_pred_c<4>;
- p.intra_pred[BLOCK_8x8][0] = planad_pred_c<8>;
- p.intra_pred[BLOCK_16x16][0] = planad_pred_c<16>;
- p.intra_pred[BLOCK_32x32][0] = planad_pred_c<32>;
+ p.intra_pred[BLOCK_4x4][0] = planar_pred_c<2>;
+ p.intra_pred[BLOCK_8x8][0] = planar_pred_c<3>;
+ p.intra_pred[BLOCK_16x16][0] = planar_pred_c<4>;
+ p.intra_pred[BLOCK_32x32][0] = planar_pred_c<5>;
// Intra Prediction DC
p.intra_pred[BLOCK_4x4][1] = intra_pred_dc_c<4>;
@@ -311,9 +313,9 @@
p.intra_pred[BLOCK_32x32][i] = intra_pred_ang_c<32>;
}
- p.intra_pred_allangs[BLOCK_4x4] = all_angs_pred_c<4>;
- p.intra_pred_allangs[BLOCK_8x8] = all_angs_pred_c<8>;
- p.intra_pred_allangs[BLOCK_16x16] = all_angs_pred_c<16>;
- p.intra_pred_allangs[BLOCK_32x32] = all_angs_pred_c<32>;
+ p.intra_pred_allangs[BLOCK_4x4] = all_angs_pred_c<2>;
+ p.intra_pred_allangs[BLOCK_8x8] = all_angs_pred_c<3>;
+ p.intra_pred_allangs[BLOCK_16x16] = all_angs_pred_c<4>;
+ p.intra_pred_allangs[BLOCK_32x32] = all_angs_pred_c<5>;
}
}
diff -r 6055baa75085 -r fa683df9621e source/common/param.cpp
--- a/source/common/param.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/common/param.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -875,7 +875,8 @@
return check_failed;
uint32_t maxCUDepth = (uint32_t)g_convertToBit[param->maxCUSize];
- uint32_t tuQTMaxLog2Size = maxCUDepth + 2 - 1;
+ uint32_t maxLog2CUSize = maxCUDepth + 2;
+ uint32_t tuQTMaxLog2Size = maxLog2CUSize - 1;
uint32_t tuQTMinLog2Size = 2; //log2(4)
CHECK((param->maxCUSize >> maxCUDepth) < 4,
@@ -947,16 +948,16 @@
CHECK(param->crQpOffset < -12, "Min. Chroma Cr QP Offset is -12");
CHECK(param->crQpOffset > 12, "Max. Chroma Cr QP Offset is 12");
- CHECK((1u << tuQTMaxLog2Size) > param->maxCUSize,
+ CHECK(tuQTMaxLog2Size > maxLog2CUSize,
"QuadtreeTULog2MaxSize must be log2(maxCUSize) or smaller.");
CHECK(param->tuQTMaxInterDepth < 1 || param->tuQTMaxInterDepth > 4,
"QuadtreeTUMaxDepthInter must be greater than 0 and less than 5");
- CHECK(param->maxCUSize < (1u << (tuQTMinLog2Size + param->tuQTMaxInterDepth - 1)),
+ CHECK(maxLog2CUSize < tuQTMinLog2Size + param->tuQTMaxInterDepth - 1,
"QuadtreeTUMaxDepthInter must be less than or equal to the difference between log2(maxCUSize) and QuadtreeTULog2MinSize plus 1");
CHECK(param->tuQTMaxIntraDepth < 1 || param->tuQTMaxIntraDepth > 4,
"QuadtreeTUMaxDepthIntra must be greater 0 and less than 5");
- CHECK(param->maxCUSize < (1u << (tuQTMinLog2Size + param->tuQTMaxIntraDepth - 1)),
+ CHECK(maxLog2CUSize < tuQTMinLog2Size + param->tuQTMaxIntraDepth - 1,
"QuadtreeTUMaxDepthInter must be less than or equal to the difference between log2(maxCUSize) and QuadtreeTULog2MinSize plus 1");
CHECK(param->maxNumMergeCand < 1, "MaxNumMergeCand must be 1 or greater.");
@@ -1087,17 +1088,15 @@
{
// set max CU width & height
g_maxCUSize = param->maxCUSize;
+ g_maxLog2CUSize = maxCUDepth + 2;
// compute actual CU depth with respect to config depth and max transform size
- g_addCUDepth = 0;
- while ((param->maxCUSize >> maxCUDepth) > (1u << (tuQTMinLog2Size + g_addCUDepth)))
- {
- g_addCUDepth++;
- }
+ g_addCUDepth = g_maxLog2CUSize - maxCUDepth - tuQTMinLog2Size;
maxCUDepth += g_addCUDepth;
g_addCUDepth++;
g_maxCUDepth = maxCUDepth;
+ g_log2UnitSize = g_maxLog2CUSize - g_maxCUDepth;
// initialize partition order
uint32_t* tmp = &g_zscanToRaster[0];
diff -r 6055baa75085 -r fa683df9621e source/common/primitives.h
--- a/source/common/primitives.h Mon Jul 14 10:53:01 2014 +0530
+++ b/source/common/primitives.h Mon Jul 14 14:50:34 2014 +0900
@@ -120,6 +120,13 @@
return part;
}
+inline int partitionFromLog2Size(int log2Size)
+{
+ X265_CHECK(2 <= log2Size && log2Size <= 6, "Invalid block size\n");
+ extern const uint8_t lumaPartitionsFromSquareBlocksTable[];
+ return (int)lumaPartitionsFromSquareBlocksTable[log2Size - 2];
+}
+
typedef int (*pixelcmp_t)(pixel *fenc, intptr_t fencstride, pixel *fref, intptr_t frefstride); // fenc is aligned
typedef int (*pixelcmp_ss_t)(int16_t *fenc, intptr_t fencstride, int16_t *fref, intptr_t frefstride);
typedef int (*pixelcmp_sp_t)(int16_t *fenc, intptr_t fencstride, pixel *fref, intptr_t frefstride);
diff -r 6055baa75085 -r fa683df9621e source/common/shortyuv.cpp
--- a/source/common/shortyuv.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/common/shortyuv.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -85,9 +85,9 @@
::memset(m_buf[2], 0, (m_cwidth * m_cheight) * sizeof(int16_t));
}
-void ShortYuv::subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t partSize)
+void ShortYuv::subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t log2Size)
{
- int part = partitionFromSize(partSize);
+ int part = partitionFromLog2Size(log2Size);
pixel* srcY0 = srcYuv0->getLumaAddr();
pixel* srcY1 = srcYuv1->getLumaAddr();
@@ -119,67 +119,37 @@
primitives.pixeladd_ss(cpartSize, cpartSize, getCrAddr(), m_cwidth, srcV0, srcV1, srcYuv0->m_cwidth, srcYuv1->m_cwidth);
}
-void ShortYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t partSize)
+void ShortYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t log2Size)
{
- int part = partitionFromSize(partSize);
int16_t* src = getLumaAddr(partIdx);
int16_t* dst = dstPicYuv->getLumaAddr(partIdx);
- primitives.luma_copy_ss[part](dst, dstPicYuv->m_width, src, m_width);
+ primitives.square_copy_ss[log2Size - 2](dst, dstPicYuv->m_width, src, m_width);
}
-void ShortYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t partSize)
+void ShortYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t log2Size)
{
- int part = partitionFromSize(partSize);
int16_t* src = getLumaAddr(partIdx);
pixel* dst = dstPicYuv->getLumaAddr(partIdx);
- primitives.luma_copy_sp[part](dst, dstPicYuv->getStride(), src, m_width);
+ primitives.square_copy_sp[log2Size - 2](dst, dstPicYuv->getStride(), src, m_width);
}
-void ShortYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
+void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t log2SizeL)
{
- int part = partitionFromSizes(width, height);
- int16_t* src = getLumaAddr(partIdx);
- int16_t* dst = dstPicYuv->getLumaAddr(partIdx);
-
- primitives.luma_copy_ss[part](dst, dstPicYuv->m_width, src, m_width);
-}
-
-void ShortYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
-{
- int part = partitionFromSizes(width, height);
- int16_t* src = getLumaAddr(partIdx);
- pixel* dst = dstPicYuv->getLumaAddr(partIdx);
-
- primitives.luma_copy_sp[part](dst, dstPicYuv->getStride(), src, m_width);
-}
-
-void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChromaSame)
-{
- int part = partitionFromSize(lumaSize);
-
- part = ((part == 0) && (m_csp == CHROMA_422)) ? 1 : part;
+ int part = partitionFromLog2Size(log2SizeL);
int16_t* srcU = getCbAddr(partIdx);
int16_t* srcV = getCrAddr(partIdx);
int16_t* dstU = dstPicYuv->getCbAddr(partIdx);
int16_t* dstV = dstPicYuv->getCrAddr(partIdx);
- if (bChromaSame)
- {
- primitives.luma_copy_ss[part](dstU, dstPicYuv->m_cwidth, srcU, m_cwidth);
- primitives.luma_copy_ss[part](dstV, dstPicYuv->m_cwidth, srcV, m_cwidth);
- }
- else
- {
- primitives.chroma[m_csp].copy_ss[part](dstU, dstPicYuv->m_cwidth, srcU, m_cwidth);
- primitives.chroma[m_csp].copy_ss[part](dstV, dstPicYuv->m_cwidth, srcV, m_cwidth);
- }
+ primitives.chroma[m_csp].copy_ss[part](dstU, dstPicYuv->m_cwidth, srcU, m_cwidth);
+ primitives.chroma[m_csp].copy_ss[part](dstV, dstPicYuv->m_cwidth, srcV, m_cwidth);
}
-void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChromaSame)
+void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t log2SizeL)
{
- int part = partitionFromSize(lumaSize);
+ int part = partitionFromLog2Size(log2SizeL);
int16_t* srcU = getCbAddr(partIdx);
int16_t* srcV = getCrAddr(partIdx);
pixel* dstU = dstPicYuv->getCbAddr(partIdx);
@@ -188,16 +158,8 @@
uint32_t srcStride = m_cwidth;
uint32_t dstStride = dstPicYuv->getCStride();
- if (bChromaSame)
- {
- primitives.luma_copy_sp[part](dstU, dstStride, srcU, srcStride);
- primitives.luma_copy_sp[part](dstV, dstStride, srcV, srcStride);
- }
- else
- {
- primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
- primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
- }
+ primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
+ primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
}
void ShortYuv::copyPartToPartShortChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId)
diff -r 6055baa75085 -r fa683df9621e source/common/shortyuv.h
--- a/source/common/shortyuv.h Mon Jul 14 10:53:01 2014 +0530
+++ b/source/common/shortyuv.h Mon Jul 14 14:50:34 2014 +0900
@@ -89,17 +89,15 @@
int16_t* getChromaAddr(uint32_t chromaId, uint32_t partUnitIdx) { return m_buf[chromaId] + getChromaAddrOffset(partUnitIdx, m_cwidth); }
- void subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t partSize);
+ void subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t log2Size);
void addClip(ShortYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize);
- void copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t partSize);
- void copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height);
- void copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChromaSame);
+ void copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t log2Size);
+ void copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t log2SizeL);
void copyPartToPartShortChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId);
- void copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t partSize);
- void copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height);
- void copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, bool bChromaSame);
+ void copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t log2Size);
+ void copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t log2SizeL);
// -------------------------------------------------------------------------------------------------------------------
// member functions to support multiple color space formats
diff -r 6055baa75085 -r fa683df9621e source/encoder/compress.cpp
--- a/source/encoder/compress.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/encoder/compress.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -69,14 +69,14 @@
// Encode Coefficients
bool bCodeDQP = m_bEncodeDQP;
- m_sbacCoder->codeCoeff(cu, 0, depth, cu->getCUSize(0), bCodeDQP);
+ m_sbacCoder->codeCoeff(cu, 0, depth, bCodeDQP);
m_sbacCoder->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
cu->m_totalBits = m_sbacCoder->getNumberOfWrittenBits();
cu->m_coeffBits = cu->m_totalBits - cu->m_mvBits;
if (m_rdCost.psyRdEnabled())
{
- int part = g_convertToBit[cu->getCUSize(0)];
+ int part = cu->getLog2CUSize(0) - 2;
cu->m_psyEnergy = m_rdCost.psyCost(part, m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
m_tmpRecoYuv[depth]->getLumaAddr(), m_tmpRecoYuv[depth]->getStride());
cu->m_totalPsyCost = m_rdCost.calcPsyRdCost(cu->m_totalDistortion, cu->m_totalBits, cu->m_psyEnergy);
@@ -94,7 +94,8 @@
cu->setCUTransquantBypassSubParts(!!m_param->bLossless, 0, depth);
uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
- uint32_t tuSize = cu->getCUSize(0) >> initTrDepth;
+ uint32_t log2TrSize = cu->getLog2CUSize(0) - initTrDepth;
+ uint32_t tuSize = 1 << log2TrSize;
const uint32_t partOffset = 0;
// Reference sample smoothing
@@ -116,7 +117,8 @@
ALIGN_VAR_32(pixel, tmp[33 * 32 * 32]);
int scaleTuSize = tuSize;
int scaleStride = stride;
- int costMultiplier = 1;
+ int costShift = 0;
+ int sizeIdx = log2TrSize - 2;
if (tuSize > 32)
{
@@ -137,7 +139,8 @@
scaleTuSize = 32;
scaleStride = 32;
- costMultiplier = 4;
+ costShift = 2;
+ sizeIdx = 5 - 2; // log2(scaleTuSize) - 2
// Filtered and Unfiltered refAbove and refLeft pointing to above and left.
above = aboveScale;
@@ -146,7 +149,6 @@
leftFiltered = leftScale;
}
- int sizeIdx = g_convertToBit[scaleTuSize];
pixelcmp_t sa8d = primitives.sa8d[sizeIdx];
uint32_t preds[3];
@@ -157,7 +159,7 @@
// DC
primitives.intra_pred[sizeIdx][DC_IDX](tmp, scaleStride, left, above, 0, (scaleTuSize <= 16));
- bsad = costMultiplier * sa8d(fenc, scaleStride, tmp, scaleStride);
+ bsad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
bmode = mode = DC_IDX;
bbits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth);
bcost = m_rdCost.calcRdSADCost(bsad, bbits);
@@ -173,7 +175,7 @@
// PLANAR
primitives.intra_pred[sizeIdx][PLANAR_IDX](tmp, scaleStride, leftPlanar, abovePlanar, 0, 0);
- sad = costMultiplier * sa8d(fenc, scaleStride, tmp, scaleStride);
+ sad = sa8d(fenc, scaleStride, tmp, scaleStride) << costShift;
mode = PLANAR_IDX;
bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth);
cost = m_rdCost.calcRdSADCost(sad, bits);
@@ -189,7 +191,7 @@
bool modeHor = (mode < 18);
pixel *cmp = (modeHor ? buf_trans : fenc);
intptr_t srcStride = (modeHor ? scaleTuSize : scaleStride);
- sad = costMultiplier * sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleTuSize * scaleTuSize)], scaleTuSize);
+ sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleTuSize * scaleTuSize)], scaleTuSize) << costShift;
bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth);
cost = m_rdCost.calcRdSADCost(sad, bits);
COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
@@ -216,7 +218,7 @@
outTempCU->m_totalBits = 0;
if (predInterSearch(outTempCU, outPredYuv, bUseMRG, false))
{
- int sizeIdx = g_convertToBit[outTempCU->getCUSize(0)];
+ int sizeIdx = outTempCU->getLog2CUSize(0) - 2;
uint32_t distortion = primitives.sa8d[sizeIdx](m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
outPredYuv->getLumaAddr(), outPredYuv->getStride());
outTempCU->m_totalDistortion = distortion;
@@ -248,7 +250,7 @@
outBestCU->setPredModeSubParts(MODE_INTER, 0, depth);
outBestCU->setMergeFlag(0, true);
- int sizeIdx = g_convertToBit[outTempCU->getCUSize(0)];
+ int sizeIdx = outTempCU->getLog2CUSize(0) - 2;
int bestMergeCand = -1;
for (uint32_t mergeCand = 0; mergeCand < maxNumMergeCand; ++mergeCand)
@@ -353,10 +355,11 @@
TComSlice* slice = outTempCU->getSlice();
if (!bInsidePicture)
{
+ int cuSize = 1 << outTempCU->getLog2CUSize(0);
uint32_t lpelx = outTempCU->getCUPelX();
uint32_t tpely = outTempCU->getCUPelY();
- uint32_t rpelx = lpelx + outTempCU->getCUSize(0);
- uint32_t bpely = tpely + outTempCU->getCUSize(0);
+ uint32_t rpelx = lpelx + cuSize;
+ uint32_t bpely = tpely + cuSize;
bInsidePicture = (rpelx <= slice->getSPS()->getPicWidthInLumaSamples() &&
bpely <= slice->getSPS()->getPicHeightInLumaSamples());
}
@@ -555,7 +558,7 @@
for (int partIdx = 0; partIdx < numPart; partIdx++)
motionCompensation(outBestCU, m_bestPredYuv[depth], REF_PIC_LIST_X, partIdx, false, true);
- m_tmpResiYuv[depth]->subtract(m_origYuv[depth], m_bestPredYuv[depth], outBestCU->getCUSize(0));
+ m_tmpResiYuv[depth]->subtract(m_origYuv[depth], m_bestPredYuv[depth], outBestCU->getLog2CUSize(0));
generateCoeffRecon(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth], false);
}
else
@@ -851,7 +854,7 @@
uint32_t src2stride = m_bestPredYuv[0]->getStride();
uint32_t src1stride = m_origYuv[0]->getStride();
uint32_t dststride = m_tmpResiYuv[depth]->m_width;
- int part = partitionFromSize(cu->getCUSize(0));
+ int part = partitionFromLog2Size(cu->getLog2CUSize(0));
primitives.luma_sub_ps[part](dst, dststride, src1, src2, src1stride, src2stride);
src2 = m_bestPredYuv[0]->getCbAddr(absPartIdx);
@@ -910,7 +913,7 @@
// Generate Recon
TComPicYuv* rec = pic->getPicYuvRec();
- int part = partitionFromSize(cu->getCUSize(0));
+ int part = partitionFromLog2Size(cu->getLog2CUSize(0));
pixel* src = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
pixel* dst = rec->getLumaAddr(cu->getAddr(), absPartIdx);
uint32_t srcstride = m_bestPredYuv[0]->getStride();
diff -r 6055baa75085 -r fa683df9621e source/encoder/cturow.h
--- a/source/encoder/cturow.h Mon Jul 14 10:53:01 2014 +0530
+++ b/source/encoder/cturow.h Mon Jul 14 14:50:34 2014 +0900
@@ -61,7 +61,7 @@
SBac m_sbacCoder;
SBac m_bufferSbacCoder;
- SBac m_rdSbacCoders[MAX_CU_DEPTH + 1][CI_NUM];
+ SBac m_rdSbacCoders[MAX_FULL_DEPTH + 1][CI_NUM];
// to compute stats for 2 pass
double m_iCuCnt;
diff -r 6055baa75085 -r fa683df9621e source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/encoder/encoder.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -1063,16 +1063,10 @@
sps->setMaxCUSize(g_maxCUSize);
sps->setMaxCUDepth(g_maxCUDepth);
- int minCUSize = sps->getMaxCUSize() >> (sps->getMaxCUDepth() - g_addCUDepth);
- int log2MinCUSize = 0;
- while (minCUSize > 1)
- {
- minCUSize >>= 1;
- log2MinCUSize++;
- }
+ int log2MinCUSize = g_maxLog2CUSize - (g_maxCUDepth - g_addCUDepth);
sps->setLog2MinCodingBlockSize(log2MinCUSize);
- sps->setLog2DiffMaxMinCodingBlockSize(sps->getMaxCUDepth() - g_addCUDepth);
+ sps->setLog2DiffMaxMinCodingBlockSize(g_maxCUDepth - g_addCUDepth);
sps->setQuadtreeTULog2MaxSize(m_quadtreeTULog2MaxSize);
sps->setQuadtreeTULog2MinSize(m_quadtreeTULog2MinSize);
@@ -1219,7 +1213,8 @@
setThreadPool(ThreadPool::allocThreadPool(p->poolNumThreads));
int poolThreadCount = ThreadPool::getThreadPool()->getThreadCount();
- int rows = (p->sourceHeight + p->maxCUSize - 1) / p->maxCUSize;
+ uint32_t maxLog2CUSize = g_convertToBit[p->maxCUSize] + 2;
+ int rows = (p->sourceHeight + p->maxCUSize - 1) >> maxLog2CUSize;
if (p->frameNumThreads == 0)
{
@@ -1362,7 +1357,7 @@
//====== Coding Tools ========
- uint32_t tuQTMaxLog2Size = g_convertToBit[p->maxCUSize] + 2 - 1;
+ uint32_t tuQTMaxLog2Size = maxLog2CUSize - 1;
m_quadtreeTULog2MaxSize = tuQTMaxLog2Size;
uint32_t tuQTMinLog2Size = 2; //log2(4)
m_quadtreeTULog2MinSize = tuQTMinLog2Size;
diff -r 6055baa75085 -r fa683df9621e source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/encoder/entropy.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -68,10 +68,10 @@
}
void SBac::encodeTransform(TComDataCU* cu, CoeffCodeState& state, uint32_t offsetLuma, uint32_t offsetChroma, uint32_t absPartIdx,
- uint32_t absPartIdxStep, uint32_t depth, uint32_t tuSize, uint32_t trIdx, bool& bCodeDQP)
+ uint32_t absPartIdxStep, uint32_t depth, uint32_t log2TrSize, uint32_t trIdx, bool& bCodeDQP)
{
const bool subdiv = cu->getTransformIdx(absPartIdx) + cu->getDepth(absPartIdx) > (uint8_t)depth;
- const uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - depth;
+ // log2TrSize is now supplied by the caller; it was previously derived here as getSPS()->getLog2MaxCodingBlockSize() - depth.
uint32_t hChromaShift = cu->getHorzChromaShift();
uint32_t vChromaShift = cu->getVertChromaShift();
uint32_t cbfY = cu->getCbf(absPartIdx, TEXT_LUMA, trIdx);
@@ -133,12 +133,13 @@
const bool bFirstCbfOfCU = trDepthCurr == 0;
bool mCodeAll = true;
- const uint32_t numPels = (tuSize * tuSize) >> (hChromaShift + vChromaShift);
+ const uint32_t numPels = 1 << (log2TrSize * 2 - hChromaShift - vChromaShift);
if (numPels < (MIN_TU_SIZE * MIN_TU_SIZE))
mCodeAll = false;
if (bFirstCbfOfCU || mCodeAll)
{
+ uint32_t tuSize = 1 << log2TrSize;
if (bFirstCbfOfCU || cu->getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr - 1))
codeQtCbf(cu, absPartIdx, absPartIdxStep, (tuSize >> hChromaShift), (tuSize >> vChromaShift), TEXT_CHROMA_U, trDepthCurr, (subdiv == 0));
if (bFirstCbfOfCU || cu->getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr - 1))
@@ -152,30 +153,30 @@
if (subdiv)
{
- tuSize >>= 1;
- uint32_t numCoeff = tuSize * tuSize;
+ log2TrSize--;
+ uint32_t numCoeff = 1 << (log2TrSize * 2);
uint32_t numCoeffC = (numCoeff >> (hChromaShift + vChromaShift));
trIdx++;
++depth;
absPartIdxStep >>= 2;
const uint32_t partNum = cu->getPic()->getNumPartInCU() >> (depth << 1);
- encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, tuSize, trIdx, bCodeDQP);
+ encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP);
absPartIdx += partNum;
offsetLuma += numCoeff;
offsetChroma += numCoeffC;
- encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, tuSize, trIdx, bCodeDQP);
+ encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP);
absPartIdx += partNum;
offsetLuma += numCoeff;
offsetChroma += numCoeffC;
- encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, tuSize, trIdx, bCodeDQP);
+ encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP);
absPartIdx += partNum;
offsetLuma += numCoeff;
offsetChroma += numCoeffC;
- encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, tuSize, trIdx, bCodeDQP);
+ encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP);
}
else
{
@@ -324,11 +325,8 @@
codeRefFrmIdx(cu, absPartIdx, list);
}
-void SBac::codeCoeff(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, uint32_t cuSize, bool& bCodeDQP)
+void SBac::codeCoeff(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool& bCodeDQP)
{
- uint32_t lumaOffset = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
- uint32_t chromaOffset = lumaOffset >> (cu->getHorzChromaShift() + cu->getVertChromaShift());
-
if (!cu->isIntra(absPartIdx))
{
if (!(cu->getMergeFlag(absPartIdx) && cu->getPartitionSize(absPartIdx) == SIZE_2Nx2N))
@@ -337,9 +335,12 @@
return;
}
+ uint32_t log2CUSize = cu->getLog2CUSize(absPartIdx);
+ uint32_t lumaOffset = absPartIdx << cu->getPic()->getLog2UnitSize() * 2;
+ uint32_t chromaOffset = lumaOffset >> (cu->getHorzChromaShift() + cu->getVertChromaShift());
uint32_t absPartIdxStep = cu->getPic()->getNumPartInCU() >> (depth << 1);
CoeffCodeState state;
- encodeTransform(cu, state, lumaOffset, chromaOffset, absPartIdx, absPartIdxStep, depth, cuSize, 0, bCodeDQP);
+ encodeTransform(cu, state, lumaOffset, chromaOffset, absPartIdx, absPartIdxStep, depth, log2CUSize, 0, bCodeDQP);
}
void SBac::codeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx)
@@ -1711,7 +1712,7 @@
case SIZE_nRx2N:
encodeBin(0, m_contextModels[OFF_PART_SIZE_CTX + 0]);
encodeBin(0, m_contextModels[OFF_PART_SIZE_CTX + 1]);
- if (depth == g_maxCUDepth - g_addCUDepth && !(cu->getCUSize(absPartIdx) == 8))
+ if (depth == g_maxCUDepth - g_addCUDepth && !(cu->getLog2CUSize(absPartIdx) == 3))
encodeBin(1, m_contextModels[OFF_PART_SIZE_CTX + 2]);
if (cu->getSlice()->getSPS()->getAMPAcc(depth))
{
@@ -1722,7 +1723,7 @@
break;
case SIZE_NxN:
- if (depth == g_maxCUDepth - g_addCUDepth && !(cu->getCUSize(absPartIdx) == 8))
+ if (depth == g_maxCUDepth - g_addCUDepth && !(cu->getLog2CUSize(absPartIdx) == 3))
{
encodeBin(0, m_contextModels[OFF_PART_SIZE_CTX + 0]);
encodeBin(0, m_contextModels[OFF_PART_SIZE_CTX + 1]);
@@ -1892,7 +1893,7 @@
const uint32_t interDir = cu->getInterDir(absPartIdx) - 1;
const uint32_t ctx = cu->getCtxInterDir(absPartIdx);
- if (cu->getPartitionSize(absPartIdx) == SIZE_2Nx2N || cu->getCUSize(absPartIdx) != 8)
+ if (cu->getPartitionSize(absPartIdx) == SIZE_2Nx2N || cu->getLog2CUSize(absPartIdx) != 3)
encodeBin(interDir == 2 ? 1 : 0, m_contextModels[OFF_INTER_DIR_CTX + ctx]);
if (interDir < 2)
encodeBin(interDir, m_contextModels[OFF_INTER_DIR_CTX + 4]);
@@ -2326,14 +2327,14 @@
}
/* estimate bit cost for CBP, significant map and significant coefficients */
-void SBac::estBit(EstBitsSbac* estBitsSbac, int trSize, TextType ttype)
+void SBac::estBit(EstBitsSbac* estBitsSbac, uint32_t log2TrSize, TextType ttype)
{
estCBFBit(estBitsSbac);
estSignificantCoeffGroupMapBit(estBitsSbac, ttype);
// encode significance map
- estSignificantMapBit(estBitsSbac, trSize, ttype);
+ estSignificantMapBit(estBitsSbac, log2TrSize, ttype);
// encode significant coefficients
estSignificantCoefficientsBit(estBitsSbac, ttype);
@@ -2371,16 +2372,16 @@
}
/* estimate SAMBAC bit cost for significant coefficient map */
-void SBac::estSignificantMapBit(EstBitsSbac* estBitsSbac, int trSize, TextType ttype)
+void SBac::estSignificantMapBit(EstBitsSbac* estBitsSbac, uint32_t log2TrSize, TextType ttype)
{
int firstCtx = 1, numCtx = 8;
- if (trSize >= 16)
+ if (log2TrSize >= 4)
{
firstCtx = (ttype == TEXT_LUMA) ? 21 : 12;
numCtx = (ttype == TEXT_LUMA) ? 6 : 3;
}
- else if (trSize == 8)
+ else if (log2TrSize == 3)
{
firstCtx = 9;
numCtx = (ttype == TEXT_LUMA) ? 12 : 3;
@@ -2418,7 +2419,6 @@
}
int bitsX = 0, bitsY = 0;
- uint32_t log2TrSize = g_convertToBit[trSize] + 2;
int blkSizeOffset = ttype ? NUM_CTX_LAST_FLAG_XY_LUMA : ((log2TrSize - 2) * 3 + ((log2TrSize - 1) >> 2));
int ctxShift = ttype ? log2TrSize - 2 : ((log2TrSize + 1) >> 2);
uint32_t maxGroupIdx = log2TrSize * 2 - 1;
diff -r 6055baa75085 -r fa683df9621e source/encoder/entropy.h
--- a/source/encoder/entropy.h Mon Jul 14 10:53:01 2014 +0530
+++ b/source/encoder/entropy.h Mon Jul 14 14:50:34 2014 +0900
@@ -155,17 +155,17 @@
void codeQtCbf(TComDataCU* cu, uint32_t absPartIdx, TextType ttype, uint32_t trDepth);
void codeQtCbfZero(TComDataCU* cu, TextType ttype, uint32_t trDepth);
void codeQtRootCbfZero(TComDataCU* cu);
- void codeCoeff(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, uint32_t cuSize, bool& bCodeDQP);
+ void codeCoeff(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool& bCodeDQP);
void codeCoeffNxN(TComDataCU* cu, coeff_t* coef, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype);
void codeIntraDirLumaAng(TComDataCU* cu, uint32_t absPartIdx, bool isMultiple);
void codeIntraDirChroma(TComDataCU* cu, uint32_t absPartIdx);
// RDO functions
- void estBit(EstBitsSbac* estBitsSbac, int trSize, TextType ttype);
+ void estBit(EstBitsSbac* estBitsSbac, uint32_t log2TrSize, TextType ttype);
void estCBFBit(EstBitsSbac* estBitsSbac);
void estSignificantCoeffGroupMapBit(EstBitsSbac* estBitsSbac, TextType ttype);
- void estSignificantMapBit(EstBitsSbac* estBitsSbac, int trSize, TextType ttype);
+ void estSignificantMapBit(EstBitsSbac* estBitsSbac, uint32_t log2TrSize, TextType ttype);
void estSignificantCoefficientsBit(EstBitsSbac* estBitsSbac, TextType ttype);
private:
@@ -215,7 +215,7 @@
uint32_t bakAbsPartIdxCU;
};
- void encodeTransform(TComDataCU* cu, CoeffCodeState& state, uint32_t offsetLumaOffset, uint32_t offsetChroma, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t depth, uint32_t tuSize, uint32_t uiTrIdx, bool& bCodeDQP);
+ void encodeTransform(TComDataCU* cu, CoeffCodeState& state, uint32_t offsetLumaOffset, uint32_t offsetChroma, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t depth, uint32_t log2TrSize, uint32_t uiTrIdx, bool& bCodeDQP);
void copyFrom(SBac& src);
void copyContextsFrom(SBac& src);
diff -r 6055baa75085 -r fa683df9621e source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Mon Jul 14 10:53:01 2014 +0530
+++ b/source/encoder/slicetype.cpp Mon Jul 14 14:50:34 2014 +0900
@@ -1605,7 +1605,7 @@
}
if (!fenc->bIntraCalculated)
{
- int sizeIdx = g_convertToBit[cuSize]; // partition size
+ const int sizeIdx = X265_LOWRES_CU_BITS - 2; // partition size
pixel _above0[X265_LOWRES_CU_SIZE * 4 + 1], *const above0 = _above0 + 2 * X265_LOWRES_CU_SIZE;
pixel _above1[X265_LOWRES_CU_SIZE * 4 + 1], *const above1 = _above1 + 2 * X265_LOWRES_CU_SIZE;
@@ -1653,7 +1653,7 @@
// calculate 35 satd costs, keep least cost
ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE);
- pixelcmp_t satd = primitives.satd[partitionFromSize(cuSize)];
+ pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
int icost = m_me.COST_MAX, cost;
for (uint32_t mode = 0; mode < 35; mode++)
{
More information about the x265-devel mailing list